Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorEva Hasler <ehasler@saxnot.inf.ed.ac.uk>2012-10-03 21:53:55 +0400
committerEva Hasler <ehasler@saxnot.inf.ed.ac.uk>2012-10-03 21:53:55 +0400
commite7e4dbd405e8d29a9a429c6b5eb366a2fe0ecf9a (patch)
treed6cc808944fab7112253c2be3e62652f45f0d076
parentebbf0d028c8724767f49a29f1f052a938020dd5a (diff)
parent9931a1e0fd84fa0cd2a149fde0b67ccd54b4b65c (diff)
merge remaining changes to mira, word pair features, phrase pair features
-rw-r--r--.gitignore4
-rw-r--r--.gitmodules3
-rw-r--r--BUILD-INSTRUCTIONS.txt3
-rw-r--r--Jamroot43
-rw-r--r--NOTICE3
-rw-r--r--OnDiskPt/Jamfile2
-rw-r--r--OnDiskPt/Main.cpp20
-rw-r--r--OnDiskPt/Main.h4
-rw-r--r--OnDiskPt/OnDiskWrapper.cpp49
-rw-r--r--OnDiskPt/OnDiskWrapper.h4
-rw-r--r--OnDiskPt/Phrase.cpp18
-rw-r--r--OnDiskPt/Phrase.h14
-rw-r--r--OnDiskPt/PhraseNode.cpp11
-rw-r--r--OnDiskPt/PhraseNode.h7
-rw-r--r--OnDiskPt/SourcePhrase.h3
-rw-r--r--OnDiskPt/TargetPhrase.cpp34
-rw-r--r--OnDiskPt/TargetPhrase.h18
-rw-r--r--OnDiskPt/TargetPhraseCollection.cpp6
-rw-r--r--OnDiskPt/TargetPhraseCollection.h2
-rw-r--r--OnDiskPt/Vocab.cpp21
-rw-r--r--OnDiskPt/Vocab.h12
-rw-r--r--OnDiskPt/Word.cpp83
-rw-r--r--OnDiskPt/Word.h27
-rw-r--r--OnDiskPt/queryOnDiskPt.cpp8
-rw-r--r--biconcor/Alignment.cpp (renamed from scripts/ems/biconcor/Alignment.cpp)0
-rw-r--r--biconcor/Alignment.h (renamed from scripts/ems/biconcor/Alignment.h)0
-rw-r--r--biconcor/Jamfile (renamed from scripts/ems/biconcor/Jamfile)1
-rw-r--r--biconcor/Mismatch.cpp (renamed from scripts/ems/biconcor/Mismatch.cpp)0
-rw-r--r--biconcor/Mismatch.h (renamed from scripts/ems/biconcor/Mismatch.h)0
-rw-r--r--biconcor/PhrasePair.cpp (renamed from scripts/ems/biconcor/PhrasePair.cpp)0
-rw-r--r--biconcor/PhrasePair.h (renamed from scripts/ems/biconcor/PhrasePair.h)0
-rw-r--r--biconcor/PhrasePairCollection.cpp (renamed from scripts/ems/biconcor/PhrasePairCollection.cpp)0
-rw-r--r--biconcor/PhrasePairCollection.h (renamed from scripts/ems/biconcor/PhrasePairCollection.h)0
-rw-r--r--biconcor/SuffixArray.cpp (renamed from scripts/ems/biconcor/SuffixArray.cpp)0
-rw-r--r--biconcor/SuffixArray.h (renamed from scripts/ems/biconcor/SuffixArray.h)0
-rw-r--r--biconcor/TargetCorpus.cpp (renamed from scripts/ems/biconcor/TargetCorpus.cpp)0
-rw-r--r--biconcor/TargetCorpus.h (renamed from scripts/ems/biconcor/TargetCorpus.h)0
-rw-r--r--biconcor/Vocabulary.cpp (renamed from scripts/ems/biconcor/Vocabulary.cpp)0
-rw-r--r--biconcor/Vocabulary.h (renamed from scripts/ems/biconcor/Vocabulary.h)0
-rw-r--r--biconcor/base64.cpp (renamed from scripts/ems/biconcor/base64.cpp)0
-rw-r--r--biconcor/base64.h (renamed from scripts/ems/biconcor/base64.h)0
-rw-r--r--biconcor/biconcor.cpp (renamed from scripts/ems/biconcor/biconcor.cpp)0
-rwxr-xr-xbjam4
-rwxr-xr-xcontrib/Extract_TMX_Corpus/Extract_TMX_Corpus.py594
-rwxr-xr-xcontrib/Extract_TMX_Corpus/Extract_TMX_Corpus.rsrc.py141
-rw-r--r--contrib/Extract_TMX_Corpus/LanguageCodes.txt22
-rw-r--r--contrib/Extract_TMX_Corpus/LanguagePairs.txt3
-rw-r--r--contrib/Extract_TMX_Corpus/_READ_ME_FIRST.txt241
-rw-r--r--contrib/Extract_TMX_Corpus/gpl.txt674
-rw-r--r--contrib/Moses2TMX/LanguageCodes.txt22
-rwxr-xr-xcontrib/Moses2TMX/Moses2TMX.py166
-rwxr-xr-xcontrib/Moses2TMX/Moses2TMX.rsrc.py95
-rw-r--r--contrib/Moses2TMX/_READ_ME_FIRST.txt127
-rw-r--r--contrib/Moses2TMX/gpl.txt674
-rw-r--r--contrib/combine-ptables/README.md139
-rwxr-xr-xcontrib/combine-ptables/combine-ptables.pl425
-rwxr-xr-x[-rw-r--r--]contrib/eppex/configure0
-rwxr-xr-x[-rw-r--r--]contrib/eppex/depcomp0
-rwxr-xr-x[-rw-r--r--]contrib/eppex/install-sh0
-rwxr-xr-x[-rw-r--r--]contrib/eppex/missing0
-rw-r--r--contrib/fuzzy-match/Makefile16
-rw-r--r--contrib/fuzzy-match/Match.h29
-rw-r--r--contrib/fuzzy-match/SentenceAlignment.h48
-rw-r--r--contrib/fuzzy-match/SuffixArray.cpp244
-rw-r--r--contrib/fuzzy-match/SuffixArray.h45
-rw-r--r--contrib/fuzzy-match/Util.cpp147
-rw-r--r--contrib/fuzzy-match/Util.h87
-rw-r--r--contrib/fuzzy-match/Vocabulary.cpp45
-rw-r--r--contrib/fuzzy-match/Vocabulary.h40
-rw-r--r--contrib/fuzzy-match/fuzzy-match2.cpp460
-rw-r--r--contrib/fuzzy-match/fuzzy-match2.h561
-rw-r--r--contrib/fuzzy-match/make-xml-from-match.perl214
-rw-r--r--contrib/fuzzy-match/old/fuzzy-match.cpp982
-rw-r--r--contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl58
-rwxr-xr-xcontrib/fuzzy-match/old/make-pt-from-tm.perl308
-rwxr-xr-xcontrib/fuzzy-match/old/make-pt-from-tm2.perl300
-rwxr-xr-xcontrib/fuzzy-match/old/make-xml-from-match-multiple.perl288
-rw-r--r--contrib/fuzzy-match/suffix-test.cpp27
-rwxr-xr-xcontrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.docxbin0 -> 305920 bytes
-rwxr-xr-xcontrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.pdfbin0 -> 360891 bytes
-rwxr-xr-xcontrib/iSenWeb/index.html129
-rwxr-xr-xcontrib/iSenWeb/jquery-1.7.2.js9405
-rwxr-xr-xcontrib/iSenWeb/moses.pl59
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/Logo (1000x300).pngbin0 -> 54193 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/Logo (2000x2000).pngbin0 -> 271893 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/Logo (250x250).pngbin0 -> 23296 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/Logo (500x500).pngbin0 -> 56068 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/Logo.pngbin0 -> 54193 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/Logo_lab.pngbin0 -> 24039 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/header_bg.pngbin0 -> 3678 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/ico_cor10.pngbin0 -> 958 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/icon_feedback.pngbin0 -> 6207 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/logo_christmas.pngbin0 -> 28418 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/logo_christmas1.pngbin0 -> 6310 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/logo_christmas2.pngbin0 -> 10407 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/logo_christmas3.pngbin0 -> 35303 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/nav_bgn.pngbin0 -> 2940 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/common/sidebar_bg.pngbin0 -> 10773 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/fanyi/fanyi_sprite.pngbin0 -> 8744 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/fanyi/inputTextBg.pngbin0 -> 501 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/images/search/s.pngbin0 -> 4439 bytes
-rwxr-xr-xcontrib/iSenWeb/themes/styles/common.css288
-rwxr-xr-xcontrib/iSenWeb/themes/styles/fanyi.css583
-rwxr-xr-xcontrib/iSenWeb/themes/styles/search.css31
-rwxr-xr-xcontrib/iSenWeb/trans_result.php10
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/BUILD0
l---------contrib/lmserver/INSTALL1
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/compile0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/config.guess0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/config.status0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/config.sub0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/configure0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/depcomp0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/install-sh0
-rwxr-xr-x[-rw-r--r--]contrib/lmserver/missing0
-rwxr-xr-x[-rw-r--r--]contrib/memscore/configure0
-rwxr-xr-x[-rw-r--r--]contrib/memscore/depcomp0
-rwxr-xr-x[-rw-r--r--]contrib/memscore/install-sh0
-rwxr-xr-x[-rw-r--r--]contrib/memscore/missing0
-rwxr-xr-xcontrib/mert-moses-multi.pl (renamed from scripts/training/mert-moses-multi.pl)16
-rw-r--r--contrib/moses-for-mere-mortals/READ_ME_FIRST.txt54
-rwxr-xr-xcontrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.py592
-rwxr-xr-xcontrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.rsrc.py141
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguageCodes.txt22
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguagePairs.txt3
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/_READ_ME_FIRST.txt119
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/gpl.txt674
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/LanguageCodes.txt22
-rwxr-xr-xcontrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.py166
-rwxr-xr-xcontrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.rsrc.py95
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/_READ_ME_FIRST.txt82
-rw-r--r--contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/gpl.txt674
-rw-r--r--contrib/moses-for-mere-mortals/all.css55
-rw-r--r--contrib/moses-for-mere-mortals/docs/Help-Tutorial.docbin565248 -> 0 bytes
-rw-r--r--contrib/moses-for-mere-mortals/docs/Overview.jpegbin207618 -> 0 bytes
-rw-r--r--contrib/moses-for-mere-mortals/docs/Quick-Start-Guide.docbin16896 -> 0 bytes
-rw-r--r--contrib/moses-for-mere-mortals/docs/all.css55
-rw-r--r--contrib/moses-for-mere-mortals/docs/thanks.html27
-rw-r--r--contrib/moses-for-mere-mortals/index.html22
-rw-r--r--contrib/moses-for-mere-mortals/scripts/create-1.37557
-rw-r--r--contrib/moses-for-mere-mortals/scripts/make-test-files-0.14137
-rw-r--r--contrib/moses-for-mere-mortals/scripts/modified-scripts/READ_ME_FIRST2
-rwxr-xr-xcontrib/moses-for-mere-mortals/scripts/modified-scripts/mert-moses-new-modif.pl1217
-rw-r--r--contrib/moses-for-mere-mortals/scripts/modified-scripts/nonbreaking_prefix.pt209
-rw-r--r--contrib/moses-for-mere-mortals/scripts/score-0.85509
-rw-r--r--contrib/moses-for-mere-mortals/scripts/train-1.111538
-rw-r--r--contrib/moses-for-mere-mortals/scripts/transfer-training-to-another-location-0.0755
-rw-r--r--contrib/moses-for-mere-mortals/scripts/translate-1.32453
-rw-r--r--contrib/other-builds/CreateOnDisk.vcxproj16
-rw-r--r--contrib/other-builds/CreateOnDisk.xcodeproj/project.pbxproj4
-rw-r--r--contrib/other-builds/OnDiskPt.vcxproj8
-rw-r--r--contrib/other-builds/OnDiskPt/.cproject7
-rw-r--r--contrib/other-builds/fuzzy-match.xcodeproj/project.pbxproj292
-rw-r--r--contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist21
-rw-r--r--contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match.xcscheme78
-rw-r--r--contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match2.xcscheme79
-rw-r--r--contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist32
-rw-r--r--contrib/other-builds/kbmira.xcodeproj/project.pbxproj311
-rwxr-xr-xcontrib/other-builds/kenlm.vcxproj21
-rw-r--r--contrib/other-builds/lm.xcodeproj/project.pbxproj17
-rw-r--r--contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/lm.xcscheme54
-rw-r--r--contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist22
-rw-r--r--contrib/other-builds/lm/.cproject10
-rw-r--r--contrib/other-builds/lm/.project15
-rw-r--r--contrib/other-builds/mert.xcodeproj/project.pbxproj338
-rw-r--r--contrib/other-builds/mert.xcodeproj/project.xcworkspace/contents.xcworkspacedata7
-rw-r--r--contrib/other-builds/mert.xcodeproj/project.xcworkspace/xcuserdata/hieuhoang.xcuserdatad/UserInterfaceState.xcuserstate8628
-rw-r--r--contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist35
-rw-r--r--contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/extractor.xcscheme72
-rw-r--r--contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert.xcscheme72
-rw-r--r--contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist32
-rw-r--r--contrib/other-builds/mert_lib.xcodeproj/project.pbxproj621
-rw-r--r--contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert_lib.xcscheme54
-rw-r--r--contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist22
-rw-r--r--contrib/other-builds/moses-chart-cmd.vcxproj2
-rw-r--r--contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj8
-rw-r--r--contrib/other-builds/moses-cmd.vcxproj18
-rw-r--r--contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj39
-rw-r--r--contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/moses-cmd.xcscheme72
-rw-r--r--contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist22
-rw-r--r--contrib/other-builds/moses-cmd/.cproject32
-rw-r--r--contrib/other-builds/moses.sln6
-rw-r--r--contrib/other-builds/moses.vcxproj88
-rw-r--r--contrib/other-builds/moses.xcodeproj/project.pbxproj258
-rw-r--r--contrib/other-builds/moses/.cproject173
-rw-r--r--contrib/other-builds/moses/.project732
-rw-r--r--contrib/other-builds/mosesserver.vcxproj102
-rw-r--r--contrib/other-builds/processLexicalTableMin.xcodeproj/project.pbxproj297
-rw-r--r--contrib/other-builds/processPhraseTableMin.xcodeproj/project.pbxproj304
-rwxr-xr-xcontrib/other-builds/query.sln29
-rwxr-xr-xcontrib/other-builds/query.vcxproj89
-rw-r--r--contrib/other-builds/util/.cproject6
-rw-r--r--contrib/python/README.md28
-rw-r--r--contrib/python/binpt/binpt.cpp5648
-rw-r--r--contrib/python/binpt/binpt.pxd25
-rw-r--r--contrib/python/binpt/binpt.pyx166
-rw-r--r--contrib/python/example.py31
-rw-r--r--contrib/python/examples/phrase-table.binphr.idxbin0 -> 20 bytes
-rw-r--r--contrib/python/examples/phrase-table.binphr.srctree.wabin0 -> 84 bytes
-rw-r--r--contrib/python/examples/phrase-table.binphr.srcvoc2
-rw-r--r--contrib/python/examples/phrase-table.binphr.tgtdata.wabin0 -> 180 bytes
-rw-r--r--contrib/python/examples/phrase-table.binphr.tgtvoc4
-rw-r--r--contrib/python/examples/phrase-table.txt4
-rw-r--r--contrib/python/setup.py47
-rw-r--r--contrib/relent-filter/AUTHORS1
-rw-r--r--contrib/relent-filter/README.txt91
-rw-r--r--contrib/relent-filter/scripts/calcEmpiricalDistribution.pl53
-rwxr-xr-xcontrib/relent-filter/scripts/calcPruningScores.pl351
-rw-r--r--contrib/relent-filter/scripts/interpolateScores.pl94
-rwxr-xr-xcontrib/relent-filter/scripts/prunePT.pl114
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/Makefile10
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/README.txt42
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/WIN32_functions.cpp231
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/WIN32_functions.h24
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/check-install5
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/filter-pt.cpp377
-rwxr-xr-xcontrib/relent-filter/sigtest-filter/sigtest-filter.sln20
-rwxr-xr-xcontrib/relent-filter/src/IOWrapper.cpp580
-rwxr-xr-xcontrib/relent-filter/src/IOWrapper.h142
-rwxr-xr-xcontrib/relent-filter/src/Jamfile6
-rwxr-xr-xcontrib/relent-filter/src/LatticeMBR.cpp669
-rwxr-xr-xcontrib/relent-filter/src/LatticeMBR.h153
-rwxr-xr-xcontrib/relent-filter/src/LatticeMBRGrid.cpp213
-rwxr-xr-xcontrib/relent-filter/src/Main.cpp282
-rwxr-xr-xcontrib/relent-filter/src/Main.h39
-rwxr-xr-xcontrib/relent-filter/src/RelativeEntropyCalc.cpp83
-rwxr-xr-xcontrib/relent-filter/src/RelativeEntropyCalc.h51
-rwxr-xr-xcontrib/relent-filter/src/TranslationAnalysis.cpp126
-rwxr-xr-xcontrib/relent-filter/src/TranslationAnalysis.h25
-rwxr-xr-xcontrib/relent-filter/src/mbr.cpp178
-rwxr-xr-x[-rw-r--r--]contrib/relent-filter/src/mbr.h (renamed from moses/src/OnlineCommand.h)43
-rw-r--r--contrib/reranking/data/README5
-rw-r--r--contrib/reranking/data/nbest.small7
-rw-r--r--contrib/reranking/data/weights11
-rw-r--r--contrib/reranking/src/Hypo.cpp59
-rw-r--r--contrib/reranking/src/Hypo.h44
-rw-r--r--contrib/reranking/src/Main.cpp98
-rw-r--r--contrib/reranking/src/Makefile18
-rw-r--r--contrib/reranking/src/NBest.cpp131
-rw-r--r--contrib/reranking/src/NBest.h44
-rw-r--r--contrib/reranking/src/ParameterNBest.cpp337
-rw-r--r--contrib/reranking/src/ParameterNBest.h76
-rw-r--r--contrib/reranking/src/Tools.cpp29
-rw-r--r--contrib/reranking/src/Tools.h73
-rwxr-xr-xcontrib/server/Translation-web/src/conf/MANIFEST.MF2
-rwxr-xr-xcontrib/server/Translation-web/src/java/com/hpl/mt/Translate.java129
-rwxr-xr-xcontrib/server/Translation-web/web/META-INF/context.xml2
-rwxr-xr-xcontrib/server/Translation-web/web/WEB-INF/web.xml16
-rwxr-xr-xcontrib/server/Translation-web/web/css/common.css22
-rwxr-xr-xcontrib/server/Translation-web/web/index.html47
-rwxr-xr-xcontrib/server/Translation-web/web/lib/jquery-1.6.4.js9046
-rwxr-xr-xcontrib/server/Translation-web/web/lib/jquery-ui-1.8.16.custom.js11769
-rw-r--r--contrib/server/mosesserver.cpp5
-rw-r--r--contrib/sigtest-filter/Makefile2
-rwxr-xr-x[-rw-r--r--]contrib/sigtest-filter/check-install0
-rw-r--r--contrib/sigtest-filter/filter-pt.cpp204
-rw-r--r--contrib/tmcombine/README.md2
-rw-r--r--contrib/tmcombine/test/model3/model/lex.counts.e2f8
-rw-r--r--contrib/tmcombine/test/model3/model/lex.counts.f2e8
-rw-r--r--contrib/tmcombine/test/model3/model/lex.e2f8
-rw-r--r--contrib/tmcombine/test/model3/model/lex.f2e8
-rw-r--r--contrib/tmcombine/test/model3/model/phrase-table8
-rw-r--r--contrib/tmcombine/test/model4/model/lex.counts.e2f8
-rw-r--r--contrib/tmcombine/test/model4/model/lex.counts.f2e8
-rw-r--r--contrib/tmcombine/test/model4/model/lex.e2f8
-rw-r--r--contrib/tmcombine/test/model4/model/lex.f2e8
-rw-r--r--contrib/tmcombine/test/model4/model/phrase-table5
-rw-r--r--contrib/tmcombine/test/phrase-table_test109
-rw-r--r--contrib/tmcombine/test/phrase-table_test816
-rw-r--r--contrib/tmcombine/test/phrase-table_test99
-rwxr-xr-xcontrib/tmcombine/tmcombine.py491
-rw-r--r--contrib/tmcombine/train_model.patch24
-rwxr-xr-xcontrib/web/bin/daemon.pl7
-rw-r--r--cruise-control/config.ems3
-rwxr-xr-xcruise-control/test_all_new_commits.sh24
-rwxr-xr-x[-rw-r--r--]jam-files/engine/debian/rules0
-rw-r--r--jam-files/fail/Jamroot4
-rw-r--r--jam-files/sanity.jam131
l---------kenlm1
-rw-r--r--lm/Jamfile16
-rw-r--r--lm/binary_format.cc2
-rw-r--r--lm/build_binary.cc103
-rw-r--r--lm/config.cc1
-rw-r--r--lm/config.hh22
-rw-r--r--lm/left.hh113
-rw-r--r--lm/left_test.cc118
-rw-r--r--lm/max_order.cc5
-rw-r--r--lm/max_order.hh14
-rw-r--r--lm/model.cc190
-rw-r--r--lm/model.hh94
-rw-r--r--lm/model_test.cc65
-rw-r--r--lm/model_type.hh13
-rw-r--r--lm/ngram_query.cc18
-rw-r--r--lm/ngram_query.hh47
-rw-r--r--lm/quantize.cc20
-rw-r--r--lm/quantize.hh163
-rw-r--r--lm/read_arpa.cc23
-rw-r--r--lm/read_arpa.hh42
-rw-r--r--lm/return.hh3
-rw-r--r--lm/search_hashed.cc243
-rw-r--r--lm/search_hashed.hh229
-rw-r--r--lm/search_trie.cc59
-rw-r--r--lm/search_trie.hh71
-rw-r--r--lm/state.hh122
-rw-r--r--lm/trie.cc61
-rw-r--r--lm/trie.hh61
-rw-r--r--lm/trie_sort.cc24
-rw-r--r--lm/trie_sort.hh3
-rw-r--r--lm/value.hh157
-rw-r--r--lm/value_build.cc58
-rw-r--r--lm/value_build.hh97
-rw-r--r--lm/vocab.cc2
-rw-r--r--lm/vocab.hh6
-rw-r--r--lm/weights.hh5
-rw-r--r--mert/BleuScorer.cpp8
-rw-r--r--mert/BleuScorer.h11
-rw-r--r--mert/BleuScorerTest.cpp2
-rw-r--r--mert/CderScorer.cpp6
-rw-r--r--mert/CderScorer.h8
-rw-r--r--mert/Data.cpp24
-rw-r--r--mert/Data.h5
-rw-r--r--mert/DataTest.cpp2
-rw-r--r--mert/Fdstream.h5
-rw-r--r--mert/FeatureArray.cpp7
-rw-r--r--mert/FeatureArray.h6
-rw-r--r--mert/FeatureData.cpp7
-rw-r--r--mert/FeatureData.h6
-rw-r--r--mert/FeatureDataIterator.cpp7
-rw-r--r--mert/FeatureDataIterator.h5
-rw-r--r--mert/FeatureDataTest.cpp2
-rw-r--r--mert/FeatureStats.cpp8
-rw-r--r--mert/FeatureStats.h8
-rw-r--r--mert/HypPackEnumerator.cpp28
-rw-r--r--mert/HypPackEnumerator.h19
-rw-r--r--mert/InterpolatedScorer.cpp36
-rw-r--r--mert/InterpolatedScorer.h8
-rw-r--r--mert/Jamfile5
-rw-r--r--mert/MergeScorer.cpp7
-rw-r--r--mert/MergeScorer.h8
-rw-r--r--mert/MiraFeatureVector.cpp23
-rw-r--r--mert/MiraFeatureVector.h11
-rw-r--r--mert/MiraWeightVector.cpp19
-rw-r--r--mert/MiraWeightVector.h9
-rw-r--r--mert/Ngram.h5
-rw-r--r--mert/NgramTest.cpp2
-rw-r--r--mert/Optimizer.cpp6
-rw-r--r--mert/Optimizer.h6
-rw-r--r--mert/OptimizerFactory.cpp6
-rw-r--r--mert/OptimizerFactory.h7
-rw-r--r--mert/OptimizerFactoryTest.cpp2
-rw-r--r--mert/PerScorer.cpp7
-rw-r--r--mert/PerScorer.h8
-rw-r--r--mert/Permutation.cpp337
-rw-r--r--mert/Permutation.h71
-rw-r--r--mert/PermutationScorer.cpp245
-rw-r--r--mert/PermutationScorer.h75
-rw-r--r--mert/Point.cpp6
-rw-r--r--mert/Point.h6
-rw-r--r--mert/PointTest.cpp1
-rw-r--r--mert/PreProcessFilter.cpp7
-rw-r--r--mert/PreProcessFilter.h6
-rw-r--r--mert/Reference.h7
-rw-r--r--mert/ReferenceTest.cpp2
-rw-r--r--mert/ScopedVector.h5
-rw-r--r--mert/ScoreArray.cpp7
-rw-r--r--mert/ScoreArray.h5
-rw-r--r--mert/ScoreData.cpp7
-rw-r--r--mert/ScoreData.h6
-rw-r--r--mert/ScoreDataIterator.cpp6
-rw-r--r--mert/ScoreDataIterator.h7
-rw-r--r--mert/ScoreStats.cpp8
-rw-r--r--mert/ScoreStats.h8
-rw-r--r--mert/Scorer.cpp139
-rw-r--r--mert/Scorer.h86
-rw-r--r--mert/ScorerFactory.cpp11
-rw-r--r--mert/ScorerFactory.h6
-rw-r--r--mert/SemposOverlapping.cpp8
-rw-r--r--mert/SemposOverlapping.h6
-rw-r--r--mert/SemposScorer.cpp7
-rw-r--r--mert/SemposScorer.h7
-rw-r--r--mert/SentenceLevelScorer.cpp108
-rw-r--r--mert/SentenceLevelScorer.h88
-rw-r--r--mert/Singleton.h6
-rw-r--r--mert/SingletonTest.cpp2
-rw-r--r--mert/StatisticsBasedScorer.cpp126
-rw-r--r--mert/StatisticsBasedScorer.h50
-rw-r--r--mert/TerScorer.cpp6
-rw-r--r--mert/TerScorer.h8
-rw-r--r--mert/Timer.cpp6
-rw-r--r--mert/Timer.h7
-rw-r--r--mert/TimerTest.cpp2
-rw-r--r--mert/Types.h8
-rw-r--r--mert/Util.cpp7
-rw-r--r--mert/Util.h14
-rw-r--r--mert/UtilTest.cpp2
-rw-r--r--mert/Vocabulary.cpp24
-rw-r--r--mert/Vocabulary.h21
-rw-r--r--mert/VocabularyTest.cpp3
-rw-r--r--mert/evaluator.cpp1
-rw-r--r--mert/extractor.cpp14
-rw-r--r--mert/init.opt1
-rw-r--r--mert/kbmira.cpp89
-rw-r--r--mert/mert.cpp1
-rw-r--r--mert/pro.cpp5
-rw-r--r--mert/test_scorer.cpp60
-rw-r--r--mira/Decoder.cpp12
-rw-r--r--mira/Main.cpp17
-rw-r--r--mira/Main.h1
-rw-r--r--mira/MiraOptimiser.cpp28
-rw-r--r--mira/Optimiser.h253
-rwxr-xr-xmira/training-expt.perl181
-rw-r--r--misc/Jamfile14
l---------misc/pmoses/COPYING1
l---------misc/pmoses/INSTALL1
-rwxr-xr-x[-rw-r--r--]misc/pmoses/configure0
-rw-r--r--misc/processLexicalTableMin.cpp131
-rw-r--r--misc/processPhraseTableMin.cpp177
-rw-r--r--misc/queryPhraseTable.cpp35
-rw-r--r--misc/queryPhraseTableMin.cpp97
-rw-r--r--moses-chart-cmd/src/IOWrapper.cpp158
-rw-r--r--moses-chart-cmd/src/IOWrapper.h12
-rw-r--r--moses-chart-cmd/src/Jamfile1
-rw-r--r--moses-chart-cmd/src/Main.cpp25
-rw-r--r--moses-chart-cmd/src/Main.h6
-rw-r--r--moses-chart-cmd/src/TranslationAnalysis.h2
-rw-r--r--moses-cmd/src/IOWrapper.cpp16
-rw-r--r--moses-cmd/src/IOWrapper.h10
-rw-r--r--moses-cmd/src/LatticeMBR.cpp5
-rw-r--r--moses-cmd/src/LatticeMBR.h6
-rw-r--r--moses-cmd/src/LatticeMBRGrid.cpp12
-rw-r--r--moses-cmd/src/Main.cpp64
-rw-r--r--moses/src/AlignmentInfo.h18
-rw-r--r--moses/src/AlignmentInfoCollection.cpp12
-rw-r--r--moses/src/AlignmentInfoCollection.h29
-rw-r--r--moses/src/BilingualDynSuffixArray.cpp50
-rw-r--r--moses/src/BilingualDynSuffixArray.h14
-rw-r--r--moses/src/BitmapContainer.cpp10
-rw-r--r--moses/src/BitmapContainer.h3
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp1
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h2
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.cpp1
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h2
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp280
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h78
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp2
-rw-r--r--moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.h2
-rw-r--r--moses/src/CYKPlusParser/DotChart.h2
-rw-r--r--moses/src/CYKPlusParser/DotChartInMemory.h2
-rw-r--r--moses/src/CYKPlusParser/DotChartOnDisk.h4
-rw-r--r--moses/src/CellCollection.h40
-rw-r--r--moses/src/ChartCell.cpp25
-rw-r--r--moses/src/ChartCell.h8
-rw-r--r--moses/src/ChartCellCollection.cpp4
-rw-r--r--moses/src/ChartCellCollection.h8
-rw-r--r--moses/src/ChartCellLabel.h5
-rw-r--r--moses/src/ChartCellLabelSet.h2
-rw-r--r--moses/src/ChartHypothesis.cpp39
-rw-r--r--moses/src/ChartHypothesis.h54
-rw-r--r--moses/src/ChartHypothesisCollection.cpp30
-rw-r--r--moses/src/ChartHypothesisCollection.h16
-rw-r--r--moses/src/ChartManager.cpp207
-rw-r--r--moses/src/ChartManager.h38
-rw-r--r--moses/src/ChartRuleLookupManager.h21
-rw-r--r--moses/src/ChartTranslationOptionCollection.cpp177
-rw-r--r--moses/src/ChartTranslationOptionCollection.h72
-rw-r--r--moses/src/ChartTranslationOptionList.cpp12
-rw-r--r--moses/src/ChartTranslationOptionList.h10
-rw-r--r--moses/src/ChartTranslationOptions.cpp (renamed from moses/src/ChartTranslationOption.cpp)4
-rw-r--r--moses/src/ChartTranslationOptions.h (renamed from moses/src/ChartTranslationOption.h)32
-rw-r--r--moses/src/ChartTrellisDetour.h2
-rw-r--r--moses/src/ChartTrellisDetourQueue.h9
-rw-r--r--moses/src/ChartTrellisNode.cpp6
-rw-r--r--moses/src/ChartTrellisNode.h2
-rw-r--r--moses/src/ChartTrellisPath.h5
-rw-r--r--moses/src/CompactPT/BlockHashIndex.cpp422
-rw-r--r--moses/src/CompactPT/BlockHashIndex.h182
-rw-r--r--moses/src/CompactPT/CanonicalHuffman.h350
-rw-r--r--moses/src/CompactPT/CmphStringVectorAdapter.cpp94
-rw-r--r--moses/src/CompactPT/CmphStringVectorAdapter.h106
-rw-r--r--moses/src/CompactPT/ConsistentPhrases.h127
-rw-r--r--moses/src/CompactPT/Jamfile18
-rw-r--r--moses/src/CompactPT/LexicalReorderingTableCompact.cpp157
-rw-r--r--moses/src/CompactPT/LexicalReorderingTableCompact.h77
-rw-r--r--moses/src/CompactPT/LexicalReorderingTableCreator.cpp433
-rw-r--r--moses/src/CompactPT/LexicalReorderingTableCreator.h139
-rw-r--r--moses/src/CompactPT/ListCoders.h309
-rw-r--r--moses/src/CompactPT/MmapAllocator.h204
-rw-r--r--moses/src/CompactPT/MonotonicVector.h249
-rw-r--r--moses/src/CompactPT/MurmurHash3.cpp335
-rw-r--r--moses/src/CompactPT/MurmurHash3.h37
-rw-r--r--moses/src/CompactPT/PackedArray.h201
-rw-r--r--moses/src/CompactPT/PhraseDecoder.cpp491
-rw-r--r--moses/src/CompactPT/PhraseDecoder.h153
-rw-r--r--moses/src/CompactPT/PhraseDictionaryCompact.cpp195
-rw-r--r--moses/src/CompactPT/PhraseDictionaryCompact.h124
-rw-r--r--moses/src/CompactPT/PhraseTableCreator.cpp1283
-rw-r--r--moses/src/CompactPT/PhraseTableCreator.h425
-rw-r--r--moses/src/CompactPT/StringVector.h622
-rw-r--r--moses/src/CompactPT/TargetPhraseCollectionCache.h182
-rw-r--r--moses/src/CompactPT/ThrowingFwrite.cpp29
-rw-r--r--moses/src/CompactPT/ThrowingFwrite.h31
-rw-r--r--moses/src/ConfusionNet.h3
-rw-r--r--moses/src/DecodeFeature.h1
-rw-r--r--moses/src/DecodeGraph.h2
-rw-r--r--moses/src/DecodeStep.h2
-rw-r--r--moses/src/Dictionary.cpp2
-rw-r--r--moses/src/Dictionary.h10
-rw-r--r--moses/src/DummyScoreProducers.cpp63
-rw-r--r--moses/src/DummyScoreProducers.h45
-rw-r--r--moses/src/DynSAInclude/FileHandler.cpp (renamed from moses/src/DynSAInclude/file.cpp)18
-rw-r--r--moses/src/DynSAInclude/FileHandler.h (renamed from moses/src/DynSAInclude/file.h)2
-rw-r--r--moses/src/DynSAInclude/RandLMCache.h1
-rw-r--r--moses/src/DynSAInclude/RandLMFilter.h25
-rw-r--r--moses/src/DynSAInclude/hash.h11
-rw-r--r--moses/src/DynSAInclude/onlineRLM.h39
-rw-r--r--moses/src/DynSAInclude/params.h3
-rw-r--r--moses/src/DynSAInclude/perfectHash.h21
-rw-r--r--moses/src/DynSAInclude/quantizer.h6
-rw-r--r--moses/src/DynSAInclude/types.h8
-rw-r--r--moses/src/DynSAInclude/utils.h1
-rw-r--r--moses/src/DynSAInclude/vocab.h6
-rw-r--r--moses/src/DynSuffixArray.h2
-rw-r--r--moses/src/FFState.h2
-rw-r--r--moses/src/Factor.h1
-rw-r--r--moses/src/FactorCollection.cpp1
-rw-r--r--moses/src/FactorCollection.h2
-rw-r--r--moses/src/FactorTypeSet.h2
-rw-r--r--moses/src/FeatureFunction.cpp81
-rw-r--r--moses/src/FeatureFunction.h97
-rw-r--r--moses/src/FilePtr.h6
-rw-r--r--moses/src/GenerationDictionary.h17
-rw-r--r--moses/src/GlobalLexicalModel.cpp8
-rw-r--r--moses/src/GlobalLexicalModel.h8
-rw-r--r--moses/src/GlobalLexicalModelUnlimited.h2
-rw-r--r--moses/src/HypoList.h1
-rw-r--r--moses/src/Hypothesis.cpp40
-rw-r--r--moses/src/Hypothesis.h14
-rw-r--r--moses/src/HypothesisStack.h3
-rw-r--r--moses/src/HypothesisStackCubePruning.h2
-rw-r--r--moses/src/HypothesisStackNormal.h2
-rw-r--r--moses/src/InputType.cpp6
-rw-r--r--moses/src/InputType.h8
-rw-r--r--moses/src/Jamfile12
-rw-r--r--moses/src/LM/Base.h13
-rw-r--r--moses/src/LM/IRST.cpp2
-rw-r--r--moses/src/LM/IRST.h6
-rw-r--r--moses/src/LM/Implementation.cpp20
-rw-r--r--moses/src/LM/Implementation.h7
-rw-r--r--moses/src/LM/Jamfile60
-rw-r--r--moses/src/LM/Joint.h5
-rw-r--r--moses/src/LM/Ken.cpp18
-rw-r--r--moses/src/LM/Ken.h2
-rw-r--r--moses/src/LM/LDHT.cpp171
-rw-r--r--moses/src/LM/MultiFactor.h4
-rw-r--r--moses/src/LM/ORLM.h4
-rw-r--r--moses/src/LM/ParallelBackoff.h2
-rw-r--r--moses/src/LM/Remote.h2
-rw-r--r--moses/src/LM/SRI.h2
-rw-r--r--moses/src/LMList.h19
-rw-r--r--moses/src/LVoc.h7
-rw-r--r--moses/src/LexicalReordering.h2
-rw-r--r--moses/src/LexicalReorderingState.cpp23
-rw-r--r--moses/src/LexicalReorderingState.h6
-rw-r--r--moses/src/LexicalReorderingTable.cpp18
-rw-r--r--moses/src/LexicalReorderingTable.h4
-rw-r--r--moses/src/Manager.cpp31
-rw-r--r--moses/src/Manager.h7
-rw-r--r--moses/src/NonTerminal.h6
-rw-r--r--moses/src/OnlineCommand.cpp143
-rw-r--r--moses/src/OutputCollector.h35
-rw-r--r--moses/src/PCNTools.h8
-rw-r--r--moses/src/PDTAimp.h7
-rw-r--r--moses/src/Parameter.cpp12
-rw-r--r--moses/src/Parameter.h3
-rw-r--r--moses/src/PartialTranslOptColl.h4
-rw-r--r--moses/src/Phrase.h3
-rw-r--r--moses/src/PhraseDictionary.cpp53
-rw-r--r--moses/src/PhraseDictionary.h19
-rw-r--r--moses/src/PhraseDictionaryDynSuffixArray.cpp4
-rw-r--r--moses/src/PhraseDictionaryDynSuffixArray.h5
-rw-r--r--moses/src/PhraseDictionaryMemory.cpp16
-rw-r--r--moses/src/PhraseDictionaryNode.h2
-rw-r--r--moses/src/PhraseDictionaryTree.h3
-rw-r--r--moses/src/PhraseDictionaryTreeAdaptor.h2
-rw-r--r--moses/src/PhraseLengthFeature.cpp10
-rw-r--r--moses/src/PhraseLengthFeature.h11
-rw-r--r--moses/src/PhrasePairFeature.cpp292
-rw-r--r--moses/src/PhrasePairFeature.h129
-rw-r--r--moses/src/PrefixTree.h4
-rw-r--r--moses/src/PrefixTreeMap.h20
-rw-r--r--moses/src/ReorderingConstraint.h3
-rw-r--r--moses/src/ReorderingStack.h2
-rw-r--r--moses/src/RuleCube.cpp5
-rw-r--r--moses/src/RuleCube.h26
-rw-r--r--moses/src/RuleCubeItem.cpp7
-rw-r--r--moses/src/RuleCubeItem.h15
-rw-r--r--moses/src/RuleCubeQueue.h6
-rw-r--r--moses/src/RuleTable/Jamfile2
-rw-r--r--moses/src/RuleTable/Loader.h4
-rw-r--r--moses/src/RuleTable/LoaderCompact.h1
-rw-r--r--moses/src/RuleTable/LoaderFactory.cpp42
-rw-r--r--moses/src/RuleTable/LoaderFactory.h2
-rw-r--r--moses/src/RuleTable/LoaderHiero.h1
-rw-r--r--moses/src/RuleTable/LoaderStandard.cpp6
-rw-r--r--moses/src/RuleTable/LoaderStandard.h7
-rw-r--r--moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp4
-rw-r--r--moses/src/RuleTable/PhraseDictionaryALSuffixArray.h5
-rw-r--r--moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp311
-rw-r--r--moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h98
-rw-r--r--moses/src/RuleTable/PhraseDictionaryNodeSCFG.h8
-rw-r--r--moses/src/RuleTable/PhraseDictionaryOnDisk.cpp7
-rw-r--r--moses/src/RuleTable/PhraseDictionaryOnDisk.h2
-rw-r--r--moses/src/RuleTable/PhraseDictionarySCFG.cpp9
-rw-r--r--moses/src/RuleTable/PhraseDictionarySCFG.h2
-rw-r--r--moses/src/RuleTable/Trie.h1
-rw-r--r--moses/src/RuleTable/UTrie.h2
-rw-r--r--moses/src/RuleTable/UTrieNode.h1
-rw-r--r--moses/src/Scope3Parser/ApplicableRuleTrie.h2
-rw-r--r--moses/src/Scope3Parser/IntermediateVarSpanNode.h2
-rw-r--r--moses/src/Scope3Parser/Parser.h2
-rw-r--r--moses/src/Scope3Parser/StackLattice.h11
-rw-r--r--moses/src/Scope3Parser/StackLatticeBuilder.h2
-rw-r--r--moses/src/Scope3Parser/VarSpanNode.h2
-rw-r--r--moses/src/Scope3Parser/VarSpanTrieBuilder.h2
-rw-r--r--moses/src/ScoreComponentCollection.h8
-rw-r--r--moses/src/Search.cpp3
-rw-r--r--moses/src/Search.h3
-rw-r--r--moses/src/SearchCubePruning.h3
-rw-r--r--moses/src/SearchNormal.h5
-rw-r--r--moses/src/SearchNormalBatch.cpp221
-rw-r--r--moses/src/SearchNormalBatch.h43
-rw-r--r--moses/src/Sentence.h2
-rw-r--r--moses/src/SentenceStats.h6
-rw-r--r--moses/src/SourceWordDeletionFeature.cpp18
-rw-r--r--moses/src/SourceWordDeletionFeature.h7
-rw-r--r--moses/src/SparsePhraseDictionaryFeature.cpp5
-rw-r--r--moses/src/SparsePhraseDictionaryFeature.h11
-rw-r--r--moses/src/SquareMatrix.h2
-rw-r--r--moses/src/StaticData.cpp103
-rw-r--r--moses/src/StaticData.h49
-rw-r--r--moses/src/TargetPhrase.cpp11
-rw-r--r--moses/src/TargetPhrase.h35
-rw-r--r--moses/src/TargetWordInsertionFeature.cpp18
-rw-r--r--moses/src/TargetWordInsertionFeature.h10
-rw-r--r--moses/src/ThreadPool.h9
-rw-r--r--moses/src/Timer.cpp12
-rw-r--r--moses/src/Timer.h20
-rw-r--r--moses/src/TranslationOption.cpp43
-rw-r--r--moses/src/TranslationOption.h33
-rw-r--r--moses/src/TranslationOptionCollection.cpp122
-rw-r--r--moses/src/TranslationOptionCollection.h26
-rw-r--r--moses/src/TranslationOptionCollectionConfusionNet.h3
-rw-r--r--moses/src/TranslationOptionCollectionText.h3
-rw-r--r--moses/src/TranslationOptionList.h2
-rw-r--r--moses/src/TranslationSystem.cpp31
-rw-r--r--moses/src/TranslationSystem.h9
-rw-r--r--moses/src/TreeInput.cpp2
-rw-r--r--moses/src/TreeInput.h12
-rw-r--r--moses/src/TrellisPath.h3
-rw-r--r--moses/src/TrellisPathCollection.h5
-rw-r--r--moses/src/TrellisPathList.h4
-rw-r--r--moses/src/TypeDef.h11
-rw-r--r--moses/src/UniqueObject.h1
-rw-r--r--moses/src/UserMessage.h4
-rw-r--r--moses/src/Util.cpp19
-rw-r--r--moses/src/Util.h4
-rw-r--r--moses/src/Word.cpp6
-rw-r--r--moses/src/Word.h4
-rw-r--r--moses/src/WordLattice.h4
-rw-r--r--moses/src/WordTranslationFeature.cpp215
-rw-r--r--moses/src/WordTranslationFeature.h111
-rw-r--r--moses/src/WordsRange.h8
-rw-r--r--moses/src/fuzzy-match/FuzzyMatchWrapper.cpp1077
-rw-r--r--moses/src/fuzzy-match/FuzzyMatchWrapper.h67
-rw-r--r--moses/src/fuzzy-match/Jamfile1
-rw-r--r--moses/src/fuzzy-match/Match.h33
-rw-r--r--moses/src/fuzzy-match/SentenceAlignment.cpp24
-rw-r--r--moses/src/fuzzy-match/SentenceAlignment.h44
-rw-r--r--moses/src/fuzzy-match/SuffixArray.cpp250
-rw-r--r--moses/src/fuzzy-match/SuffixArray.h50
-rw-r--r--moses/src/fuzzy-match/Vocabulary.cpp53
-rw-r--r--moses/src/fuzzy-match/Vocabulary.h44
-rw-r--r--moses/src/gzfilebuf.h3
-rw-r--r--phrase-extract/AlignmentPhrase.cpp (renamed from scripts/training/phrase-extract/AlignmentPhrase.cpp)7
-rw-r--r--phrase-extract/AlignmentPhrase.h (renamed from scripts/training/phrase-extract/AlignmentPhrase.h)6
-rw-r--r--phrase-extract/ExtractedRule.cpp (renamed from scripts/training/phrase-extract/ExtractedRule.cpp)4
-rw-r--r--phrase-extract/ExtractedRule.h (renamed from scripts/training/phrase-extract/ExtractedRule.h)5
-rw-r--r--phrase-extract/Hole.h (renamed from scripts/training/phrase-extract/Hole.h)5
-rw-r--r--phrase-extract/HoleCollection.cpp (renamed from scripts/training/phrase-extract/HoleCollection.cpp)5
-rw-r--r--phrase-extract/HoleCollection.h (renamed from scripts/training/phrase-extract/HoleCollection.h)5
-rw-r--r--phrase-extract/InputFileStream.cpp (renamed from scripts/training/phrase-extract/InputFileStream.cpp)0
-rw-r--r--phrase-extract/InputFileStream.h (renamed from scripts/training/phrase-extract/InputFileStream.h)0
-rw-r--r--phrase-extract/Jamfile (renamed from scripts/training/phrase-extract/Jamfile)20
-rw-r--r--phrase-extract/OutputFileStream.cpp (renamed from scripts/training/phrase-extract/OutputFileStream.cpp)0
-rw-r--r--phrase-extract/OutputFileStream.h (renamed from scripts/training/phrase-extract/OutputFileStream.h)0
-rw-r--r--phrase-extract/PhraseAlignment.cpp (renamed from scripts/training/phrase-extract/PhraseAlignment.cpp)18
-rw-r--r--phrase-extract/PhraseAlignment.h (renamed from scripts/training/phrase-extract/PhraseAlignment.h)11
-rw-r--r--phrase-extract/PhraseExtractionOptions.h152
-rw-r--r--phrase-extract/RuleExist.h (renamed from scripts/training/phrase-extract/RuleExist.h)6
-rw-r--r--phrase-extract/RuleExtractionOptions.h (renamed from scripts/training/phrase-extract/RuleExtractionOptions.h)7
-rw-r--r--phrase-extract/SafeGetline.h (renamed from scripts/training/phrase-extract/SafeGetline.h)0
-rw-r--r--phrase-extract/SentenceAlignment.cpp (renamed from scripts/training/phrase-extract/SentenceAlignment.cpp)44
-rw-r--r--phrase-extract/SentenceAlignment.h (renamed from scripts/training/phrase-extract/SentenceAlignment.h)13
-rw-r--r--phrase-extract/SentenceAlignmentWithSyntax.cpp (renamed from scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp)16
-rw-r--r--phrase-extract/SentenceAlignmentWithSyntax.h (renamed from scripts/training/phrase-extract/SentenceAlignmentWithSyntax.h)9
-rw-r--r--phrase-extract/SyntaxTree.cpp (renamed from scripts/training/phrase-extract/SyntaxTree.cpp)6
-rw-r--r--phrase-extract/SyntaxTree.h (renamed from scripts/training/phrase-extract/SyntaxTree.h)5
-rw-r--r--phrase-extract/XmlException.h (renamed from scripts/training/phrase-extract/XmlException.h)5
-rw-r--r--phrase-extract/XmlTree.cpp (renamed from scripts/training/phrase-extract/XmlTree.cpp)4
-rw-r--r--phrase-extract/XmlTree.h (renamed from scripts/training/phrase-extract/XmlTree.h)7
-rw-r--r--phrase-extract/consolidate-direct.cpp (renamed from scripts/training/phrase-extract/consolidate-direct.cpp)0
-rw-r--r--phrase-extract/consolidate-direct.vcxproj (renamed from scripts/training/phrase-extract/consolidate-direct.vcxproj)0
-rw-r--r--phrase-extract/consolidate-reverse.cpp (renamed from scripts/training/phrase-extract/consolidate-reverse.cpp)0
-rw-r--r--phrase-extract/consolidate.cpp (renamed from scripts/training/phrase-extract/consolidate.cpp)89
-rw-r--r--phrase-extract/consolidate.vcxproj (renamed from scripts/training/phrase-extract/consolidate.vcxproj)0
-rw-r--r--phrase-extract/domain.cpp52
-rw-r--r--phrase-extract/domain.h32
-rw-r--r--phrase-extract/extract-ghkm/Alignment.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/Alignment.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/Alignment.h (renamed from scripts/training/phrase-extract/extract-ghkm/Alignment.h)0
-rw-r--r--phrase-extract/extract-ghkm/AlignmentGraph.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/AlignmentGraph.h (renamed from scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.h)0
-rw-r--r--phrase-extract/extract-ghkm/ComposedRule.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/ComposedRule.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/ComposedRule.h (renamed from scripts/training/phrase-extract/extract-ghkm/ComposedRule.h)0
-rw-r--r--phrase-extract/extract-ghkm/Exception.h (renamed from scripts/training/phrase-extract/extract-ghkm/Exception.h)0
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/ExtractGHKM.h (renamed from scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.h)0
-rw-r--r--phrase-extract/extract-ghkm/Jamfile1
-rw-r--r--phrase-extract/extract-ghkm/Main.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/Main.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/Node.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/Node.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/Node.h (renamed from scripts/training/phrase-extract/extract-ghkm/Node.h)0
-rw-r--r--phrase-extract/extract-ghkm/Options.h (renamed from scripts/training/phrase-extract/extract-ghkm/Options.h)0
-rw-r--r--phrase-extract/extract-ghkm/ParseTree.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/ParseTree.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/ParseTree.h (renamed from scripts/training/phrase-extract/extract-ghkm/ParseTree.h)0
-rw-r--r--phrase-extract/extract-ghkm/ScfgRule.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/ScfgRule.h (renamed from scripts/training/phrase-extract/extract-ghkm/ScfgRule.h)0
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/ScfgRuleWriter.h (renamed from scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h)0
-rw-r--r--phrase-extract/extract-ghkm/Span.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/Span.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/Span.h (renamed from scripts/training/phrase-extract/extract-ghkm/Span.h)0
-rw-r--r--phrase-extract/extract-ghkm/Subgraph.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp)0
-rw-r--r--phrase-extract/extract-ghkm/Subgraph.h (renamed from scripts/training/phrase-extract/extract-ghkm/Subgraph.h)0
-rw-r--r--phrase-extract/extract-ghkm/XmlTreeParser.cpp (renamed from scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp)2
-rw-r--r--phrase-extract/extract-ghkm/XmlTreeParser.h (renamed from scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.h)4
-rw-r--r--phrase-extract/extract-lex.cpp (renamed from scripts/training/phrase-extract/extract-lex.cpp)5
-rw-r--r--phrase-extract/extract-lex.h (renamed from scripts/training/phrase-extract/extract-lex.h)4
-rw-r--r--phrase-extract/extract-lex.vcxproj (renamed from scripts/training/phrase-extract/extract-lex.vcxproj)0
-rw-r--r--phrase-extract/extract-rules.cpp (renamed from scripts/training/phrase-extract/extract-rules.cpp)270
-rw-r--r--phrase-extract/extract-rules.vcxproj (renamed from scripts/training/phrase-extract/extract-rules.vcxproj)0
-rw-r--r--phrase-extract/extract.cpp (renamed from scripts/training/phrase-extract/extract.cpp)414
-rw-r--r--phrase-extract/extract.vcxproj (renamed from scripts/training/phrase-extract/extract.vcxproj)0
-rw-r--r--phrase-extract/extract.xcodeproj/project.pbxproj (renamed from scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj)18
-rw-r--r--[-rwxr-xr-x]phrase-extract/gzfilebuf.h (renamed from scripts/training/lexical-reordering/gzfilebuf.h)0
-rw-r--r--phrase-extract/hierarchical.h (renamed from scripts/training/phrase-extract/hierarchical.h)4
-rwxr-xr-xphrase-extract/lexical-reordering/InputFileStream.cpp (renamed from scripts/training/lexical-reordering/InputFileStream.cpp)0
-rwxr-xr-xphrase-extract/lexical-reordering/InputFileStream.h (renamed from scripts/training/lexical-reordering/InputFileStream.h)0
-rw-r--r--phrase-extract/lexical-reordering/Jamfile2
-rwxr-xr-x[-rw-r--r--]phrase-extract/lexical-reordering/gzfilebuf.h (renamed from scripts/training/phrase-extract/gzfilebuf.h)0
-rw-r--r--phrase-extract/lexical-reordering/reordering_classes.cpp (renamed from scripts/training/lexical-reordering/reordering_classes.cpp)0
-rw-r--r--phrase-extract/lexical-reordering/reordering_classes.h (renamed from scripts/training/lexical-reordering/reordering_classes.h)0
-rw-r--r--phrase-extract/lexical-reordering/score.cpp (renamed from scripts/training/lexical-reordering/score.cpp)0
-rw-r--r--phrase-extract/pcfg-common/Jamfile (renamed from scripts/training/phrase-extract/pcfg-common/Jamfile)0
-rw-r--r--phrase-extract/pcfg-common/exception.h (renamed from scripts/training/phrase-extract/pcfg-common/exception.h)0
-rw-r--r--phrase-extract/pcfg-common/numbered_set.h (renamed from scripts/training/phrase-extract/pcfg-common/numbered_set.h)0
-rw-r--r--phrase-extract/pcfg-common/pcfg.cc (renamed from scripts/training/phrase-extract/pcfg-common/pcfg.cc)0
-rw-r--r--phrase-extract/pcfg-common/pcfg.h (renamed from scripts/training/phrase-extract/pcfg-common/pcfg.h)0
-rw-r--r--phrase-extract/pcfg-common/pcfg_tree.h (renamed from scripts/training/phrase-extract/pcfg-common/pcfg_tree.h)0
-rw-r--r--phrase-extract/pcfg-common/syntax_tree.h (renamed from scripts/training/phrase-extract/pcfg-common/syntax_tree.h)0
-rw-r--r--phrase-extract/pcfg-common/tool.cc (renamed from scripts/training/phrase-extract/pcfg-common/tool.cc)0
-rw-r--r--phrase-extract/pcfg-common/tool.h (renamed from scripts/training/phrase-extract/pcfg-common/tool.h)0
-rw-r--r--phrase-extract/pcfg-common/typedef.h (renamed from scripts/training/phrase-extract/pcfg-common/typedef.h)0
-rw-r--r--phrase-extract/pcfg-common/xml_tree_parser.cc (renamed from scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc)2
-rw-r--r--phrase-extract/pcfg-common/xml_tree_parser.h (renamed from scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h)4
-rw-r--r--phrase-extract/pcfg-common/xml_tree_writer.h (renamed from scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h)0
-rw-r--r--phrase-extract/pcfg-extract/Jamfile1
-rw-r--r--phrase-extract/pcfg-extract/main.cc (renamed from scripts/training/phrase-extract/pcfg-extract/main.cc)0
-rw-r--r--phrase-extract/pcfg-extract/options.h (renamed from scripts/training/phrase-extract/pcfg-extract/options.h)0
-rw-r--r--phrase-extract/pcfg-extract/pcfg_extract.cc (renamed from scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc)0
-rw-r--r--phrase-extract/pcfg-extract/pcfg_extract.h (renamed from scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h)0
-rw-r--r--phrase-extract/pcfg-extract/rule_collection.cc (renamed from scripts/training/phrase-extract/pcfg-extract/rule_collection.cc)0
-rw-r--r--phrase-extract/pcfg-extract/rule_collection.h (renamed from scripts/training/phrase-extract/pcfg-extract/rule_collection.h)0
-rw-r--r--phrase-extract/pcfg-extract/rule_extractor.cc (renamed from scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc)0
-rw-r--r--phrase-extract/pcfg-extract/rule_extractor.h (renamed from scripts/training/phrase-extract/pcfg-extract/rule_extractor.h)0
-rw-r--r--phrase-extract/pcfg-score/Jamfile1
-rw-r--r--phrase-extract/pcfg-score/main.cc (renamed from scripts/training/phrase-extract/pcfg-score/main.cc)0
-rw-r--r--phrase-extract/pcfg-score/options.h (renamed from scripts/training/phrase-extract/pcfg-score/options.h)0
-rw-r--r--phrase-extract/pcfg-score/pcfg_score.cc (renamed from scripts/training/phrase-extract/pcfg-score/pcfg_score.cc)0
-rw-r--r--phrase-extract/pcfg-score/pcfg_score.h (renamed from scripts/training/phrase-extract/pcfg-score/pcfg_score.h)0
-rw-r--r--phrase-extract/pcfg-score/tree_scorer.cc (renamed from scripts/training/phrase-extract/pcfg-score/tree_scorer.cc)0
-rw-r--r--phrase-extract/pcfg-score/tree_scorer.h (renamed from scripts/training/phrase-extract/pcfg-score/tree_scorer.h)0
-rw-r--r--phrase-extract/phrase-extract.sln (renamed from scripts/training/phrase-extract/phrase-extract.sln)0
-rw-r--r--phrase-extract/relax-parse.cpp (renamed from scripts/training/phrase-extract/relax-parse.cpp)1
-rw-r--r--phrase-extract/relax-parse.h (renamed from scripts/training/phrase-extract/relax-parse.h)8
-rw-r--r--phrase-extract/score.cpp (renamed from scripts/training/phrase-extract/score.cpp)393
-rw-r--r--phrase-extract/score.h (renamed from scripts/training/phrase-extract/score.h)21
-rw-r--r--phrase-extract/score.vcxproj (renamed from scripts/training/phrase-extract/score.vcxproj)0
-rw-r--r--phrase-extract/statistics.cpp (renamed from scripts/training/phrase-extract/statistics.cpp)16
-rw-r--r--phrase-extract/tables-core.cpp (renamed from scripts/training/phrase-extract/tables-core.cpp)5
-rw-r--r--phrase-extract/tables-core.h (renamed from scripts/training/phrase-extract/tables-core.h)5
-rw-r--r--regression-testing/Jamfile45
-rw-r--r--regression-testing/MosesRegressionTesting.pm8
-rwxr-xr-xregression-testing/run-test-extract.perl2
-rwxr-xr-xregression-testing/run-test-mert.perl8
-rwxr-xr-xregression-testing/run-test-suite.perl180
m---------regression-testing/tests0
-rw-r--r--scripts/Jamfile54
-rwxr-xr-x[-rw-r--r--]scripts/analysis/smtgui/newsmtgui.cgi0
-rwxr-xr-xscripts/analysis/weight-scan.pl4
-rw-r--r--scripts/ems/example/config.basic7
-rw-r--r--scripts/ems/example/config.factored7
-rw-r--r--scripts/ems/example/config.hierarchical7
-rw-r--r--scripts/ems/example/config.syntax7
-rw-r--r--scripts/ems/example/config.toy7
-rw-r--r--scripts/ems/experiment.meta2
-rwxr-xr-xscripts/ems/experiment.perl303
-rwxr-xr-xscripts/ems/support/analysis.perl60
-rwxr-xr-xscripts/ems/support/build-domain-file-from-subcorpora.perl38
-rwxr-xr-xscripts/ems/support/build-sparse-lexical-features.perl100
-rwxr-xr-xscripts/ems/support/run-command-on-multiple-refsets.perl15
-rwxr-xr-xscripts/ems/support/split-sentences.perl4
-rw-r--r--scripts/ems/support/train-irstlm.perl22
-rwxr-xr-xscripts/fuzzy-match/create_xml.perl309
-rwxr-xr-xscripts/generic/compound-splitter.perl2
-rwxr-xr-xscripts/generic/extract-parallel.perl27
-rwxr-xr-xscripts/generic/multi-bleu.perl2
-rwxr-xr-xscripts/generic/score-parallel.perl7
-rwxr-xr-xscripts/generic/trainlm-irst.perl10
-rwxr-xr-xscripts/recaser/recase.perl2
-rwxr-xr-xscripts/recaser/train-recaser.perl25
-rwxr-xr-x[-rw-r--r--]scripts/regression-testing/moses-virtual0
-rwxr-xr-x[-rw-r--r--]scripts/regression-testing/tests/mert-moses-new-aggregate/command0
-rwxr-xr-x[-rw-r--r--]scripts/regression-testing/tests/mert-moses-new-continue/command0
-rwxr-xr-x[-rw-r--r--]scripts/regression-testing/tests/mert-moses-new-nocase/command0
-rwxr-xr-x[-rw-r--r--]scripts/regression-testing/tests/mert-moses-new/command0
-rw-r--r--scripts/share/nonbreaking_prefixes/README.txt (renamed from scripts/tokenizer/nonbreaking_prefixes/README.txt)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ca (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ca)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.cs390
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.de (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.de)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.el (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.el)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.en (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.en)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.es (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.es)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.fr (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.fr)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.is (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.is)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.it (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.it)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.nl (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.nl)0
-rwxr-xr-xscripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pl)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pt (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pt)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ro (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ro)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ru (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ru)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sk (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sk)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sl (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sl)0
-rw-r--r--scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sv (renamed from scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sv)0
-rwxr-xr-x[-rw-r--r--]scripts/tests/full-train-mert-decode.test0
-rwxr-xr-x[-rw-r--r--]scripts/tests/train-factored-test-step9.test0
-rwxr-xr-xscripts/tokenizer/deescape-special-chars.perl3
-rwxr-xr-xscripts/tokenizer/detokenizer.perl3
-rwxr-xr-xscripts/tokenizer/escape-special-chars.perl4
-rwxr-xr-xscripts/tokenizer/tokenizer.perl436
-rw-r--r--scripts/training/Jamfile14
-rw-r--r--scripts/training/LexicalTranslationModel.pm132
-rwxr-xr-xscripts/training/absolutize_moses_model.pl10
-rw-r--r--scripts/training/compact-rule-table/Compactify.cpp296
-rw-r--r--scripts/training/compact-rule-table/Compactify.h43
-rw-r--r--scripts/training/compact-rule-table/Compactify_Main.cpp6
-rw-r--r--scripts/training/compact-rule-table/Exception.h22
-rw-r--r--scripts/training/compact-rule-table/Jamfile3
-rw-r--r--scripts/training/compact-rule-table/NumberedSet.h79
-rw-r--r--scripts/training/compact-rule-table/Options.h18
-rw-r--r--scripts/training/compact-rule-table/RuleTableParser.cpp168
-rw-r--r--scripts/training/compact-rule-table/RuleTableParser.h51
-rw-r--r--scripts/training/compact-rule-table/Tool.h34
-rwxr-xr-xscripts/training/filter-model-given-input.pl4
-rwxr-xr-xscripts/training/get-lexical.perl19
-rwxr-xr-xscripts/training/giza2bal.pl (renamed from scripts/training/symal/giza2bal.pl)0
-rw-r--r--scripts/training/lexical-reordering/Jamfile3
-rwxr-xr-xscripts/training/mert-moses.pl94
-rw-r--r--scripts/training/phrase-extract/extract-ghkm/Jamfile3
-rw-r--r--scripts/training/phrase-extract/pcfg-extract/Jamfile1
-rw-r--r--scripts/training/phrase-extract/pcfg-score/Jamfile1
-rwxr-xr-xscripts/training/reduce-topt-count.pl207
-rw-r--r--scripts/training/symal/Jamfile3
-rwxr-xr-x[-rw-r--r--]scripts/training/train-model.perl (renamed from scripts/training/train-model.perl.missing_bin_dir)334
-rwxr-xr-xscripts/training/wrappers/adam-suffix-array/suffix-array-create.sh33
-rwxr-xr-xscripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh25
-rwxr-xr-xscripts/training/wrappers/parse-de-berkeley.perl8
-rwxr-xr-xscripts/training/wrappers/parse-de-bitpar.perl2
-rwxr-xr-xscripts/training/wrappers/suffix-array-create.sh22
-rwxr-xr-xscripts/training/wrappers/suffix-array-extract.sh18
-rwxr-xr-xscripts/training/zmert-moses.pl4
-rw-r--r--symal/Jamfile2
-rw-r--r--symal/cmd.c (renamed from scripts/training/symal/cmd.c)0
-rw-r--r--symal/cmd.h (renamed from scripts/training/symal/cmd.h)0
-rw-r--r--symal/symal.cpp (renamed from scripts/training/symal/symal.cpp)0
-rw-r--r--symal/symal.vcproj (renamed from scripts/training/symal/symal.vcproj)0
-rw-r--r--util/Jamfile2
-rw-r--r--util/bit_packing.hh7
-rw-r--r--util/ersatz_progress.cc8
-rw-r--r--util/ersatz_progress.hh4
-rw-r--r--util/file.cc30
-rw-r--r--util/file.hh5
-rw-r--r--util/file_piece.cc4
-rw-r--r--util/have.hh2
-rw-r--r--util/mmap.cc1
-rw-r--r--util/murmur_hash.cc11
-rw-r--r--util/murmur_hash.hh6
-rw-r--r--util/probing_hash_table.hh21
-rw-r--r--util/probing_hash_table_test.cc9
-rw-r--r--util/string_piece.hh5
-rw-r--r--util/usage.cc46
-rw-r--r--util/usage.hh8
-rw-r--r--util/util.xcodeproj/project.pbxproj7
910 files changed, 79110 insertions, 17543 deletions
diff --git a/.gitignore b/.gitignore
index be2ad9afb..004f7d759 100644
--- a/.gitignore
+++ b/.gitignore
@@ -61,3 +61,7 @@ scripts/training/train-model.perl
dist
bin
previous.sh
+contrib/other-builds/*.xcodeproj/project.xcworkspace/
+contrib/other-builds/*.xcodeproj/xcuserdata/
+*/*.xcodeproj/project.xcworkspace
+*/*.xcodeproj/xcuserdata
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 000000000..0470c63fd
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "regression-testing/tests"]
+ path = regression-testing/tests
+ url = ../moses-regression-tests.git
diff --git a/BUILD-INSTRUCTIONS.txt b/BUILD-INSTRUCTIONS.txt
index 0fd1ed4e1..5b4ec2565 100644
--- a/BUILD-INSTRUCTIONS.txt
+++ b/BUILD-INSTRUCTIONS.txt
@@ -8,7 +8,7 @@ available at http://boost.org .
There are several optional dependencies:
-GIZA++ from http://code.google.com/p/giza-pp/ is used to build phrase tables.
+GIZA++ from http://code.google.com/p/giza-pp/ is used to align words in the parallel corpus during training.
Moses server requires xmlrpc-c with abyss-server. Source is available from
http://xmlrpc-c.sourceforge.net/.
@@ -85,7 +85,6 @@ Building consists of running
Common options are:
--with-srilm=/path/to/srilm to compile the decoder with SRILM support
--with-irstlm=/path/to/irstlm to compile the decoder with IRSTLM support
---with-giza=/path/to/giza to enable training scripts
-jN where N is the number of CPUs
--with-macports=/path/to/macports use MacPorts on Mac OS X.
diff --git a/Jamroot b/Jamroot
index e19c60df9..927ccc577 100644
--- a/Jamroot
+++ b/Jamroot
@@ -15,9 +15,8 @@
#Note that, like language models, this is the --prefix where the library was
#installed, not some executable within the library.
#
-#--with-giza=/path/to/giza
-#Indicates where binaries GIZA++, snt2cooc.out, and mkcls live.
-#Builds scripts/training/train-model.perl using these paths.
+#Compact phrase table and compact lexical reordering table
+#--with-cmph=/path/to/cmph
#
#Thread-caching malloc (optional):
#--with-tcmalloc
@@ -25,14 +24,14 @@
#REGRESSION TESTING
#--with-regtest=/path/to/moses-reg-test-data
#
-#
#INSTALLATION
-#--prefix=/path/to/prefix sets the install prefix [dist].
+#--prefix=/path/to/prefix sets the install prefix [default is source root].
#--bindir=/path/to/prefix/bin sets the bin directory [PREFIX/bin]
#--libdir=/path/to/prefix/lib sets the lib directory [PREFIX/lib]
#--includedir=/path/to/prefix/include installs headers.
# Does not install if missing. No argument defaults to PREFIX/include .
#--install-scripts=/path/to/scripts copies scripts into a directory.
+# Does not install if missing. No argument defaults to PREFIX/scripts .
#--git appends the git revision to the prefix directory.
#
#
@@ -45,7 +44,9 @@
# variant=release|debug|profile builds optimized (default), for debug, or for
# profiling
#
-# link=static|shared controls linking (default static)
+# link=static|shared controls preferred linking (default static)
+# --static forces static linking (the default will fall
+# back to shared)
#
# debug-symbols=on|off include (default) or exclude debugging
# information also known as -g
@@ -54,6 +55,9 @@
# --enable-boost-pool uses Boost pools for the memory SCFG table
#
# --enable-mpi switch on mpi
+# --without-libsegfault does not link with libSegFault
+#
+# --max-kenlm-order maximum ngram order that kenlm can process (default 6)
#
#CONTROLLING THE BUILD
#-a to build from scratch
@@ -88,6 +92,10 @@ if [ option.get "enable-mpi" : : "yes" ] {
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
+if [ option.get "with-cmph" ] {
+ requirements += <define>HAVE_CMPH ;
+}
+
project : default-build
<threading>multi
<warnings>on
@@ -104,23 +112,12 @@ project : requirements
;
#Add directories here if you want their incidental targets too (i.e. tests).
-build-project lm ;
-build-project util ;
-#Trigger instllation into legacy paths.
-build-project mert ;
-build-project moses-cmd/src ;
-build-project moses-chart-cmd/src ;
-build-project mira ;
-build-project moses/src ;
-#Scripts have their own binaries.
-build-project scripts ;
-#Regression tests (only does anything if --with-regtest is passed)
-build-project regression-testing ;
+build-projects util lm mert moses-cmd/src moses-chart-cmd/src mira scripts regression-testing ;
-alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDisk OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs mira//programs ;
+alias programs : lm//query lm//build_binary lm//kenlm_max_order moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs mira//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor ;
install-bin-libs programs ;
-install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist kenlm moses ] : . ;
+install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
install-headers headers-moses : moses/src//headers-to-install : moses/src ;
alias install : prefix-bin prefix-lib headers-base headers-moses ;
@@ -128,3 +125,9 @@ alias install : prefix-bin prefix-lib headers-base headers-moses ;
if ! [ option.get "includedir" : : $(prefix)/include ] {
explicit install headers-base headers-moses ;
}
+
+if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
+ echo "You have a $(TOP)/dist directory, but the build system now places files directly in the root i.e. $(TOP)/bin ." ;
+ echo "To disable this message, delete $(TOP)/dist ." ;
+ echo ;
+}
diff --git a/NOTICE b/NOTICE
new file mode 100644
index 000000000..7d631cd88
--- /dev/null
+++ b/NOTICE
@@ -0,0 +1,3 @@
+This code includes data from Daniel Naber's Language Tools (czech abbreviations).
+
+This code includes data from czech wiktionary (also czech abbreviations).
diff --git a/OnDiskPt/Jamfile b/OnDiskPt/Jamfile
index 9aa00fcae..4a6662453 100644
--- a/OnDiskPt/Jamfile
+++ b/OnDiskPt/Jamfile
@@ -1,5 +1,5 @@
lib OnDiskPt : OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp ../moses/src//headers ;
-exe CreateOnDisk : Main.cpp ../moses/src//moses OnDiskPt ;
+exe CreateOnDiskPt : Main.cpp ../moses/src//moses OnDiskPt ;
exe queryOnDiskPt : queryOnDiskPt.cpp ../moses/src//moses OnDiskPt ;
diff --git a/OnDiskPt/Main.cpp b/OnDiskPt/Main.cpp
index 4e6c5ad09..acff2d405 100644
--- a/OnDiskPt/Main.cpp
+++ b/OnDiskPt/Main.cpp
@@ -77,7 +77,7 @@ int main (int argc, char * const argv[])
std::vector<float> misc(1);
SourcePhrase sourcePhrase;
TargetPhrase *targetPhrase = new TargetPhrase(numScores);
- OnDiskPt::Phrase *spShort = Tokenize(sourcePhrase, *targetPhrase, line, onDiskWrapper, numScores, misc);
+ OnDiskPt::PhrasePtr spShort = Tokenize(sourcePhrase, *targetPhrase, line, onDiskWrapper, numScores, misc);
assert(misc.size() == onDiskWrapper.GetNumCounts());
rootNode.AddTargetPhrase(sourcePhrase, targetPhrase, onDiskWrapper, tableLimit, misc, spShort);
@@ -105,7 +105,7 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
return ret;
}
-OnDiskPt::Phrase *Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
+OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
size_t scoreInd = 0;
@@ -118,14 +118,14 @@ OnDiskPt::Phrase *Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhras
4 = count
*/
char *tok = strtok (line," ");
- OnDiskPt::Phrase *out = new Phrase();
+ OnDiskPt::PhrasePtr out(new Phrase());
while (tok != NULL) {
if (0 == strcmp(tok, "|||")) {
++stage;
} else {
switch (stage) {
case 0: {
- Word *w = Tokenize(sourcePhrase, tok, true, true, onDiskWrapper);
+ WordPtr w = Tokenize(sourcePhrase, tok, true, true, onDiskWrapper);
if (w != NULL)
out->AddWord(w);
@@ -184,7 +184,7 @@ OnDiskPt::Phrase *Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhras
return out;
} // Tokenize()
-OnDiskPt::Word *Tokenize(OnDiskPt::Phrase &phrase
+OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
, OnDiskPt::OnDiskWrapper &onDiskWrapper)
{
@@ -198,7 +198,7 @@ OnDiskPt::Word *Tokenize(OnDiskPt::Phrase &phrase
nonTerm = comStr == 0;
}
- OnDiskPt::Word *out = NULL;
+ OnDiskPt::WordPtr out;
if (nonTerm) {
// non-term
size_t splitPos = token.find_first_of("[", 2);
@@ -206,20 +206,20 @@ OnDiskPt::Word *Tokenize(OnDiskPt::Phrase &phrase
if (splitPos == string::npos) {
// lhs - only 1 word
- Word *word = new Word();
+ WordPtr word(new Word());
word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
phrase.AddWord(word);
} else {
// source & target non-terms
if (addSourceNonTerm) {
- Word *word = new Word();
+ WordPtr word(new Word());
word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
phrase.AddWord(word);
}
wordStr = token.substr(splitPos, tokSize - splitPos);
if (addTargetNonTerm) {
- Word *word = new Word();
+ WordPtr word(new Word());
word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
phrase.AddWord(word);
out = word;
@@ -228,7 +228,7 @@ OnDiskPt::Word *Tokenize(OnDiskPt::Phrase &phrase
}
} else {
// term
- Word *word = new Word();
+ WordPtr word(new Word());
word->CreateFromString(token, onDiskWrapper.GetVocab());
phrase.AddWord(word);
out = word;
diff --git a/OnDiskPt/Main.h b/OnDiskPt/Main.h
index 2db4eb864..c8cfcb1dd 100644
--- a/OnDiskPt/Main.h
+++ b/OnDiskPt/Main.h
@@ -25,10 +25,10 @@
typedef std::pair<size_t, size_t> AlignPair;
typedef std::vector<AlignPair> AlignType;
-OnDiskPt::Word *Tokenize(OnDiskPt::Phrase &phrase
+OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
, OnDiskPt::OnDiskWrapper &onDiskWrapper);
-OnDiskPt::Phrase *Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
+OnDiskPt::PhrasePtr Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
, char *line, OnDiskPt::OnDiskWrapper &onDiskWrapper
, int numScores
, std::vector<float> &misc);
diff --git a/OnDiskPt/OnDiskWrapper.cpp b/OnDiskPt/OnDiskWrapper.cpp
index 79b0563a8..cd58922b0 100644
--- a/OnDiskPt/OnDiskWrapper.cpp
+++ b/OnDiskPt/OnDiskWrapper.cpp
@@ -163,7 +163,7 @@ void OnDiskWrapper::EndSave()
void OnDiskWrapper::SaveMisc()
{
- m_fileMisc << "Version 3" << endl;
+ m_fileMisc << "Version 4" << endl;
m_fileMisc << "NumSourceFactors " << m_numSourceFactors << endl;
m_fileMisc << "NumTargetFactors " << m_numTargetFactors << endl;
m_fileMisc << "NumScores " << m_numScores << endl;
@@ -172,12 +172,12 @@ void OnDiskWrapper::SaveMisc()
size_t OnDiskWrapper::GetSourceWordSize() const
{
- return m_numSourceFactors * sizeof(UINT64) + sizeof(char);
+ return sizeof(UINT64) + sizeof(char);
}
size_t OnDiskWrapper::GetTargetWordSize() const
{
- return m_numTargetFactors * sizeof(UINT64) + sizeof(char);
+ return sizeof(UINT64) + sizeof(char);
}
UINT64 OnDiskWrapper::GetMisc(const std::string &key) const
@@ -199,32 +199,37 @@ Word *OnDiskWrapper::ConvertFromMoses(Moses::FactorDirection /* direction */
, const Moses::Word &origWord) const
{
bool isNonTerminal = origWord.IsNonTerminal();
- Word *newWord = new Word(1, isNonTerminal); // TODO - num of factors
+ Word *newWord = new Word(isNonTerminal);
+ stringstream strme;
- for (size_t ind = 0 ; ind < factorsVec.size() ; ++ind) {
- size_t factorType = factorsVec[ind];
+ size_t factorType = factorsVec[0];
+ const Moses::Factor *factor = origWord.GetFactor(factorType);
+ CHECK(factor);
+ string str = factor->GetString();
+ strme << str;
+ for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
+ size_t factorType = factorsVec[ind];
const Moses::Factor *factor = origWord.GetFactor(factorType);
+ if (factor == NULL)
+ { // can have less factors than factorType.size()
+ break;
+ }
CHECK(factor);
-
string str = factor->GetString();
- if (isNonTerminal) {
- str = "[" + str + "]";
- }
-
- bool found;
- UINT64 vocabId = m_vocab.GetVocabId(str, found);
- if (!found) {
- // factor not in phrase table -> phrse definately not in. exit
- delete newWord;
- return NULL;
- } else {
- newWord->SetVocabId(ind, vocabId);
- }
+ strme << "|" << str;
} // for (size_t factorType
- return newWord;
-
+ bool found;
+ UINT64 vocabId = m_vocab.GetVocabId(strme.str(), found);
+ if (!found) {
+ // factor not in phrase table -> phrse definately not in. exit
+ delete newWord;
+ return NULL;
+ } else {
+ newWord->SetVocabId(vocabId);
+ return newWord;
+ }
}
diff --git a/OnDiskPt/OnDiskWrapper.h b/OnDiskPt/OnDiskWrapper.h
index c49afdda1..d95aae68e 100644
--- a/OnDiskPt/OnDiskWrapper.h
+++ b/OnDiskPt/OnDiskWrapper.h
@@ -28,6 +28,10 @@ namespace OnDiskPt
{
const float DEFAULT_COUNT = 66666;
+/** Global class with misc information need to create and use the on-disk rule table.
+ * 1 object of this class should be instantiated per rule table.
+ * Currently only hierarchical/syntax models use this, but can & should be used with pb models too
+ */
class OnDiskWrapper
{
protected:
diff --git a/OnDiskPt/Phrase.cpp b/OnDiskPt/Phrase.cpp
index dc289a81a..73d2dbac9 100644
--- a/OnDiskPt/Phrase.cpp
+++ b/OnDiskPt/Phrase.cpp
@@ -27,27 +27,13 @@ using namespace std;
namespace OnDiskPt
{
-Phrase::Phrase(const Phrase &copy)
- :m_words(copy.GetSize())
-{
- for (size_t pos = 0; pos < copy.GetSize(); ++pos) {
- const Word &oldWord = copy.GetWord(pos);
- Word *newWord = new Word(oldWord);
- m_words[pos] = newWord;
- }
-}
-
-Phrase::~Phrase()
-{
- Moses::RemoveAllInColl(m_words);
-}
-void Phrase::AddWord(Word *word)
+void Phrase::AddWord(WordPtr word)
{
m_words.push_back(word);
}
-void Phrase::AddWord(Word *word, size_t pos)
+void Phrase::AddWord(WordPtr word, size_t pos)
{
CHECK(pos < m_words.size());
m_words.insert(m_words.begin() + pos + 1, word);
diff --git a/OnDiskPt/Phrase.h b/OnDiskPt/Phrase.h
index f7de9a10d..e785be823 100644
--- a/OnDiskPt/Phrase.h
+++ b/OnDiskPt/Phrase.h
@@ -20,27 +20,29 @@
***********************************************************************/
#include <vector>
#include <iostream>
+#include <boost/shared_ptr.hpp>
#include "Word.h"
namespace OnDiskPt
{
class Vocab;
+
+/** A contiguous phrase. SourcePhrase & TargetPhrase inherit from this and add the on-disk functionality
+ */
class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
protected:
- std::vector<Word*> m_words;
+ std::vector<WordPtr> m_words;
public:
Phrase()
{}
- Phrase(const Phrase &copy);
- virtual ~Phrase();
- void AddWord(Word *word);
- void AddWord(Word *word, size_t pos);
+ void AddWord(WordPtr word);
+ void AddWord(WordPtr word, size_t pos);
const Word &GetWord(size_t pos) const {
return *m_words[pos];
@@ -57,4 +59,6 @@ public:
bool operator==(const Phrase &compare) const;
};
+typedef boost::shared_ptr<Phrase> PhrasePtr;
+
}
diff --git a/OnDiskPt/PhraseNode.cpp b/OnDiskPt/PhraseNode.cpp
index 8e3d8050d..5ced697b4 100644
--- a/OnDiskPt/PhraseNode.cpp
+++ b/OnDiskPt/PhraseNode.cpp
@@ -160,14 +160,14 @@ void PhraseNode::Save(OnDiskWrapper &onDiskWrapper, size_t pos, size_t tableLimi
void PhraseNode::AddTargetPhrase(const SourcePhrase &sourcePhrase, TargetPhrase *targetPhrase
, OnDiskWrapper &onDiskWrapper, size_t tableLimit
- , const std::vector<float> &counts, OnDiskPt::Phrase *spShort)
+ , const std::vector<float> &counts, OnDiskPt::PhrasePtr spShort)
{
AddTargetPhrase(0, sourcePhrase, targetPhrase, onDiskWrapper, tableLimit, counts, spShort);
}
void PhraseNode::AddTargetPhrase(size_t pos, const SourcePhrase &sourcePhrase
, TargetPhrase *targetPhrase, OnDiskWrapper &onDiskWrapper
- , size_t tableLimit, const std::vector<float> &counts, OnDiskPt::Phrase *spShort)
+ , size_t tableLimit, const std::vector<float> &counts, OnDiskPt::PhrasePtr spShort)
{
size_t phraseSize = sourcePhrase.GetSize();
if (pos < phraseSize) {
@@ -228,20 +228,19 @@ void PhraseNode::GetChild(Word &wordFound, UINT64 &childFilePos, size_t ind, OnD
size_t wordSize = onDiskWrapper.GetSourceWordSize();
size_t childSize = wordSize + sizeof(UINT64);
- size_t numFactors = onDiskWrapper.GetNumSourceFactors();
char *currMem = m_memLoad
+ sizeof(UINT64) * 2 // size & file pos of target phrase coll
+ sizeof(float) * onDiskWrapper.GetNumCounts() // count info
+ childSize * ind;
- size_t memRead = ReadChild(wordFound, childFilePos, currMem, numFactors);
+ size_t memRead = ReadChild(wordFound, childFilePos, currMem);
CHECK(memRead == childSize);
}
-size_t PhraseNode::ReadChild(Word &wordFound, UINT64 &childFilePos, const char *mem, size_t numFactors) const
+size_t PhraseNode::ReadChild(Word &wordFound, UINT64 &childFilePos, const char *mem) const
{
- size_t memRead = wordFound.ReadFromMemory(mem, numFactors);
+ size_t memRead = wordFound.ReadFromMemory(mem);
const char *currMem = mem + memRead;
UINT64 *memArray = (UINT64*) (currMem);
diff --git a/OnDiskPt/PhraseNode.h b/OnDiskPt/PhraseNode.h
index e4704d142..fbd20ce36 100644
--- a/OnDiskPt/PhraseNode.h
+++ b/OnDiskPt/PhraseNode.h
@@ -31,6 +31,7 @@ namespace OnDiskPt
class OnDiskWrapper;
class SourcePhrase;
+/** A node in the source tree trie */
class PhraseNode
{
friend std::ostream& operator<<(std::ostream&, const PhraseNode&);
@@ -51,8 +52,8 @@ protected:
void AddTargetPhrase(size_t pos, const SourcePhrase &sourcePhrase
, TargetPhrase *targetPhrase, OnDiskWrapper &onDiskWrapper
- , size_t tableLimit, const std::vector<float> &counts, OnDiskPt::Phrase *spShort);
- size_t ReadChild(Word &wordFound, UINT64 &childFilePos, const char *mem, size_t numFactors) const;
+ , size_t tableLimit, const std::vector<float> &counts, OnDiskPt::PhrasePtr spShort);
+ size_t ReadChild(Word &wordFound, UINT64 &childFilePos, const char *mem) const;
void GetChild(Word &wordFound, UINT64 &childFilePos, size_t ind, OnDiskWrapper &onDiskWrapper) const;
public:
@@ -67,7 +68,7 @@ public:
void AddTargetPhrase(const SourcePhrase &sourcePhrase, TargetPhrase *targetPhrase
, OnDiskWrapper &onDiskWrapper, size_t tableLimit
- , const std::vector<float> &counts, OnDiskPt::Phrase *spShort);
+ , const std::vector<float> &counts, OnDiskPt::PhrasePtr spShort);
UINT64 GetFilePos() const {
return m_filePos;
diff --git a/OnDiskPt/SourcePhrase.h b/OnDiskPt/SourcePhrase.h
index b4ae46705..e0c510e42 100644
--- a/OnDiskPt/SourcePhrase.h
+++ b/OnDiskPt/SourcePhrase.h
@@ -25,6 +25,8 @@
namespace OnDiskPt
{
+/** A source phrase. No extension of a norm Phrase class because source phrases are saved as tries.
+ */
class SourcePhrase: public Phrase
{
protected:
@@ -32,4 +34,5 @@ protected:
public:
};
+
}
diff --git a/OnDiskPt/TargetPhrase.cpp b/OnDiskPt/TargetPhrase.cpp
index 587233485..e7123af3b 100644
--- a/OnDiskPt/TargetPhrase.cpp
+++ b/OnDiskPt/TargetPhrase.cpp
@@ -50,7 +50,7 @@ TargetPhrase::~TargetPhrase()
{
}
-void TargetPhrase::SetLHS(Word *lhs)
+void TargetPhrase::SetLHS(WordPtr lhs)
{
AddWord(lhs);
}
@@ -99,7 +99,7 @@ char *TargetPhrase::WriteToMemory(OnDiskWrapper &onDiskWrapper, size_t &memUsed)
size_t phraseSize = GetSize();
size_t targetWordSize = onDiskWrapper.GetTargetWordSize();
- const Phrase* sp = GetSourcePhrase();
+ const PhrasePtr sp = GetSourcePhrase();
size_t spSize = sp->GetSize();
size_t sourceWordSize = onDiskWrapper.GetSourceWordSize();
@@ -240,9 +240,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
--phraseSize;
for (size_t pos = 0; pos < phraseSize; ++pos) {
- Moses::Word *mosesWord = GetWord(pos).ConvertToMoses(Moses::Output, outputFactors, vocab);
- ret->AddWord(*mosesWord);
- delete mosesWord;
+ GetWord(pos).ConvertToMoses(outputFactors, vocab, ret->AddWord());
}
// scores
@@ -252,7 +250,7 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
int indicator[m_align.size()];
int index = 0;
std::set<std::pair<size_t, size_t> > alignmentInfo;
- const Phrase* sp = GetSourcePhrase();
+ const PhrasePtr sp = GetSourcePhrase();
for (size_t ind = 0; ind < m_align.size(); ++ind) {
const std::pair<size_t, size_t> &entry = m_align[ind];
alignmentInfo.insert(entry);
@@ -261,18 +259,14 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
}
ret->SetAlignmentInfo(alignmentInfo, indicator);
- Moses::Word *lhs = GetWord(GetSize() - 1).ConvertToMoses(Moses::Output, outputFactors, vocab);
- ret->SetTargetLHS(*lhs);
- delete lhs;
+ GetWord(GetSize() - 1).ConvertToMoses(outputFactors, vocab, ret->MutableTargetLHS());
// set source phrase
- Moses::Phrase *mosesSP = new Moses::Phrase(Moses::Input);
+ Moses::Phrase mosesSP(Moses::Input);
for (size_t pos = 0; pos < sp->GetSize(); ++pos) {
- Moses::Word *mosesWord = sp->GetWord(pos).ConvertToMoses(Moses::Input, inputFactors, vocab);
- mosesSP->AddWord(*mosesWord);
- delete mosesWord;
+ sp->GetWord(pos).ConvertToMoses(inputFactors, vocab, mosesSP.AddWord());
}
- ret->SetSourcePhrase(*mosesSP);
+ ret->SetSourcePhrase(mosesSP);
return ret;
}
@@ -295,7 +289,7 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
return memUsed;
}
-UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP, size_t numFactors, size_t numSourceFactors)
+UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP)
{
UINT64 bytesRead = 0;
@@ -306,8 +300,8 @@ UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP, size_t numFactors, size_
bytesRead += sizeof(UINT64);
for (size_t ind = 0; ind < numWords; ++ind) {
- Word *word = new Word();
- bytesRead += word->ReadFromFile(fileTP, numFactors);
+ WordPtr word(new Word());
+ bytesRead += word->ReadFromFile(fileTP);
AddWord(word);
}
@@ -316,10 +310,10 @@ UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP, size_t numFactors, size_
fileTP.read((char*) &numSourceWords, sizeof(UINT64));
bytesRead += sizeof(UINT64);
- SourcePhrase *sp = new SourcePhrase();
+ PhrasePtr sp(new SourcePhrase());
for (size_t ind = 0; ind < numSourceWords; ++ind) {
- Word *word = new Word();
- bytesRead += word->ReadFromFile(fileTP, numSourceFactors);
+ WordPtr word( new Word());
+ bytesRead += word->ReadFromFile(fileTP);
sp->AddWord(word);
}
SetSourcePhrase(sp);
diff --git a/OnDiskPt/TargetPhrase.h b/OnDiskPt/TargetPhrase.h
index 9a1e44e35..37fd0f526 100644
--- a/OnDiskPt/TargetPhrase.h
+++ b/OnDiskPt/TargetPhrase.h
@@ -43,12 +43,15 @@ typedef std::vector<AlignPair> AlignType;
class Vocab;
+/** A target phrase, with the score breakdowns, alignment info and assorted other information it need.
+ * Readable and writeable to disk
+ */
class TargetPhrase: public Phrase
{
friend std::ostream& operator<<(std::ostream&, const TargetPhrase&);
protected:
AlignType m_align;
- Phrase* m_sourcePhrase;
+ PhrasePtr m_sourcePhrase;
std::vector<float> m_scores;
UINT64 m_filePos;
@@ -64,15 +67,14 @@ public:
TargetPhrase(const TargetPhrase &copy);
virtual ~TargetPhrase();
- void SetSourcePhrase(Phrase *p) {
- Phrase *copy = new Phrase(*p);
- m_sourcePhrase = copy;
+ void SetSourcePhrase(PhrasePtr p) {
+ m_sourcePhrase = p;
}
- const Phrase* GetSourcePhrase() const {
- return m_sourcePhrase;
+ const PhrasePtr GetSourcePhrase() const {
+ return m_sourcePhrase;
}
- void SetLHS(Word *lhs);
+ void SetLHS(WordPtr lhs);
void Create1AlignFromString(const std::string &align1Str);
void CreateAlignFromString(const std::string &align1Str);
@@ -102,7 +104,7 @@ public:
, const Moses::WordPenaltyProducer* wpProducer
, const Moses::LMList &lmList) const;
UINT64 ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPColl);
- UINT64 ReadFromFile(std::fstream &fileTP, size_t numFactors, size_t numSourceFactors);
+ UINT64 ReadFromFile(std::fstream &fileTP);
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
diff --git a/OnDiskPt/TargetPhraseCollection.cpp b/OnDiskPt/TargetPhraseCollection.cpp
index c8d832afe..11ff7cea6 100644
--- a/OnDiskPt/TargetPhraseCollection.cpp
+++ b/OnDiskPt/TargetPhraseCollection.cpp
@@ -156,9 +156,8 @@ void TargetPhraseCollection::ReadFromFile(size_t tableLimit, UINT64 filePos, OnD
fstream &fileTP = onDiskWrapper.GetFileTargetInd();
size_t numScores = onDiskWrapper.GetNumScores();
- size_t numTargetFactors = onDiskWrapper.GetNumTargetFactors();
- size_t numSourceFactors = onDiskWrapper.GetNumSourceFactors();
+
UINT64 numPhrases;
UINT64 currFilePos = filePos;
@@ -172,8 +171,9 @@ void TargetPhraseCollection::ReadFromFile(size_t tableLimit, UINT64 filePos, OnD
for (size_t ind = 0; ind < numPhrases; ++ind) {
TargetPhrase *tp = new TargetPhrase(numScores);
+
UINT64 sizeOtherInfo = tp->ReadOtherInfoFromFile(currFilePos, fileTPColl);
- tp->ReadFromFile(fileTP, numTargetFactors, numSourceFactors);
+ tp->ReadFromFile(fileTP);
currFilePos += sizeOtherInfo;
diff --git a/OnDiskPt/TargetPhraseCollection.h b/OnDiskPt/TargetPhraseCollection.h
index 50fcd6679..1215ce5a6 100644
--- a/OnDiskPt/TargetPhraseCollection.h
+++ b/OnDiskPt/TargetPhraseCollection.h
@@ -33,6 +33,8 @@ class WordPenaltyProducer;
namespace OnDiskPt
{
+/** A vector of target phrases
+ */
class TargetPhraseCollection
{
class TargetPhraseOrderByScore
diff --git a/OnDiskPt/Vocab.cpp b/OnDiskPt/Vocab.cpp
index 1e9e4186a..5de620b75 100644
--- a/OnDiskPt/Vocab.cpp
+++ b/OnDiskPt/Vocab.cpp
@@ -21,7 +21,6 @@
#include <fstream>
#include "OnDiskWrapper.h"
#include "Vocab.h"
-#include "../moses/src/FactorCollection.h"
using namespace std;
@@ -69,13 +68,13 @@ void Vocab::Save(OnDiskWrapper &onDiskWrapper)
}
}
-UINT64 Vocab::AddVocabId(const std::string &factorString)
+UINT64 Vocab::AddVocabId(const std::string &str)
{
// find string id
- CollType::const_iterator iter = m_vocabColl.find(factorString);
+ CollType::const_iterator iter = m_vocabColl.find(str);
if (iter == m_vocabColl.end()) {
// add new vocab entry
- m_vocabColl[factorString] = m_nextId;
+ m_vocabColl[str] = m_nextId;
return m_nextId++;
} else {
// return existing entry
@@ -83,10 +82,10 @@ UINT64 Vocab::AddVocabId(const std::string &factorString)
}
}
-UINT64 Vocab::GetVocabId(const std::string &factorString, bool &found) const
+UINT64 Vocab::GetVocabId(const std::string &str, bool &found) const
{
// find string id
- CollType::const_iterator iter = m_vocabColl.find(factorString);
+ CollType::const_iterator iter = m_vocabColl.find(str);
if (iter == m_vocabColl.end()) {
found = false;
return 0; //return whatever
@@ -97,14 +96,4 @@ UINT64 Vocab::GetVocabId(const std::string &factorString, bool &found) const
}
}
-const Moses::Factor *Vocab::GetFactor(UINT32 vocabId, Moses::FactorType factorType, Moses::FactorDirection direction, bool isNonTerminal) const
-{
- string str = GetString(vocabId);
- if (isNonTerminal) {
- str = str.substr(1, str.size() - 2);
- }
- const Moses::Factor *factor = Moses::FactorCollection::Instance().AddFactor(direction, factorType, str);
- return factor;
-}
-
}
diff --git a/OnDiskPt/Vocab.h b/OnDiskPt/Vocab.h
index f8c1dd649..e2a15f8cb 100644
--- a/OnDiskPt/Vocab.h
+++ b/OnDiskPt/Vocab.h
@@ -22,16 +22,15 @@
#include <map>
#include "../moses/src/TypeDef.h"
-namespace Moses
-{
-class Factor;
-}
namespace OnDiskPt
{
class OnDiskWrapper;
+/* A bidirectional map of string<->contiguous id
+ * No distinction between source and target language
+ */
class Vocab
{
protected:
@@ -45,9 +44,8 @@ public:
Vocab()
:m_nextId(1)
{}
- UINT64 AddVocabId(const std::string &factorString);
- UINT64 GetVocabId(const std::string &factorString, bool &found) const;
- const Moses::Factor *GetFactor(UINT32 vocabId, Moses::FactorType factorType, Moses::FactorDirection direction, bool isNonTerminal) const;
+ UINT64 AddVocabId(const std::string &str);
+ UINT64 GetVocabId(const std::string &str, bool &found) const;
const std::string &GetString(UINT32 vocabId) const {
return m_lookup[vocabId];
}
diff --git a/OnDiskPt/Word.cpp b/OnDiskPt/Word.cpp
index 69a104970..52e49d8d9 100644
--- a/OnDiskPt/Word.cpp
+++ b/OnDiskPt/Word.cpp
@@ -18,10 +18,14 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
+#include "../moses/src/FactorCollection.h"
#include "../moses/src/Util.h"
#include "../moses/src/Word.h"
#include "Word.h"
+#include "util/tokenize_piece.hh"
+#include "util/exception.hh"
+
using namespace std;
namespace OnDiskPt
@@ -29,7 +33,7 @@ namespace OnDiskPt
Word::Word(const Word &copy)
:m_isNonTerminal(copy.m_isNonTerminal)
- ,m_factors(copy.m_factors)
+ ,m_vocabId(copy.m_vocabId)
{}
Word::~Word()
@@ -40,23 +44,21 @@ void Word::CreateFromString(const std::string &inString, Vocab &vocab)
if (inString.substr(0, 1) == "[" && inString.substr(inString.size() - 1, 1) == "]") {
// non-term
m_isNonTerminal = true;
+ string str = inString.substr(1, inString.size() - 2);
+ m_vocabId = vocab.AddVocabId(str);
} else {
m_isNonTerminal = false;
+ m_vocabId = vocab.AddVocabId(inString);
}
- m_factors.resize(1);
- m_factors[0] = vocab.AddVocabId(inString);
}
size_t Word::WriteToMemory(char *mem) const
{
UINT64 *vocabMem = (UINT64*) mem;
+ vocabMem[0] = m_vocabId;
- // factors
- for (size_t ind = 0; ind < m_factors.size(); ind++)
- vocabMem[ind] = m_factors[ind];
-
- size_t size = sizeof(UINT64) * m_factors.size();
+ size_t size = sizeof(UINT64);
// is non-term
char bNonTerm = (char) m_isNonTerminal;
@@ -66,16 +68,12 @@ size_t Word::WriteToMemory(char *mem) const
return size;
}
-size_t Word::ReadFromMemory(const char *mem, size_t numFactors)
+size_t Word::ReadFromMemory(const char *mem)
{
- m_factors.resize(numFactors);
UINT64 *vocabMem = (UINT64*) mem;
+ m_vocabId = vocabMem[0];
- // factors
- for (size_t ind = 0; ind < m_factors.size(); ind++)
- m_factors[ind] = vocabMem[ind];
-
- size_t memUsed = sizeof(UINT64) * m_factors.size();
+ size_t memUsed = sizeof(UINT64);
// is non-term
char bNonTerm;
@@ -86,34 +84,34 @@ size_t Word::ReadFromMemory(const char *mem, size_t numFactors)
return memUsed;
}
-size_t Word::ReadFromFile(std::fstream &file, size_t numFactors)
+size_t Word::ReadFromFile(std::fstream &file)
{
- size_t memAlloc = numFactors * sizeof(UINT64) + sizeof(char);
+ size_t memAlloc = sizeof(UINT64) + sizeof(char);
char *mem = (char*) malloc(memAlloc);
file.read(mem, memAlloc);
- size_t memUsed = ReadFromMemory(mem, numFactors);
+ size_t memUsed = ReadFromMemory(mem);
CHECK(memAlloc == memUsed);
free(mem);
return memUsed;
}
-Moses::Word *Word::ConvertToMoses(Moses::FactorDirection direction
- , const std::vector<Moses::FactorType> &outputFactorsVec
- , const Vocab &vocab) const
-{
- Moses::Word *ret = new Moses::Word(m_isNonTerminal);
-
- for (size_t ind = 0; ind < m_factors.size(); ++ind) {
- Moses::FactorType factorType = outputFactorsVec[ind];
- UINT32 vocabId = m_factors[ind];
- const Moses::Factor *factor = vocab.GetFactor(vocabId, factorType, direction, m_isNonTerminal);
- ret->SetFactor(factorType, factor);
- }
+void Word::ConvertToMoses(
+ const std::vector<Moses::FactorType> &outputFactorsVec,
+ const Vocab &vocab,
+ Moses::Word &overwrite) const {
+ Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
+ overwrite = Moses::Word(m_isNonTerminal);
- return ret;
+ // TODO: this conversion should have been done at load time.
+ util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
+ for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
+ UTIL_THROW_IF(!tok, util::Exception, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
+ overwrite.SetFactor(*t, factorColl.AddFactor(*tok));
+ }
+ UTIL_THROW_IF(tok, util::Exception, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
int Word::Compare(const Word &compare) const
@@ -123,9 +121,9 @@ int Word::Compare(const Word &compare) const
if (m_isNonTerminal != compare.m_isNonTerminal)
return m_isNonTerminal ?-1 : 1;
- if (m_factors < compare.m_factors)
+ if (m_vocabId < compare.m_vocabId)
ret = -1;
- else if (m_factors > compare.m_factors)
+ else if (m_vocabId > compare.m_vocabId)
ret = 1;
else
ret = 0;
@@ -147,27 +145,14 @@ bool Word::operator==(const Word &compare) const
void Word::DebugPrint(ostream &out, const Vocab &vocab) const
{
- std::vector<UINT64>::const_iterator iter;
- for (size_t ind = 0; ind < m_factors.size() - 1; ++ind) {
- UINT64 vocabId = *iter;
- const string &str = vocab.GetString(vocabId);
- out << str << "|";
- }
-
- // last
- UINT64 vocabId = m_factors.back();
- const string &str = vocab.GetString(vocabId);
- out << str;
+ const string &str = vocab.GetString(m_vocabId);
+ out << str;
}
std::ostream& operator<<(std::ostream &out, const Word &word)
{
out << "(";
-
- std::vector<UINT64>::const_iterator iter;
- for (iter = word.m_factors.begin(); iter != word.m_factors.end(); ++iter) {
- out << *iter << "|";
- }
+ out << word.m_vocabId;
out << (word.m_isNonTerminal ? "n" : "t");
out << ")";
diff --git a/OnDiskPt/Word.h b/OnDiskPt/Word.h
index dc7a2424d..8c65cf7e5 100644
--- a/OnDiskPt/Word.h
+++ b/OnDiskPt/Word.h
@@ -22,6 +22,7 @@
#include <vector>
#include <iostream>
#include <fstream>
+#include <boost/shared_ptr.hpp>
#include "Vocab.h"
namespace Moses
@@ -33,21 +34,24 @@ namespace OnDiskPt
{
class Vocab;
+/* A wrapper around a vocab id, and a boolean indicating whther it is a term or non-term.
+ * Factors can be represented by using a vocab string with | character, eg go|VB
+ */
class Word
{
friend std::ostream& operator<<(std::ostream&, const Word&);
protected:
bool m_isNonTerminal;
- std::vector<UINT64> m_factors;
+ UINT64 m_vocabId;
public:
explicit Word()
{}
- explicit Word(size_t numFactors, bool isNonTerminal)
+ explicit Word(bool isNonTerminal)
:m_isNonTerminal(isNonTerminal)
- ,m_factors(numFactors)
+ ,m_vocabId(0)
{}
Word(const Word &copy);
@@ -60,16 +64,17 @@ public:
}
size_t WriteToMemory(char *mem) const;
- size_t ReadFromMemory(const char *mem, size_t numFactors);
- size_t ReadFromFile(std::fstream &file, size_t numFactors);
+ size_t ReadFromMemory(const char *mem);
+ size_t ReadFromFile(std::fstream &file);
- void SetVocabId(size_t ind, UINT32 vocabId) {
- m_factors[ind] = vocabId;
+ void SetVocabId(UINT32 vocabId) {
+ m_vocabId = vocabId;
}
- Moses::Word *ConvertToMoses(Moses::FactorDirection direction
- , const std::vector<Moses::FactorType> &outputFactorsVec
- , const Vocab &vocab) const;
+ void ConvertToMoses(
+ const std::vector<Moses::FactorType> &outputFactorsVec,
+ const Vocab &vocab,
+ Moses::Word &overwrite) const;
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
@@ -78,5 +83,7 @@ public:
bool operator==(const Word &compare) const;
};
+
+typedef boost::shared_ptr<Word> WordPtr;
}
diff --git a/OnDiskPt/queryOnDiskPt.cpp b/OnDiskPt/queryOnDiskPt.cpp
index 9a2d97680..97c100a29 100644
--- a/OnDiskPt/queryOnDiskPt.cpp
+++ b/OnDiskPt/queryOnDiskPt.cpp
@@ -38,20 +38,20 @@ void Tokenize(OnDiskPt::Phrase &phrase
if (splitPos == string::npos) {
// lhs - only 1 word
- Word *word = new Word();
+ WordPtr word (new Word());
word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
phrase.AddWord(word);
} else {
// source & target non-terms
if (addSourceNonTerm) {
- Word *word = new Word();
+ WordPtr word( new Word());
word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
phrase.AddWord(word);
}
wordStr = token.substr(splitPos, tokSize - splitPos);
if (addTargetNonTerm) {
- Word *word = new Word();
+ WordPtr word(new Word());
word->CreateFromString(wordStr, onDiskWrapper.GetVocab());
phrase.AddWord(word);
}
@@ -59,7 +59,7 @@ void Tokenize(OnDiskPt::Phrase &phrase
}
} else {
// term
- Word *word = new Word();
+ WordPtr word(new Word());
word->CreateFromString(token, onDiskWrapper.GetVocab());
phrase.AddWord(word);
}
diff --git a/scripts/ems/biconcor/Alignment.cpp b/biconcor/Alignment.cpp
index e73e18840..e73e18840 100644
--- a/scripts/ems/biconcor/Alignment.cpp
+++ b/biconcor/Alignment.cpp
diff --git a/scripts/ems/biconcor/Alignment.h b/biconcor/Alignment.h
index d3de47863..d3de47863 100644
--- a/scripts/ems/biconcor/Alignment.h
+++ b/biconcor/Alignment.h
diff --git a/scripts/ems/biconcor/Jamfile b/biconcor/Jamfile
index 003193067..76f5c7aaf 100644
--- a/scripts/ems/biconcor/Jamfile
+++ b/biconcor/Jamfile
@@ -1,3 +1,2 @@
exe biconcor : Vocabulary.cpp SuffixArray.cpp TargetCorpus.cpp Alignment.cpp Mismatch.cpp PhrasePair.cpp PhrasePairCollection.cpp biconcor.cpp base64.cpp ;
-install legacy : biconcor : <location>. ;
diff --git a/scripts/ems/biconcor/Mismatch.cpp b/biconcor/Mismatch.cpp
index 31140b200..31140b200 100644
--- a/scripts/ems/biconcor/Mismatch.cpp
+++ b/biconcor/Mismatch.cpp
diff --git a/scripts/ems/biconcor/Mismatch.h b/biconcor/Mismatch.h
index c0063d049..c0063d049 100644
--- a/scripts/ems/biconcor/Mismatch.h
+++ b/biconcor/Mismatch.h
diff --git a/scripts/ems/biconcor/PhrasePair.cpp b/biconcor/PhrasePair.cpp
index 9c16be77c..9c16be77c 100644
--- a/scripts/ems/biconcor/PhrasePair.cpp
+++ b/biconcor/PhrasePair.cpp
diff --git a/scripts/ems/biconcor/PhrasePair.h b/biconcor/PhrasePair.h
index f8a7881a0..f8a7881a0 100644
--- a/scripts/ems/biconcor/PhrasePair.h
+++ b/biconcor/PhrasePair.h
diff --git a/scripts/ems/biconcor/PhrasePairCollection.cpp b/biconcor/PhrasePairCollection.cpp
index 17c95d24a..17c95d24a 100644
--- a/scripts/ems/biconcor/PhrasePairCollection.cpp
+++ b/biconcor/PhrasePairCollection.cpp
diff --git a/scripts/ems/biconcor/PhrasePairCollection.h b/biconcor/PhrasePairCollection.h
index f88bfc10f..f88bfc10f 100644
--- a/scripts/ems/biconcor/PhrasePairCollection.h
+++ b/biconcor/PhrasePairCollection.h
diff --git a/scripts/ems/biconcor/SuffixArray.cpp b/biconcor/SuffixArray.cpp
index 15e6b47b0..15e6b47b0 100644
--- a/scripts/ems/biconcor/SuffixArray.cpp
+++ b/biconcor/SuffixArray.cpp
diff --git a/scripts/ems/biconcor/SuffixArray.h b/biconcor/SuffixArray.h
index af7f5567e..af7f5567e 100644
--- a/scripts/ems/biconcor/SuffixArray.h
+++ b/biconcor/SuffixArray.h
diff --git a/scripts/ems/biconcor/TargetCorpus.cpp b/biconcor/TargetCorpus.cpp
index d331a548a..d331a548a 100644
--- a/scripts/ems/biconcor/TargetCorpus.cpp
+++ b/biconcor/TargetCorpus.cpp
diff --git a/scripts/ems/biconcor/TargetCorpus.h b/biconcor/TargetCorpus.h
index 5a35356f9..5a35356f9 100644
--- a/scripts/ems/biconcor/TargetCorpus.h
+++ b/biconcor/TargetCorpus.h
diff --git a/scripts/ems/biconcor/Vocabulary.cpp b/biconcor/Vocabulary.cpp
index 9c35b3feb..9c35b3feb 100644
--- a/scripts/ems/biconcor/Vocabulary.cpp
+++ b/biconcor/Vocabulary.cpp
diff --git a/scripts/ems/biconcor/Vocabulary.h b/biconcor/Vocabulary.h
index 674912006..674912006 100644
--- a/scripts/ems/biconcor/Vocabulary.h
+++ b/biconcor/Vocabulary.h
diff --git a/scripts/ems/biconcor/base64.cpp b/biconcor/base64.cpp
index 2a863d161..2a863d161 100644
--- a/scripts/ems/biconcor/base64.cpp
+++ b/biconcor/base64.cpp
diff --git a/scripts/ems/biconcor/base64.h b/biconcor/base64.h
index 20398dd37..20398dd37 100644
--- a/scripts/ems/biconcor/base64.h
+++ b/biconcor/base64.h
diff --git a/scripts/ems/biconcor/biconcor.cpp b/biconcor/biconcor.cpp
index a25e63cb7..a25e63cb7 100644
--- a/scripts/ems/biconcor/biconcor.cpp
+++ b/biconcor/biconcor.cpp
diff --git a/bjam b/bjam
index d1ac8a555..2b0232c8a 100755
--- a/bjam
+++ b/bjam
@@ -4,8 +4,8 @@ if
bjam="$(which bjam 2>/dev/null)" && #exists
[ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true
! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" </dev/null >/dev/null && #bjam in path isn't this script
- "${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build)
- "${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough.
+ "${bjam}" --sanity-test 2>/dev/null |grep Sane >/dev/null && #The test in jam-files/sanity.jam passes
+ (cd jam-files/fail && ! "${bjam}") >/dev/null #Returns non-zero on failure
then
#Delegate to system bjam
exec "${bjam}" "$@"
diff --git a/contrib/Extract_TMX_Corpus/Extract_TMX_Corpus.py b/contrib/Extract_TMX_Corpus/Extract_TMX_Corpus.py
deleted file mode 100755
index fd67d4b3a..000000000
--- a/contrib/Extract_TMX_Corpus/Extract_TMX_Corpus.py
+++ /dev/null
@@ -1,594 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf_8 -*-
-"""This program is used to prepare corpora extracted from TMX files.
-It is particularly useful for translators not very familiar
-with machine translation systems that want to use Moses with a highly customised
-corpus.
-
-It extracts from a directory containing TMX files (and from all of its subdirectories)
-all the segments of one or more language pairs (except empty segments and segments that are equal in both languages)
-and removes all other information. It then creates 2 separate monolingual files per language pair,
-both of which have strictly parallel (aligned) segments. This kind of corpus can easily be transformed
-in other formats, if need be.
-
-The program requires that Pythoncard and wxPython (as well as Python) be previously installed.
-
-Copyright 2009, João L. A. C. Rosas
-
-Distributed under GNU GPL v3 licence (see http://www.gnu.org/licenses/)
-
-E-mail: extracttmxcorpus@gmail.com """
-
-__version__ = "$Revision: 1.043$"
-__date__ = "$Date: 2011/08/13$"
-__author__="$João L. A. C. Rosas$"
-#Special thanks to Gary Daine for a helpful suggestion about a regex expression
-#Updated to run on Linux by Tom Hoar
-
-from PythonCard import clipboard, dialog, graphic, model
-from PythonCard.components import button, combobox,statictext,checkbox,staticbox
-import wx
-import os, re
-import string
-import sys
-from time import strftime
-import codecs
-
-
-class Extract_TMX_Corpus(model.Background):
-
- def on_initialize(self, event):
- """Initialize values
-
-
- @self.inputdir: directory whose files will be treated
- @self.outputfile: base name of the resulting corpora files
- @self.outputpath: root directory of the resulting corpora files
- @currdir: program's current working directory
- @self.languages: list of languages whose segments can be processed
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @self.components.cbStartingLanguage.items: list of values of the Starting Language combobox of the program's window
- @self.components.cbDestinationLanguage.items: list of values of the Destination Language combobox of the program's window
- @self.numtus: number of translation units extracted so far
- @self.presentfile: TMX file being currently processed
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.wroteactions: variable that indicates whether the actions files has already been written to
- """
-
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- #Get directory where program file is and ...
- currdir=os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
- #... load the file ("LanguageCodes.txt") with the list of languages that the program can process
- try:
- self.languages=open(currdir+os.sep+r'LanguageCodes.txt','r+').readlines()
- except:
- # If the languages file doesn't exist in the program directory, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguageCodes.txt" is missing. The program will now close.', 'Essential file missing')
- sys.exit()
- #remove end of line marker from each line in "LanguageCodes.txt"
- for lang in range(len(self.languages)):
- self.languages[lang]=self.languages[lang].rstrip()
- self.startinglanguage=''
- self.destinationlanguage=''
- #Insert list of language names in appropriate program window's combo boxes
- self.components.cbStartingLanguage.items=self.languages
- self.components.cbDestinationLanguage.items=self.languages
- self.tottus=0
- self.numtus=0
- self.numequaltus=0
- self.presentfile=''
- self.errortypes=''
- self.wroteactions=False
- self.errors=''
-
- def extract_language_segments_tmx(self,text):
- """Extracts TMX language segments from TMX files
-
- @text: the text of the TMX file
- @pattern: compiled regular expression object, which can be used for matching
- @tus: list that collects the translation units of the text
- @segs: list that collects the segment units of the relevant pair of languages
- @numtus: number of translation units extracted
- @present_tu: variable that stocks the translation unit relevant segments (of the chosen language pair) that are being processed
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- """
- #print 'extract_language_segments: start at '+strftime('%H-%M-%S')
- result=('','')
- try:
- if text:
- # Convert character entities to "normal" characters
- pattern=re.compile('&gt;',re.U)
- text=re.sub(pattern,'>',text)
- pattern=re.compile('&lt;',re.U)
- text=re.sub(pattern,'<',text)
- pattern=re.compile('&amp;',re.U)
- text=re.sub(pattern,'&',text)
- pattern=re.compile('&quot;',re.U)
- text=re.sub(pattern,'"',text)
- pattern=re.compile('&apos;',re.U)
- text=re.sub(pattern,"'",text)
- # Extract translation units
- pattern=re.compile('(?s)<tu.*?>(.*?)</tu>')
- tus=re.findall(pattern,text)
- ling1=''
- ling2=''
- #Extract relevant segments and store them in the @text variable
- if tus:
- for tu in tus:
- pattern=re.compile('(?s)<tuv.*?lang="'+self.startinglanguage+'">.*?<seg>(.*?)</seg>.*?<tuv.*?lang="'+self.destinationlanguage+'">.*?<seg>(.*?)</seg>')
- present_tu=re.findall(pattern,tu)
- self.tottus+=1
- #reject empty segments
- if present_tu: # and not present_tu[0][0].startswith("<")
- present_tu1=present_tu[0][0].strip()
- present_tu2=present_tu[0][1].strip()
- present_tu1 = re.sub('<bpt.*</bpt>', '', present_tu1)
- present_tu2 = re.sub('<bpt.*</bpt>', '', present_tu2)
- present_tu1 = re.sub(r'<ept.*</ept>', '', present_tu1)
- present_tu2 = re.sub(r'<ept.*</ept>', '', present_tu2)
- present_tu1 = re.sub(r'<ut.*</ut>', '', present_tu1)
- present_tu2 = re.sub(r'<ut.*</ut>', '', present_tu2)
- present_tu1 = re.sub(r'<ph.*</ph>', '', present_tu1)
- present_tu2 = re.sub(r'<ph.*</ph>', '', present_tu2)
- #Thanks to Gary Daine
- present_tu1 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu1)
- #Thanks to Gary Daine
- present_tu2 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu2)
- if present_tu1 != present_tu2:
- x=len(present_tu1)
- y=len(present_tu2)
- if (x <= y*3) and (y <= x*3):
- ling1=ling1+present_tu1+'\n'
- ling2=ling2+present_tu2+'\n'
- self.numtus+=1
- else:
- self.numequaltus+=1
- pattern=re.compile('(?s)<tuv.*?lang="'+self.destinationlanguage+'">.*?<seg>(.*?)</seg>.*?<tuv.*?lang="'+self.startinglanguage+'">.*?<seg>(.*?)</seg>')
- present_tu=re.findall(pattern,tu)
- #print present_tu
- if present_tu:
- present_tu1=present_tu[0][1].strip()
- present_tu2=present_tu[0][0].strip()
- present_tu1 = re.sub('<bpt.*</bpt>', '', present_tu1)
- present_tu2 = re.sub('<bpt.*</bpt>', '', present_tu2)
- present_tu1 = re.sub(r'<ept.*</ept>', '', present_tu1)
- present_tu2 = re.sub(r'<ept.*</ept>', '', present_tu2)
- present_tu1 = re.sub(r'<ut.*</ut>', '', present_tu1)
- present_tu2 = re.sub(r'<ut.*</ut>', '', present_tu2)
- present_tu1 = re.sub(r'<ph.*</ph>', '', present_tu1)
- present_tu2 = re.sub(r'<ph.*</ph>', '', present_tu2)
- #Thanks to Gary Daine
- present_tu1 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu1)
- #Thanks to Gary Daine
- present_tu2 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu2)
- if present_tu1 != present_tu2:
- x=len(present_tu1)
- y=len(present_tu2)
- if (x <= y*3) and (y <= x*3):
- ling1=ling1+present_tu1+'\n'
- ling2=ling2+present_tu2+'\n'
- self.numtus+=1
- else:
- self.numequaltus+=1
- result=(ling1,ling2)
- except:
- self.errortypes=self.errortypes+' - Extract Language Segments error\n'
- return result
-
- def locate(self,pattern, basedir):
- """Locate all files matching supplied filename pattern in and below
- supplied root directory.
-
- @pattern: something like '*.tmx'
- @basedir:whole directory to be treated
- """
- import fnmatch
- for path, dirs, files in os.walk(os.path.abspath(basedir)):
- for filename in fnmatch.filter(files, pattern):
- yield os.path.join(path, filename)
-
- def getallsegments(self):
- """Get all language segments from the TMX files in the specified
- directory
-
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @fileslist: list of files that should be processed
- @self.inputdir: directory whose files will be treated
- @startfile:output file containing all segments in the @startinglanguage; file
- will be created in @self.inputdir
- @destfile:output file containing all segments in the @destinationlanguage; file
- will be created in @self.inputdir
- @actions:output file indicating the names of all files that were processed without errors; file
- will be created in @self.inputdir
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.presentfile: TMX file being currently processed
- @preptext: parsed XML text with all tags extracted and in string format
- @tus: list that receives the extracted TMX language translation units just with segments of the relevant language pair
- @num: loop control variable between 0 and length of @tus - 1
- @self.numtus: number of translation units extracted so far
- """
- self.statusBar.text='Processing '+ self.inputdir
- try:
- # Get a list of all TMX files that need to be processed
- fileslist=self.locate('*.tmx',self.inputdir)
- # Open output files for writing
- startfile=open(self.outputpath+os.sep+self.startinglanguage+ ' ('+self.destinationlanguage+')_' +self.outputfile,'w+b')
- destfile=open(self.outputpath+os.sep+self.destinationlanguage+' ('+self.startinglanguage+')_'+self.outputfile,'w+b')
- actions=open(self.outputpath+os.sep+'_processing_info'+os.sep+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'actions_'+self.outputfile+'.txt','w+')
- except:
- # if any error up to now, add the name of the TMX file to the output file @errors
- self.errortypes=self.errortypes+' - Get All Segments: creation of output files error\n'
- if fileslist:
- # For each relevant TMX file ...
- for self.presentfile in fileslist:
- self.errortypes=''
- try:
- print self.presentfile
- fileObj = codecs.open(self.presentfile, "rb", "utf-16","replace",0 )
- pos=0
- while True:
- # read a new chunk of text...
- preptext = fileObj.read(692141)
- if not preptext:
- break
- last5=''
- y=''
- #... and make it end at the end of a translation unit
- while True:
- y=fileObj.read(1)
- if not y:
- break
- last5=last5+y
- if '</tu>' in last5:
- break
- preptext=preptext+last5
- # ... and extract its relevant segments ...
- if not self.errortypes:
- segs1,segs2=self.extract_language_segments_tmx(preptext)
- preptext=''
- #... and write those segments to the output files
- if segs1 and segs2:
- try:
- startfile.write('%s' % (segs1.encode('utf-8','strict')))
- destfile.write('%s' % (segs2.encode('utf-8','strict')))
- except:
- self.errortypes=self.errortypes+' - Get All Segments: writing of output files error\n'
- print 'erro'
- #if no errors up to now, insert the name of the TMX file in the @actions output file
- #encoding is necessary because @actions may be in a directory whose name has special diacritic characters
- if self.errortypes=='':
- try:
- actions.write(self.presentfile.encode('utf_8','replace')+'\n')
- self.wroteactions=True
- except:
- self.errortypes=self.errortypes+' - Get All Segments: writing of actions file error\n'
- fileObj.close()
- except:
- self.errortypes=self.errortypes+' - Error reading input file\n'
- try:
- if self.wroteactions:
- actions.write('\n*************************************************\n\n')
- actions.write('Total number of translation units: '+str(self.tottus)+'\n')
- actions.write('Number of extracted translation units (source segment not equal to destination segment): '+str(self.numtus)+'\n')
- actions.write('Number of removed translation units (source segment equal to destination segment): '+str(self.numequaltus)+'\n')
- actions.write('Number of empty translation units (source segment and/or destination segment not present): '+str(self.tottus-self.numequaltus-self.numtus))
-
- except:
- self.errortypes=self.errortypes+' - Get All Segments: writing of actions file error\n'
- # Close output files
- actions.close()
- destfile.close()
- startfile.close()
-
- def SelectDirectory(self):
- """Select the directory where the TMX files to be processed are
-
- @result: object returned by the dialog window with attributes accepted (true if user clicked OK button, false otherwise) and
- path (list of strings containing the full pathnames to all files selected by the user)
- @self.inputdir: directory where TMX files to be processed are (and where output files will be written)
- @self.statusBar.text: text displayed in the program window status bar"""
-
- result= dialog.directoryDialog(self, 'Choose a directory', 'a')
- if result.accepted:
- self.inputdir=result.path
- self.statusBar.text=self.inputdir+' selected.'
-
- def on_menuFileSelectDirectory_select(self, event):
- self.SelectDirectory()
-
- def on_btnSelectDirectory_mouseClick(self, event):
- self.SelectDirectory()
-
- def GetOutputFileBaseName(self):
- """Get base name of the corpus files
-
- @expr: variable containing the base name of the output files
- @wildcard: list of wildcards used in the dialog window to filter types of files
- @result: object returned by the Open File dialog window with attributes accepted (true if user clicked OK button, false otherwise) and
- path (list of strings containing the full pathnames to all files selected by the user)
- @self.inputdir: directory where TMX files to be processed are (and where output files will be written)
- @location: variable containing the full path to the base name output file
- @self.outputpath: base directory of output files
- @self.outputfile: base name of the output files
- """
-
- # Default base name of the corpora files that will be produced. If you choose as base name "Corpus.txt", as starting language "EN-GB" and as destination
- # language "FR-FR" the corpora files will be named "Corpus_EN-GB.txt" and "Corpus_FR-FR.txt"
- expr='Corpus'
- #open a dialog that lets you choose the base name of the corpora files that will be produced.
- wildcard = "Text files (*.txt;*.TXT)|*.txt;*.TXT"
- result = dialog.openFileDialog(None, "Name of corpus file", self.inputdir,expr,wildcard=wildcard)
- if result.accepted:
- location=os.path.split(result.paths[0])
- self.outputpath=location[0]
- self.outputfile = location[1]
- if not os.path.exists(self.outputpath+os.sep+'_processing_info'):
- try:
- os.mkdir(self.outputpath+os.sep+'_processing_info')
- except:
- result1 = dialog.alertDialog(self, "The program can't create the directory " + self.outputpath+os.sep+r'_processing_info, which is necessary for ' + \
- 'the creation of the output files. The program will now close.','Error')
- sys.exit()
-
- def on_menuGetOutputFileBaseName_select(self, event):
- self.GetOutputFileBaseName()
-
- def on_btnGetOutputFileBaseName_mouseClick(self, event):
- self.GetOutputFileBaseName()
-
- def ExtractCorpus(self):
- """Get the directory where TMX files to be processed are, get the choice of the pair of languages that will be treated and launch the extraction
- of the corpus
-
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.presentfile: TMX file being currently processed
- @self.numtus: number of translation units extracted so far
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @self.inputdir: directory whose files will be treated
- @self.components.cbStartingLanguage.items: list of values of the Starting Language combobox of the program's window
- @self.components.cbDestinationLanguage.items: list of values of the Destination Language combobox of the program's window
- @self.outputfile: base name of the resulting corpora files
- @self.errors:output file indicating the types of error that occurred in each processed TMX file
- @self.numtus: number of translation units extracted so far
- """
-
- print 'Extract corpus: started at '+strftime('%H-%M-%S')
- self.errortypes=''
- self.presentfile=''
- self.numtus=0
- #get the startinglanguage name (e.g.: "EN-GB") from the program window
- self.startinglanguage=self.components.cbStartingLanguage.text
- #get the destinationlanguage name from the program window
- self.destinationlanguage=self.components.cbDestinationLanguage.text
- #if the directory where TMX files (@inputdir) or the pair of languages were not previously chosen, open a dialog box explaining
- #the conditions that have to be met so that the extraction can be made and do nothing...
- if (self.inputdir=='') or (self.components.cbStartingLanguage.text=='') or (self.components.cbDestinationLanguage.text=='') or (self.outputfile=='') \
- or (self.components.cbStartingLanguage.text==self.components.cbDestinationLanguage.text):
- result = dialog.alertDialog(self, 'In order to extract a corpus, you need to:\n\n 1) indicate the directory where the TMX files are,\n 2)' \
- +' the starting language,\n 3) the destination language (the 2 languages must be different), and\n 4) the base name of the output files.', 'Error')
-
- #...else, go ahead
- else:
- try:
- self.errors=open(self.outputpath+os.sep+'_processing_info'+os.sep+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'errors_'+self.outputfile+'.txt','w+')
- except:
- pass
- self.statusBar.text='Please wait. This can be a long process ...'
- #Launch the segment extraction
- self.numtus=0
- self.getallsegments()
- # if any error up to now, add the name of the TMX file to the output file @errors
- if self.errortypes:
- try:
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes)
- except:
- pass
- try:
- self.errors.close()
- except:
- pass
- self.statusBar.text='Processing finished.'
- #Open dialog box telling that processing is finished and where can the resulting files be found
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- print 'Extract corpus: finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Processing done. Results found in:\n\n1) '+ \
- self.outputpath+os.sep+self.startinglanguage+ ' ('+self.destinationlanguage+')_' +self.outputfile+ ' (starting language corpus)\n2) '+ \
- self.outputpath+os.sep+self.destinationlanguage+' ('+self.startinglanguage+')_'+self.outputfile+ \
- ' (destination language corpus)\n3) '+self.outputpath+os.sep+'_processing_info'+os.sep+self.startinglanguage+ '-'+self.destinationlanguage+'_'+ \
- 'errors_'+self.outputfile+'.txt'+ ' (list of files that caused errors)\n4) '+self.outputpath+os.sep+'_processing_info'+os.sep+self.startinglanguage+ \
- '-'+self.destinationlanguage+'_'+'actions_'+self.outputfile+'.txt'+ ' (list of files where processing was successful)', 'Processing Done')
-
- def on_menuFileExtractCorpus_select(self, event):
- self.ExtractCorpus()
- def on_btnExtractCorpus_mouseClick(self, event):
- self.ExtractCorpus()
-
- def ExtractAllCorpora(self):
- """Extracts all the LanguagePairs that can be composed with the languages indicated in the file "LanguageCodes.txt"
-
- @self.presentfile: TMX file being currently processed
- @self.numtus: number of translation units extracted so far
- @numcorpora: number of language pair being processed
- @self.inputdir: directory whose files will be treated
- @self.outputfile: base name of the resulting corpora files
- @self.errors:output file indicating the types of error that occurred in each processed TMX file
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @lang1: code of the starting language
- @lang2: code of the destination language
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.wroteactions: variable that indicates whether the actions files has already been written to
- """
-
- print 'Extract All Corpora: started at '+strftime('%H-%M-%S')
- self.presentfile=''
- self.numtus=0
- numcorpora=0
- #if the directory where TMX files (@inputdir) or the base name of the output files were not previously chosen, open a dialog box explaining
- #the conditions that have to be met so that the extraction can be made and do nothing...
- if (self.inputdir=='') or (self.outputfile==''):
- result = dialog.alertDialog(self, 'In order to extract all corpora, you need to:\n\n 1) indicate the directory where the TMX files are, and\n ' \
- + '2) the base name of the output files.', 'Error')
- #...else, go ahead
- else:
- try:
- for lang1 in self.languages:
- for lang2 in self.languages:
- if lang2 > lang1:
- print lang1+'/'+lang2+' corpus being created...'
- numcorpora=numcorpora+1
- self.errortypes=''
- self.numtus=0
- self.wroteactions=False
- #get the startinglanguage name (e.g.: "EN-GB") from the program window
- self.startinglanguage=lang1
- #get the destinationlanguage name from the program window
- self.destinationlanguage=lang2
- try:
- self.errors=open(self.outputpath+os.sep+'_processing_info'+os.sep+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'errors.txt','w+')
- except:
- pass
- self.statusBar.text='Language pair '+str(numcorpora)+' being processed. Please wait.'
- #Launch the segment extraction
- self.getallsegments()
- # if any error up to now, add the name of the TMX file to the output file @errors
- if self.errortypes:
- try:
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes.encode('utf_8','replace'))
- except:
- pass
- try:
- self.errors.close()
- except:
- pass
- self.statusBar.text='Processing finished.'
- except:
- self.errortypes=self.errortypes+' - Extract All Corpora error\n'
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes.encode('utf_8','replace'))
- self.errors.close()
- #Open dialog box telling that processing is finished and where can the resulting files be found
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- print 'Extract All Corpora: finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Results found in: '+ self.outputpath+'.', 'Processing done')
-
-
- def on_menuFileExtractAllCorpora_select(self, event):
- self.ExtractAllCorpora()
- def on_btnExtractAllCorpora_mouseClick(self, event):
- self.ExtractAllCorpora()
-
- def ExtractSomeCorpora(self):
- """Extracts the segments of the LanguagePairs indicated in the file "LanguagePairs.txt" located in the program's root directory
-
- @self.presentfile: TMX file being currently processed
- @self.numtus: number of translation units extracted so far
- @currdir: current working directory of the program
- @pairsoflanguages: list of the pairs of language that are going to be processed
- @self.languages: list of languages whose segments can be processed
- @numcorpora: number of language pair being processed
- @self.inputdir: directory whose files will be treated
- @self.outputfile: base name of the resulting corpora files
- @self.errors:output file indicating the types of error that occurred in each processed TMX file
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @lang1: code of the starting language
- @lang2: code of the destination language
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.wroteactions: variable that indicates whether the actions files has already been written to
- """
-
- print 'Extract Some Corpora: started at '+strftime('%H-%M-%S')
- self.presentfile=''
- self.numtus=0
- currdir=os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
- #... load the file ("LanguageCodes.txt") with the list of languages that the program can process
- try:
- pairsoflanguages=open(currdir+os.sep+r'LanguagePairs.txt','r+').readlines()
- except:
- # If the languages file doesn't exist in the program directory, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguagePairs.txt" is missing. The program will now close.', 'Essential file missing')
- sys.exit()
- #remove end of line marker from each line in "LanguageCodes.txt"
- if pairsoflanguages:
- for item in range(len(pairsoflanguages)):
- pairsoflanguages[item]=pairsoflanguages[item].strip()
- pos=pairsoflanguages[item].find("/")
- pairsoflanguages[item]=(pairsoflanguages[item][:pos],pairsoflanguages[item][pos+1:])
- else:
- # If the languages file is empty, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguagePairs.txt" is an essential file and is empty. The program will now close.', 'Empty file')
- sys.exit()
-
- #if the directory where TMX files (@inputdir) or the base name of the output files were not previously chosen, open a dialog box explaining
- #the conditions that have to be met so that the extraction can be made and do nothing...
- if (self.inputdir=='') or (self.outputfile==''):
- result = dialog.alertDialog(self, 'In order to extract all corpora, you need to:\n\n 1) indicate the directory where the TMX files are, and\n ' \
- + '2) the base name of the output files.', 'Error')
- #...else, go ahead
- else:
- numcorpora=0
- for (lang1,lang2) in pairsoflanguages:
- if lang1<>lang2:
- print lang1+'/'+lang2+' corpus being created...'
- self.errortypes=''
- numcorpora=numcorpora+1
- #get the startinglanguage code (e.g.: "EN-GB")
- self.startinglanguage=lang1
- #get the destinationlanguage code
- self.destinationlanguage=lang2
- try:
- self.errors=open(self.outputpath+os.sep+'_processing_info'+os.sep+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'errors.txt','w+')
- except:
- pass
- self.statusBar.text='Language pair '+str(numcorpora)+' being processed. Please wait.'
- #Launch the segment extraction
- self.numtus=0
- self.wroteactions=False
- self.getallsegments()
- # if any error up to now, add the name of the TMX file to the output file @errors
- if self.errortypes:
- try:
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes.encode('utf_8','replace'))
- except:
- pass
- try:
- self.errors.close()
- except:
- pass
- else:
- result = dialog.alertDialog(self, 'A bilingual corpus involves two different languages. The pair "'+lang1+'/'+lang2 + \
- '" will not be processed.', 'Alert')
- self.statusBar.text='Processing finished.'
- #Open dialog box telling that processing is finished and where can the resulting files be found
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- print 'Extract Some Corpora: finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Results found in: '+ self.outputpath+'.', 'Processing done')
-
- def on_menuFileExtractSomeCorpora_select(self, event):
- self.ExtractSomeCorpora()
- def on_btnExtractSomeCorpora_mouseClick(self, event):
- self.ExtractSomeCorpora()
-
- def on_menuHelpHelp_select(self, event):
- try:
- f = open('_READ_ME_FIRST.txt', "r")
- msg = f.read()
- result = dialog.scrolledMessageDialog(self, msg, 'readme.txt')
- except:
- result = dialog.alertDialog(self, 'Help file missing', 'Problem with the Help file')
-
-
-if __name__ == '__main__':
- app = model.Application(Extract_TMX_Corpus)
- app.MainLoop()
diff --git a/contrib/Extract_TMX_Corpus/Extract_TMX_Corpus.rsrc.py b/contrib/Extract_TMX_Corpus/Extract_TMX_Corpus.rsrc.py
deleted file mode 100755
index 93e19edf2..000000000
--- a/contrib/Extract_TMX_Corpus/Extract_TMX_Corpus.rsrc.py
+++ /dev/null
@@ -1,141 +0,0 @@
-{'application':{'type':'Application',
- 'name':'Extract_TMX_Corpus',
- 'backgrounds': [
- {'type':'Background',
- 'name':'bgExtract_TMX_Corpus',
- 'title':u'Extract_TMX_Corpus',
- 'size':(275, 410),
- 'statusBar':1,
-
- 'menubar': {'type':'MenuBar',
- 'menus': [
- {'type':'Menu',
- 'name':'menuFile',
- 'label':'&File',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuFileSelectDirectory',
- 'label':u'Select &input/output directory...\tCtrl+I',
- 'command':'SelectListOfDirectories',
- },
- {'type':'MenuItem',
- 'name':'menuGetOutputFileBaseName',
- 'label':u'Get &output file base name...\tCtrl+O',
- 'command':'GetOutputFileBaseName',
- },
- {'type':'MenuItem',
- 'name':'fileSep1',
- 'label':'-',
- },
- {'type':'MenuItem',
- 'name':'menuFileExtractCorpus',
- 'label':u'&Extract corpus\tCtrl+E',
- 'command':'ExtractCorpus',
- },
- {'type':'MenuItem',
- 'name':'menuFileExtractSomeCorpora',
- 'label':u'Extract &some corpora\tCtrl+S',
- 'command':'ExtractSomeCorpora',
- },
- {'type':'MenuItem',
- 'name':'menuFileExtractAllCorpora',
- 'label':u'Extract &all corpora\tCtrl+A',
- 'command':'ExtractAllCorpora',
- },
- {'type':'MenuItem',
- 'name':'fileSep2',
- 'label':u'-',
- },
- {'type':'MenuItem',
- 'name':'menuFileExit',
- 'label':'E&xit\tAlt+X',
- 'command':'Doexit',
- },
- ]
- },
- {'type':'Menu',
- 'name':'menuHelp',
- 'label':u'&Help',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuHelpHelp',
- 'label':u'&Help...\tCtrl+H',
- },
- ]
- },
- ]
- },
- 'components': [
-
-{'type':'Button',
- 'name':'btnExtractSomeCorpora',
- 'position':(18, 267),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Extract some corpora',
- },
-
-{'type':'Button',
- 'name':'btnExtractAllCorpora',
- 'position':(18, 233),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Extract all corpora',
- },
-
-{'type':'StaticText',
- 'name':'StaticText3',
- 'position':(18, 107),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Destination Language:',
- },
-
-{'type':'ComboBox',
- 'name':'cbDestinationLanguage',
- 'position':(18, 129),
- 'size':(225, -1),
- 'items':[],
- },
-
-{'type':'Button',
- 'name':'btnSelectDirectory',
- 'position':(18, 19),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Select input / output directory...',
- },
-
-{'type':'ComboBox',
- 'name':'cbStartingLanguage',
- 'position':(18, 74),
- 'size':(225, -1),
- 'items':[u'DE-PT', u'EN-PT', u'ES-PT', u'FR-PT'],
- },
-
-{'type':'Button',
- 'name':'btnGetOutputFileBaseName',
- 'position':(18, 166),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Select base name of output file...',
- },
-
-{'type':'Button',
- 'name':'btnExtractCorpus',
- 'position':(18, 200),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Extract one corpus',
- },
-
-{'type':'StaticText',
- 'name':'StaticText1',
- 'position':(18, 53),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Starting Language:',
- },
-
-] # end components
-} # end background
-] # end backgrounds
-} }
diff --git a/contrib/Extract_TMX_Corpus/LanguageCodes.txt b/contrib/Extract_TMX_Corpus/LanguageCodes.txt
deleted file mode 100644
index 22ca66c73..000000000
--- a/contrib/Extract_TMX_Corpus/LanguageCodes.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-BG-01
-CS-01
-DA-01
-DE-DE
-EL-01
-EN-GB
-ES-ES
-ET-01
-FI-01
-FR-FR
-HU-01
-IT-IT
-LT-01
-LV-01
-MT-01
-NL-NL
-PL-01
-PT-PT
-RO-RO
-SK-01
-SL-01
-SV-SE \ No newline at end of file
diff --git a/contrib/Extract_TMX_Corpus/LanguagePairs.txt b/contrib/Extract_TMX_Corpus/LanguagePairs.txt
deleted file mode 100644
index d2ffd094e..000000000
--- a/contrib/Extract_TMX_Corpus/LanguagePairs.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-BG-01/CS-01
-FR-FR/PT-PT
-EN-GB/LT-01 \ No newline at end of file
diff --git a/contrib/Extract_TMX_Corpus/_READ_ME_FIRST.txt b/contrib/Extract_TMX_Corpus/_READ_ME_FIRST.txt
deleted file mode 100644
index 4709b8240..000000000
--- a/contrib/Extract_TMX_Corpus/_READ_ME_FIRST.txt
+++ /dev/null
@@ -1,241 +0,0 @@
-Summary:
- PURPOSE
- PERFORMANCE
- REQUIREMENTS
- INSTALLATION
- HOW TO USE
- GETTING THE RESULTS
- THANKS
- LICENSE
-
-
-********************************************************************************
-PURPOSE:
-********************************************************************************
-This is the MS Windows and Linux version (tested with Ubuntu 10.10 and 11.04)
-of Extract_Tmx_Corpus_1.044.
-
-Extract_Tmx_Corpus_1.044 was created initially as a Windows program (tested in
-Windows 7, Vista and XP) with a view to enable translators not necessarily with
-a deep knowledge of linguistic tools to create highly customised corpora that
-can be used with the Moses machine translation system and with other systems.
-Some users call it "et cetera", playing a bit with its initials (ETC) and
-meaning that it can treat a never-ending number of files.
-
-In order to create corpora that are most useful to train machine translation
-systems, one should strive to include segments that are relevant for the task in
-hand. One of the ways of finding such segments could involve the usage of
-previous translation memory files (TMX files). This way the corpora could be
-customised for the person or for the type of task in question. The present
-program uses such files as input.
-
-The program can create strictly aligned corpora for a single pair of languages,
-several pairs of languages or all the pairs of languages contained in the TMX
-files.
-
-The program creates 2 separate files (UTF-8 format; Unix line endings) for each
-language pair that it processes: one for the starting language and another for
-the destination language. The lines of a given TMX translation unit are placed
-in strictly the same line in both files. The program suppresses empty TMX
-translation units, as well as those where the text for the first language is the
-same as that of the second language (like translation units consisting solely of
-numbers, or those in which the first language segment has not been translated
-into the second language). If you are interested in another format of corpus, it
-should be relatively easy to adapt this format to the format you are interested
-in.
-
-The program also informs about errors that might occur during processing and
-creates a file that lists the name(s) of the TMX files that caused them, as well
-as a separate one listing the files successfully treated and the number of
-segments extracted for the language pair.
-
-********************************************************************************
-REQUIREMENTS:
-********************************************************************************
-The program requires the following to be pre-installed in your computer:
-
-1) Python 2.5 or higher (The program has been tested on Python 2.5 to 2.7.)
- Windows users download and install from http://www.python.org/download/
- Ubuntu users can use the pre-installed Python distribution
-
-2) wxPython 2.8 or higher
- Windows users download and install the Unicode version from
- http://www.wxpython.org/download.php
- Ubuntu users install with:
- sudo apt-get install python-wxtools
-
-3) Pythoncard 0.8.2 or higher
- Windows users download and install
- http://sourceforge.net/projects/pythoncard/files/PythonCard/0.8.2/PythonCard-0.8.2.win32.exe/download
- Ubuntu/Debian users install with:
- sudo apt-get install pythoncard
-
-********************************************************************************
-INSTALLATION:
-********************************************************************************
-Windows users:
-1) Download the Extract_TMX_Corpus_1.041.exe file
-2) Double-click Extract_TMX_Corpus_1.041.exe and follow the wizard's
- instructions.
-NOTE: Windows Vista users, to run the installation programs, by right-click on
-the installation file in Windows Explorer and choose "Execute as administrator"
-in the contextual menu.
-
-Ubuntu users:
-1) Download the Moses2TMX.tgz compressed file to a directory of your choice.
-2) Expand the compressed file and run from the expanded directory.
-
-***IMPORTANT***: Never erase the file "LanguageCodes.txt" in that directory. It
-is necessary for telling the program the languages that it has to process. If
-your TMX files use language codes that are different from those contained in
-this file, please replace the codes contained in the file with the codes used in
-your TMX files. You can always add or delete new codes to this file (when the
-program is not running).
-
-********************************************************************************
-HOW TO USE:
-********************************************************************************
-1) Create a directory where you will copy the TMX files that you want to
- process.
-
-2) Copy the TMX files to that directory.
-Note: If you do not have TMX files, try the following site:
-http://langtech.jrc.it/DGT-TM.html#Download. It contains the European Union
-DGT's Translation Memory, containing legislative documents of the European
-Union. For more details, see http://wt.jrc.it/lt/Acquis/DGT_TU_1.0/data/. These
-files are compressed in zip format and need to be unzipped before they can be
-used.
-
-3) Launch the program.
-
-4) Operate on the main window of the program in the direction from top to
- bottom:
-
- a) Click the "Select input/output directory" button to tell the root
- directory where the TMX files are (this directory can have subdirectories,
- all of which will also be processed), as well as where the output files
- produced by the program will be placed;
- NOTE: Please take note of this directory because the result files will also
- be placed there.
-
- b) In case you want to extract a ***single*** pair of languages, choose them
- in the "Starting Language" and "Destination Language" comboboxes. Do nothing
- if you want to extract more than one pair of languages.
-
- c) Click the "Select base name of output file" button and choose a base name
- for the output files (default: "Corpus.txt").
- Note: This base name is used to compose the names of the output files, which
- will also include the names of the starting and destination languages. If
- you accept the default "Corpus.txt" and choose "EN-GB" as starting language
- and "PT-PT" as destination language, for that corpus pair the respective
- corpora files will be named, respectively, "EN-GB (PT-PT)_Corpus.txt" and
- "PT-PT (EN-GB)_Corpus.txt".
- ***TIP***: The base name is useful for getting different names for different
- corpora of the same language.
-
- d) Click one (***just one***) of the following buttons:
- - "Extract one corpus": this creates a single pair of strictly aligned
- corpora in the languages chosen in the "Starting Language" and
- "Destination Language" comboboxes;
- - "Extract all corpora": this extracts all the combination pairs of
- languages for all the languages available in the "Starting Language" and
- "Destination language" comboboxes; if a language pair does not have
- segments of both languages in all of the translation units of all the
- TMX files, the result will be two empty corpora files for that language
- pair. If, however, there is just a single relevant translation unit, the
- corpus won't be empty.
- - "Extract some corpora": this extracts the pairs of languages listed in
- the file "LanguagePairs.txt". Each line of this file has the following
- structure:
- {Starting Language}/{Destination Language}.
-
-Here is an example of a file with 2 lines:
-
-EN-GB/PT-PT
-FR-FR/PT-PT
-
-This will create corpora for 4 pairs of languages: EN-PT, PT-EN and FR-PT and
-PT-FR. A sample "LanguagePairs.txt" comes with the program to serve as an
-example. Customise it to your needs respecting the syntax described above.
-
-NOTE: Never erase the "LanguagePairs.txt" file and always make sure that each
-pair of languages that you choose does exist in your TMX files. Otherwise, you
-won't get any results.
-
-The "Extract some corpora" and "Extract all corpora" functions are particularly
-useful if you want to prepare corpora for several or many language pairs. If
-your TMX files have translation units in all of the languages you are interested
-in, put them in a single directory (it can have subdirectories) and use those
-functions!
-
-********************************************************************************
-GETTING THE RESULTS:
-********************************************************************************
-The results are the aligned corpora files, as well as other files indicating how
-well the processing was done.
-
-When the processing is finished, you will find the corpora files in the
-directory you have chosen when you selected "Select input/output directory". In
-the "_processing_info" subdirectory of that directory you will find one or more
-*errors.txt file(s), listing the name of the TMX files that caused an error, and
-*actions.txt file(s), listing the files that were successfully processed as well
-as the number of translation units extracted.
-
-If you ask for the extraction of several corpora at once, you'll get lots of
-corpora files. If you feel somewhat confused by that abundance, please note 2
-things:
-a) If you sort the files by order of modified date, you'll reconstitute the
-chronological order in which the corpora were made (corpora are always made in
-pairs one after the other);
-b) The name of the corpora file has the following structure:
-
-{Language of the segments} ({Language with which they are aligned})_{Base name
-of the corpus}.txt
-Example: the file "BG-01 (MT-01)_Corpus.txt" has segments in the BG-01
-(Bulgarian) language that also have a translation in the MT-01 (Maltese)
-language and corresponds to the corpus whose base name is "Corpus.txt". There
-should be an equivalent "MT-01 (BG-01)_Corpus.txt", this time with all the
-Maltese segments that have a translation in Bulgarian. Together, these 2 files
-constitute an aligned corpus ready to be fed to Moses.
-
-You can now feed Moses your customised corpora :-)
-
-********************************************************************************
-PERFORMANCE:
-********************************************************************************
-The program can process very large numbers of TMX files (tens of thousands or
-more). It can also process extremely big TMX files (500 MB or more; it
-successfully processed a 2,3 GB file). The extraction of the corpus of a pair of
-languages in a very large (6,15 GB) set of TMX files took approximately 45
-minutes in an Intel Core 2 Solo U3500 computer @ 1.4 GHz with 4 GB RAM.
-
-The starting language and the destination language segments can be in any order
-in the TMX files (e.g., the starting language segment may be found either before
-or after the destination language segment in one, several or all translation
-units of the TMX file).
-
-The program accepts and preserves text in any language (including special
-diacritical characters), but has only been tested with European Union official
-languages.
-
-********************************************************************************
-THANKS:
-********************************************************************************
-Thanks to Gary Daine, who pointed out a way to improve one of the regex
-expressions used in the code.
-
-********************************************************************************
-LICENSE:
-********************************************************************************
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation (version 3 of the License).
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
diff --git a/contrib/Extract_TMX_Corpus/gpl.txt b/contrib/Extract_TMX_Corpus/gpl.txt
deleted file mode 100644
index 818433ecc..000000000
--- a/contrib/Extract_TMX_Corpus/gpl.txt
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/contrib/Moses2TMX/LanguageCodes.txt b/contrib/Moses2TMX/LanguageCodes.txt
deleted file mode 100644
index 22ca66c73..000000000
--- a/contrib/Moses2TMX/LanguageCodes.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-BG-01
-CS-01
-DA-01
-DE-DE
-EL-01
-EN-GB
-ES-ES
-ET-01
-FI-01
-FR-FR
-HU-01
-IT-IT
-LT-01
-LV-01
-MT-01
-NL-NL
-PL-01
-PT-PT
-RO-RO
-SK-01
-SL-01
-SV-SE \ No newline at end of file
diff --git a/contrib/Moses2TMX/Moses2TMX.py b/contrib/Moses2TMX/Moses2TMX.py
deleted file mode 100755
index b032fe9c5..000000000
--- a/contrib/Moses2TMX/Moses2TMX.py
+++ /dev/null
@@ -1,166 +0,0 @@
-#! /usr/bin/env python
-# -*- coding: utf_8 -*-
-"""This program is used to prepare TMX files from corpora composed of 2 files for each language pair,
-where the position of a segment in the first language file is exactly the same as in the second
-language file.
-
-The program requires that Pythoncard and wxPython (as well as Python) be previously installed.
-
-Copyright 2009, 2010 João Luís A. C. Rosas
-
-Distributed under GNU GPL v3 licence (see http://www.gnu.org/licenses/)
-
-E-mail: joao.luis.rosas@gmail.com """
-
-__version__ = "$Revision: 1.033$"
-__date__ = "$Date: 2010/02/25$"
-__author__="$João Luís A. C. Rosas$"
-
-from PythonCard import clipboard, dialog, graphic, model
-from PythonCard.components import button, combobox,statictext,checkbox,staticbox
-import wx
-import os, re
-import string
-import sys
-from time import strftime
-import codecs
-
-class Moses2TMX(model.Background):
-
- def on_initialize(self, event):
- self.inputdir=''
- #Get directory where program file is and ...
- currdir=os.path.abspath(os.path.dirname(os.path.realpath(sys.argv[0])))
- #... load the file ("LanguageCodes.txt") with the list of languages that the program can process
- try:
- self.languages=open(currdir+os.sep+r'LanguageCodes.txt','r+').readlines()
- except:
- # If the languages file doesn't exist in the program directory, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguageCodes.txt" is missing. The program will now close.', 'Essential file missing')
- sys.exit()
- #remove end of line marker from each line in "LanguageCodes.txt"
- for lang in range(len(self.languages)):
- self.languages[lang]=self.languages[lang].rstrip()
- self.lang1code=''
- self.lang2code=''
- #Insert list of language names in appropriate program window's combo boxes
- self.components.cbStartingLanguage.items=self.languages
- self.components.cbDestinationLanguage.items=self.languages
-
- def CreateTMX(self, name):
- print 'Started at '+strftime('%H-%M-%S')
- #get the startinglanguage name (e.g.: "EN-GB") from the program window
- self.lang1code=self.components.cbStartingLanguage.text
- #get the destinationlanguage name from the program window
- self.lang2code=self.components.cbDestinationLanguage.text
- print name+'.'+self.lang2code[:2].lower()
- e=codecs.open(name,'r',"utf-8","strict")
- f=codecs.open(name+'.'+self.lang2code[:2].lower()+'.moses','r',"utf-8","strict")
- a=codecs.open(name+'.tmp','w',"utf-8","strict")
- b=codecs.open(name+'.'+self.lang2code[:2].lower()+'.moses.tmp','w',"utf-8","strict")
- for line in e:
- if line.strip():
- a.write(line)
- for line in f:
- if line.strip():
- b.write(line)
- a=codecs.open(name+'.tmp','r',"utf-8","strict")
- b=codecs.open(name+'.'+self.lang2code[:2].lower()+'.moses.tmp','r',"utf-8","strict")
- g=codecs.open(name+'.tmx','w','utf-16','strict')
- g.write('<?xml version="1.0" ?>\n<!DOCTYPE tmx SYSTEM "tmx14.dtd">\n<tmx version="version 1.4">\n\n<header\ncreationtool="moses2tmx"\ncreationtoolversion="1.032"\nsegtype="sentence"\ndatatype="PlainText"\nadminlang="EN-US"\nsrclang="'+self.lang1code+'"\n>\n</header>\n\n<body>\n')
- parar=0
- while True:
- self.ling1segm=a.readline().strip()
- self.ling2segm=b.readline().strip()
- if not self.ling1segm:
- break
- elif not self.ling2segm:
- break
- else:
- try:
- g.write('<tu creationid="MT!">\n<prop type="Txt::Translator">Moses</prop>\n<tuv xml:lang="'+self.lang1code+'">\n<seg>'+self.ling1segm+'</seg>\n</tuv>\n<tuv xml:lang="'+self.lang2code+ \
- '">\n<seg>'+self.ling2segm+'</seg>\n</tuv>\n</tu>\n\n')
- except:
- pass
- a.close()
- b.close()
- e.close()
- f.close()
- g.write('</body>\n</tmx>\n')
- g.close()
- #os.remove(name)
- #os.remove(name+'.'+self.lang2code[:2].lower()+'.moses')
- os.remove(name+'.tmp')
- os.remove(name+'.'+self.lang2code[:2].lower()+'.moses.tmp')
-
- def createTMXs(self):
- try:
- # Get a list of all TMX files that need to be processed
- fileslist=self.locate('*.moses',self.inputdir)
- except:
- # if any error up to now, add the name of the TMX file to the output file @errors
- self.errortypes=self.errortypes+' - Get All Segments: creation of output files error\n'
- if fileslist:
- # For each relevant TMX file ...
- for self.presentfile in fileslist:
- filename=self.presentfile[:-9]
- #print filename
- self.CreateTMX(filename)
- print 'Finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Processing done.', 'Processing Done')
-
- def on_btnCreateTMX_mouseClick(self, event):
- self.createTMXs()
-
- def on_menuFileCreateTMXFiles_select(self, event):
- self.createTMXs()
-
- def on_btnSelectLang1File_mouseClick(self, event):
- self.input1=self.GetInputFileName()
-
- def on_btnSelectLang2File_mouseClick(self, event):
- self.input2=self.GetInputFileName()
-
- def locate(self,pattern, basedir):
- """Locate all files matching supplied filename pattern in and below
- supplied root directory.
-
- @pattern: something like '*.tmx'
- @basedir:whole directory to be treated
- """
- import fnmatch
- for path, dirs, files in os.walk(os.path.abspath(basedir)):
- for filename in fnmatch.filter(files, pattern):
- yield os.path.join(path, filename)
-
- def SelectDirectory(self):
- """Select the directory where the files to be processed are
-
- @result: object returned by the dialog window with attributes accepted (true if user clicked OK button, false otherwise) and
- path (list of strings containing the full pathnames to all files selected by the user)
- @self.inputdir: directory where files to be processed are (and where output files will be written)
- @self.statusBar.text: text displayed in the program window status bar"""
-
- result= dialog.directoryDialog(self, 'Choose a directory', 'a')
- if result.accepted:
- self.inputdir=result.path
- self.statusBar.text=self.inputdir+' selected.'
-
- def on_menuFileSelectDirectory_select(self, event):
- self.SelectDirectory()
-
- def on_btnSelectDirectory_mouseClick(self, event):
- self.SelectDirectory()
-
- def on_menuHelpShowHelp_select(self, event):
- f = open('_READ_ME_FIRST.txt', "r")
- msg = f.read()
- result = dialog.scrolledMessageDialog(self, msg, '_READ_ME_FIRST.txt')
-
- def on_menuFileExit_select(self, event):
- sys.exit()
-
-
-if __name__ == '__main__':
- app = model.Application(Moses2TMX)
- app.MainLoop()
diff --git a/contrib/Moses2TMX/Moses2TMX.rsrc.py b/contrib/Moses2TMX/Moses2TMX.rsrc.py
deleted file mode 100755
index dc1570c7f..000000000
--- a/contrib/Moses2TMX/Moses2TMX.rsrc.py
+++ /dev/null
@@ -1,95 +0,0 @@
-{'application':{'type':'Application',
- 'name':'Moses2TMX',
- 'backgrounds': [
- {'type':'Background',
- 'name':'bgMoses2TMX',
- 'title':u'Moses2TMX-1.032',
- 'size':(277, 307),
- 'statusBar':1,
-
- 'menubar': {'type':'MenuBar',
- 'menus': [
- {'type':'Menu',
- 'name':'menuFile',
- 'label':u'&File',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuFileSelectDirectory',
- 'label':u'Select &Directory ...\tAlt+D',
- },
- {'type':'MenuItem',
- 'name':'menuFileCreateTMXFiles',
- 'label':u'&Create TMX Files\tAlt+C',
- },
- {'type':'MenuItem',
- 'name':'Sep1',
- 'label':u'-',
- },
- {'type':'MenuItem',
- 'name':'menuFileExit',
- 'label':u'&Exit\tAlt+E',
- },
- ]
- },
- {'type':'Menu',
- 'name':'menuHelp',
- 'label':u'&Help',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuHelpShowHelp',
- 'label':u'&Show Help\tAlt+S',
- },
- ]
- },
- ]
- },
- 'components': [
-
-{'type':'Button',
- 'name':'btnSelectDirectory',
- 'position':(15, 15),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Select Directory ...',
- },
-
-{'type':'StaticText',
- 'name':'StaticText3',
- 'position':(17, 106),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Target Language:',
- },
-
-{'type':'ComboBox',
- 'name':'cbStartingLanguage',
- 'position':(18, 75),
- 'size':(70, -1),
- 'items':[],
- },
-
-{'type':'ComboBox',
- 'name':'cbDestinationLanguage',
- 'position':(17, 123),
- 'size':(70, -1),
- 'items':[u'DE-PT', u'EN-PT', u'ES-PT', u'FR-PT'],
- },
-
-{'type':'Button',
- 'name':'btnCreateTMX',
- 'position':(20, 160),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Create TMX Files',
- },
-
-{'type':'StaticText',
- 'name':'StaticText1',
- 'position':(18, 56),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Source Language:',
- },
-
-] # end components
-} # end background
-] # end backgrounds
-} }
diff --git a/contrib/Moses2TMX/_READ_ME_FIRST.txt b/contrib/Moses2TMX/_READ_ME_FIRST.txt
deleted file mode 100644
index cbc667a31..000000000
--- a/contrib/Moses2TMX/_READ_ME_FIRST.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-Summary:
- PURPOSE
- REQUIREMENTS
- INSTALLATION
- HOW TO USE
- LICENSE
-
-
-********************************************************************************
-PURPOSE:
-********************************************************************************
-This is the MS Windows and Linux version (tested with Ubuntu 10.10 and 11.04) of
-Moses2TMX 1.033.
-
-Moses2TMX started as a Windows program (tested on Windows7, Vista and XP) that
-enables translators not necessarily with a deep knowledge of linguistic tools to
-create TMX files from a Moses corpus or from any other corpus made up of 2
-separate files, one for the source language and another for the target language,
-whose lines are strictly aligned.
-
-The program processes a whole directory containing source language and
-corresponding target language documents and creates 1 TMX file (UTF-16 format;
-Windows line endings) for each document pair that it processes.
-
-The program accepts and preserves text in any language (including special
-diacritical characters), but has only been tested with European Union official
-languages.
-
-The program is specifically intended to work with the output of a series of
-Linux scripts together called Moses-for-Mere-Mortals.
-
-********************************************************************************
-REQUIREMENTS:
-********************************************************************************
-The program requires the following to be pre-installed in your computer:
-
-1) Python 2.5 or higher (The program has been tested on Python 2.5 to 2.7)
- Windows users download and install from http://www.python.org/download/
- Ubuntu users can use the pre-installed Python distribution
-
-2) wxPython 2.8 or higher
- Windows users download and install the Unicode version from
- http://www.wxpython.org/download.php
- Ubuntu users install with: sudo apt-get install python-wxtools
-
-3) Pythoncard 0.8.2 or higher
- Windows users download and install
- http://sourceforge.net/projects/pythoncard/files/PythonCard/0.8.2/PythonCard-0.8.2.win32.exe/download
- Ubuntu users install with: sudo apt-get install pythoncard
-
-********************************************************************************
-INSTALLATION:
-********************************************************************************
-Windows users:
-1) Download the Moses2TMX.exe file to a directory of your choice.
-2) Double-click Moses2TMX.exe and follow the wizard's instructions.
-NOTE: Windows Vista users, to run the installation programs, by right-click on
-the installation file in Windows Explorer and choose "Execute as administrator"
-in the contextual menu.
-
-Ubuntu users:
-1) Download the Moses2TMX.tgz compressed file to a directory of your choice.
-2) Expand the compressed file and run from the expanded directory.
-
-***IMPORTANT***: Never erase the file "LanguageCodes.txt" in that directory. It
-is necessary for telling the program the languages that it has to process. If
-your TMX files use language codes that are different from those contained in
-this file, please replace the codes contained in the file with the codes used in
-your TMX files. You can always add or delete new codes to this file (when the
-program is not running).
-
-********************************************************************************
-HOW TO USE:
-********************************************************************************
-1) Create a directory where you will copy the files that you want to process.
-
-2) Copy the source and target language documents that you want to process to
-that directory.
-NOTE YOU HAVE TO RESPECT SOME NAMING CONVENTIONS IN ORDER TO BE ABLE TO USE
-THIS PROGRAM:
-
- a) the target documents have to have follow the following convention:
-
- {basename}.{abbreviation of target language}.moses
-
- where {abbreviation of target language} is a ***2 character*** string
- containing the lowercased first 2 characters of any of the language
- codes present in the LanguageCodes.txt (present in the base directory of
- Moses2TMX)
-
- Example: If {basename} = "200000" and the target language has a code
- "EN-GB" in the LanguageCodes.txt, then the name of the target file
- should be "200000.en.moses"
-
- b) the source language document should have the name:
-
- {basename}
-
- Example: continuing the preceding example, the name of the corresponding
- source document should be "200000".
-
-3) Launch the program as indicated above in the "Launching the program" section.
-
-4) Operate on the main window of the program in the direction from top to
- bottom:
- a) Click the "Select Directory..." button to indicate the directory
- containing all the source and corresponding target documents that you want
- to process;
- b) Indicate the languages of your files refers to in the "Source Language"
- and "Target Language" comboboxes;
- c) Click the Create TMX Files button.
-
-********************************************************************************
-LICENSE:
-********************************************************************************
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation (version 3 of the License).
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
diff --git a/contrib/Moses2TMX/gpl.txt b/contrib/Moses2TMX/gpl.txt
deleted file mode 100644
index 818433ecc..000000000
--- a/contrib/Moses2TMX/gpl.txt
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/contrib/combine-ptables/README.md b/contrib/combine-ptables/README.md
new file mode 100644
index 000000000..b180f9202
--- /dev/null
+++ b/contrib/combine-ptables/README.md
@@ -0,0 +1,139 @@
+`combine-ptables.pl`: fill-up and other techniques of translation models combination.
+
+Author:
+Arianna Bisazza bisazza[AT]fbk.eu
+
+ABOUT
+-----
+This tool implements "fill-up" and other operations that are useful to combine translation and reordering tables.
+In the "fill-up" approach, the weights of out-of-domain data sources are estimated directly by MERT along with the
+other model weights.
+
+This tool also supports linear interpolation, but weights must be provided by the user.
+If you want to automatically estimate linear interpolation weights, use `contrib/tmcombine` instead.
+
+
+REFERENCE
+---------
+When using this script, please cite:
+Arianna Bisazza, Nick Ruiz, and Marcello Federico. 2011.
+"Fill-up versus Interpolation Methods for Phrase-based SMT Adaptation."
+In International Workshop on Spoken Language Translation (IWSLT), San Francisco, CA.
+
+
+FILL-UP
+-------
+
+This combination technique is useful when the relevance of the models is known a priori,
+e.g. when one is trained on in-domain data and the others on out-of-domain data.
+
+This mode preserves all the entries and scores coming from the first model, and adds
+entries from the other models only if new.
+If more than two tables are provided, each entry is taken only from the first table
+that contains it.
+
+Moreover, a binary feature is added for each additional table to denote the provenance
+of an entry. For in-domain entries, the binary features are all set to 1 (=exp(0)).
+Entries coming from the 2nd table will have the 1st binary feature set to 2.718 (=exp(1)).
+
+This technique was proposed in the following works:
+
+Preslav Nakov. 2008.
+"Improving English-Spanish Statistical Machine Translation: Experiments in Domain
+Adaptation, Sentence Paraphrasing, Tokenization, and Recasing."
+In Workshop on Statistical Machine Translation.
+
+Arianna Bisazza, Nick Ruiz, and Marcello Federico. 2011.
+"Fill-up versus Interpolation Methods for Phrase-based SMT Adaptation."
+In International Workshop on Spoken Language Translation (IWSLT), San Francisco, CA.
+
+The latter paper contains details about the present implementation as well as an empirical
+evaluation of fill-up against other combination techniques.
+Reordering model fill-up, cascaded fill-up and pruning criteria are also discussed in the
+same paper.
+
+Among the findings of this paper, pruning new (out-of-domain) phrases with more than 4
+source words appeared to be beneficial on the Arabic-English TED task when combining the
+in-domain models with MultiUn models.
+This corresponds to the option:
+ `--newSourceMaxLength=4`
+
+
+LINEAR INTERPOLATION
+--------------------
+
+This combination technique consists in linearly combining the feature values coming
+from all tables. The combination weights should be provided by the user, otherwise
+uniform weights are assumed.
+When a phrase pair is absent from a table, a constant value (epsilon) is assumed for
+the corresponding feature values. You may want to set your own epsilon.
+
+See [Bisazza et al. 2011] for an empirical comparison of uniformly weighted linear
+interpolation against fill-up and decoding-time log-linear interpolation. In that paper,
+epsilon was always set to 1e-06.
+
+
+UNION
+-----
+
+This combination technique creates the union of all phrase pairs and assigns to each
+of them the concatenation of all tables scores.
+
+
+INTERSECTION
+------------
+
+This combination technique creates the intersection of all phrase pairs: each phrase
+pair that occurs in all phrase tables is output along with the feature vector taken
+from the *first* table.
+The intersection can be used to prune the reordering table in order to match the
+entries of a corresponding pruned phrase table.
+
+
+USAGE
+-----
+
+Get statistics about overlap of entries:
+ `combine-ptables.pl --mode=stats ptable1 ptable2 ... ptableN > ptables-overlap-stats`
+
+Interpolate phrase tables...
+- with uniform weights:
+ `combine-ptables.pl --mode=interp --phpenalty-at=4 ptable1 ptable2 ptable3 > interp-ptable.X`
+
+- with custom weights:
+ `combine-ptables.pl --mode=interp --phpenalty-at=4 --weights=0.8,0.1,0.1 ptable1 ptable2 ptable3 > interp-ptable.Y`
+
+- with custom epsilon:
+ `combine-ptables.pl --mode=interp --phpenalty-at=4 --epsilon=1e-05 ptable1 ptable2 ptable3 > interp-ptable.Z`
+
+
+Fillup phrase tables...
+- unpruned:
+ `combine-ptables.pl --mode=fillup ptable1 ptable2 ... ptableN > fillup-ptable`
+
+- pruned (new phrases only with max. 4 source words):
+ `combine-ptables.pl --mode=fillup --newSourceMaxLength=4 ptable1 ptable2 ... ptableN > fillup-ptable`
+
+
+Given a pruned phrase table, prune the corresponding reordering table:
+ `combine-ptables.pl --mode=intersect1 reotable1-unpruned ptable1-pruned > reotable1-pruned`
+
+
+NOTES
+-----
+
+The script works only with textual (non-binarized) phrase or reordering tables
+that were *previously sorted* with `LC_ALL=C sort`.
+
+The resulting combined tables are also textual and need to be binarized normally.
+
+The script combine-ptables.pl can be used on lexicalized reordering tables as well.
+
+Input tables can be gzipped.
+
+When integrating filled up models into a Moses system, remember to:
+ - specify the correct number of features (typically 6) under [ttable-file] in the configuration file `moses.ini`
+ - add a weight under [weight-t] in `moses.ini`
+ - if you binarize the models, provide the correct number of features to the command:
+ `$moses/bin/processPhraseTable -ttable 0 0 - -nscores $nbFeatures`
+
diff --git a/contrib/combine-ptables/combine-ptables.pl b/contrib/combine-ptables/combine-ptables.pl
new file mode 100755
index 000000000..de9df7ec2
--- /dev/null
+++ b/contrib/combine-ptables/combine-ptables.pl
@@ -0,0 +1,425 @@
+#! /usr/bin/perl
+
+#******************************************************************************
+# Arianna Bisazza @ FBK-irst. March 2012
+#******************************************************************************
+# combine-ptables.pl : Combine Moses-style phrase tables, using different approaches
+
+
+use strict;
+use open ':utf8';
+binmode STDIN, ':utf8';
+binmode STDOUT, ':utf8';
+
+use Getopt::Long "GetOptions";
+
+sub main {
+my $usage = "
+USAGE
+-----
+combine-ptables.pl --mode=(interp|union|fillup|intersect1|stats) ptable1 ptable2 ... ptableN > combined-ptable
+combine-ptables.pl --mode=intersect1 reotable-unpruned ptable-pruned > reotable-pruned
+-----
+#
+# This script reads two or more *sorted* phrase tables and combines them in different modes.
+#
+# (Note: if present, word alignments are ignored).
+#
+# ----------------
+# OPTIONS
+# ----------------
+#
+# Required:
+# --mode fillup: Each entry is taken only from the first table that contains it.
+# A binary feature is added from each table except the first.
+# interp: Linear interpolation.
+# union: Union of entries, feature vectors are concatenated.
+# intersect1: Intersection of entries, feature vectors taken from the first table.
+# stats: Only compute some statistics about tables overlap. No table is produced.
+#
+# NOTE: if present, additional fields such as word alignment, phrase counts etc. are always
+# taken from the first table.
+#
+# Generic options:
+# --phpenalty=FLOAT Constant value for phrase penalty. Default is exp(1)=2.718
+# --phpenalty-at=N The (N+1)th score of each table is considered as phrase penalty with a constant value.
+# In 'interp' mode, the corresponding feature is not interpolated but simply set to the constant.
+# In 'union' mode, the ph.penalty (constant) is output only once, after all the other scores.
+# By default, no score is considered as phrase penalty.
+#
+#
+# Options for 'fillup':
+# --newSourceMaxLength=INT Don't include \"new\" source phrases if longer than INT words.
+#
+# Options for 'interp':
+# --weights=W1,W2,...WN Weights for interpolation. By default, uniform weights are applied.
+# --epsilon=X Score to assume when a phrase pair is not contained in a table (in 'interp' and 'union' modes).
+# Default epsilon is 1e-06.
+#
+# Options for 'union':
+#
+#
+";
+
+my $combination_mode = '';
+my $debug = '';
+my $weights_str = '';
+my $epsilon = 0.000001;
+my $phPenalty = 2.718; # exp(1)
+my $phPenalty_idx = -1;
+my $delim= " ||| ";
+my $delim_RE= ' \|\|\| ';
+my $exp_one = 2.718;
+my $exp_zero = 1;
+my $newSourceMaxLength = -1;
+my $help = '';
+
+GetOptions ('debug' => \$debug,
+ 'mode=s' => \$combination_mode,
+ 'weights=s' => \$weights_str,
+ 'epsilon=f' => \$epsilon,
+ 'phpenalty=f' => \$phPenalty,
+ 'phpenalty-at=i' => \$phPenalty_idx,
+ 'newSourceMaxLength=i' => \$newSourceMaxLength,
+ 'help' => \$help);
+
+if($help) { die "$usage\n\n"; }
+
+if($combination_mode!~/(interp|union|fillup|intersect1|stats)/) {die "$usage\nUnknown combination mode!\n"};
+
+if(@ARGV < 2) {die "$usage\n\n Please provide at least 2 tables to combine \n\n";}
+
+print STDERR "
+WARNING: Your phrase tables must be sorted (with LC_ALL=C) !!
+******************************
+Combination mode is [$combination_mode]
+******************************
+";
+
+my @tables = @ARGV;
+my $nbtables = scalar(@tables);
+
+###########################################
+
+# The newSourceMaxLength option requires reading all the first PT before starting the combination
+my %sourcePhrasesPT1;
+if($combination_mode eq "fillup" && $newSourceMaxLength>-1) {
+ my $table1=$tables[0];
+ $table1 =~ s/(.*\.gz)\s*$/gzip -dc < $1|/;
+ open(TABLE1, "$table1") or die "Cannot open $table1: ($!)\n";
+ while(my $line=<TABLE1>) {
+ $line=~m/^(.*?)$delim_RE/;
+ $sourcePhrasesPT1{$1}++;
+ }
+ close(TABLE1);
+}
+
+my @table_files=();
+foreach my $table (@tables) {
+ $table =~ s/(.*\.gz)\s*$/gzip -dc < $1|/;
+ #localize the file glob, so FILE is unique to the inner loop.
+ local *FILE;
+ open(FILE, "$table") or die "Cannot open $table: ($!)\n";
+ push(@table_files, *FILE);
+}
+
+
+# Read first line from all tables to find number of weights (and sanity checks)
+my @read_ppairs=();
+my $nbscores = &read_line_from_tables(\@table_files, \@read_ppairs);
+print STDERR "Each phrase table contains $nbscores features.\n";
+
+###########################################
+
+if($phPenalty_idx!=-1) {
+ if($phPenalty_idx<0 || $phPenalty_idx>=$nbscores) {
+ die "Invalid value for option phpenalty-at! Should be in the range [0,($nbscores-1)]\n\n";
+ }
+ else { print STDERR "Phrase penalty at index $phPenalty_idx\n"; }
+}
+
+#if($weights_str ne "") { die "Weights option NOT supported yet. Can only use uniform (1/nbscores)\n\n"; }
+#my $unifw = 1/$nbtables;
+
+my @weights=(); # Array of arrays each containing the feature weights for a phrase table
+if($combination_mode eq "interp") {
+ my @table_level_weights=();
+ if($weights_str eq "") {
+ @table_level_weights= ((1/$nbtables) x $nbtables); # assuming uniform weights
+ }
+ else {
+ @table_level_weights= split(/,/, $weights_str);
+ if(scalar(@table_level_weights) != $nbtables) {
+ die "$usage\n Invalid string for option --weights! Must be a comma-separated list of floats, one per ph.table.\n";
+ }
+ }
+
+ for(my $i=0; $i<$nbtables; $i++) {
+ my @weights_pt = (($table_level_weights[$i]) x $nbscores);
+ if($phPenalty_idx!=-1) {
+ $weights_pt[$phPenalty_idx]=0;
+ }
+ print STDERR "WEIGHTS-PT_$i: ", join(" -- ", @weights_pt), "\n";
+ $weights[$i] = \@weights_pt;
+ }
+ print STDERR "EPSILON: $epsilon \n";
+}
+
+
+###########################################
+
+my @empty_ppair=("");
+my @epsilons = (($epsilon) x $nbscores);
+if($phPenalty_idx>-1) {
+ pop @epsilons;
+}
+
+my $nbPpairs_inAll=0;
+my @nbPairs_found_only_in=((0) x $nbtables);
+my $MINSCORE=1;
+
+print STDERR "Working...\n\n";
+while(1) {
+ my $min_ppair="";
+ my $reached_end_of_tables=1;
+ my @tablesContainingPpair=((0) x $nbtables);
+ for(my $i=0; $i<$nbtables; $i++) {
+ my $ppair=$read_ppairs[$i]->[0];
+ if($ppair ne "") {
+ $reached_end_of_tables=0;
+ if($min_ppair eq "" || $ppair lt $min_ppair) {
+ $min_ppair=$ppair;
+ @tablesContainingPpair=((0) x $nbtables);
+ $tablesContainingPpair[$i]=1;
+ }
+ elsif($ppair eq $min_ppair) {
+ $tablesContainingPpair[$i]=1;
+ }
+ }
+ }
+ last if($reached_end_of_tables);
+
+ ## Actual combination is performed here:
+ &combine_ppair(\@read_ppairs, \@tablesContainingPpair);
+
+ &read_line_from_tables(\@table_files, \@read_ppairs, \@tablesContainingPpair);
+
+}
+
+print STDERR "...done!\n";
+
+print STDERR "The minimum score in all tables is $MINSCORE\n";
+
+if($combination_mode eq "stats") {
+my $tot_ppairs=0;
+print "
+# entries
+found in all tables: $nbPpairs_inAll\n";
+
+for(my $i=0; $i<$nbtables; $i++) {
+ print "found only in PT_$i: $nbPairs_found_only_in[$i]\n";
+}
+
+}
+
+####################################
+sub combine_ppair(PPAIRS_REFARRAY, TABLE_INDICES_REFARRAY) {
+ my $ra_ppairs=shift; # 1st item: phrase-pair key (string);
+ # 2nd item: ref.array of scores;
+ # 3rd item: additional info (string, may be empty)
+
+ my $ra_toRead=shift; # Important: this says which phrase tables contain the ph.pair currently processed
+
+ my $ppair="";
+ my @scores=();
+ my $additional_info="";
+
+ my $to_print=1;
+
+ if($debug) {
+ print STDERR "combine_ppair:\n";
+ for(my $i=0; $i<$nbtables; $i++) {
+ if($ra_toRead->[$i]) {
+ print STDERR "ppair_$i= ", join (" // ", @{$ra_ppairs->[$i]}), "\n";
+ }
+ }
+ }
+
+ if($combination_mode eq "stats") {
+ $to_print=0;
+ my $found_in=-1;
+ my $nb_found=0;
+ for(my $i=0; $i<$nbtables; $i++) {
+ if($ra_toRead->[$i]) {
+ $found_in=$i;
+ $nb_found++;
+ }
+ }
+ if($nb_found==1) { $nbPairs_found_only_in[$found_in]++; }
+ elsif($nb_found==$nbtables) { $nbPpairs_inAll++; }
+ }
+ ### Fill-up + additional binary feature
+ elsif($combination_mode eq "fillup") {
+ my @bin_feats=(($exp_zero) x ($nbtables-1));
+ for(my $i=0; $i<$nbtables; $i++) {
+ if($ra_toRead->[$i]) {
+ $ppair= shift(@{$ra_ppairs->[$i]});
+ # pruning criteria are applied here:
+ if($i>0 && $newSourceMaxLength>-1) {
+ $ppair=~m/^(.*?)$delim_RE/;
+ if(scalar(split(/ +/, $1)) > $newSourceMaxLength &&
+ !defined($sourcePhrasesPT1{$1}))
+ { $to_print=0; }
+ }
+# @scores= @{$ra_ppairs->[$i]};
+ @scores = @{shift(@{$ra_ppairs->[$i]})};
+ # binary feature for ph.pair provenance fires here
+ if($i>0) { $bin_feats[$i-1]=$exp_one; }
+ $additional_info=shift(@{$ra_ppairs->[$i]});
+ last;
+ }
+ }
+ push(@scores, @bin_feats);
+ }
+ ### Linear interpolation
+ elsif($combination_mode eq "interp") {
+ my $firstPpair=-1;
+ @scores=((0) x $nbscores);
+ for(my $i=0; $i<$nbtables; $i++) {
+ if($ra_toRead->[$i]) {
+ if($firstPpair==-1) { $firstPpair=$i; }
+ $ppair= shift(@{$ra_ppairs->[$i]});
+ my @scoresPT = @{shift(@{$ra_ppairs->[$i]})};
+ for(my $j=0; $j<$nbscores; $j++) {
+# $scores[$j]+= $weights[$i]->[$j]* $ra_ppairs->[$i][$j];
+ $scores[$j]+= $weights[$i]->[$j]* $scoresPT[$j];
+ }
+ }
+ else {
+ for(my $j=0; $j<$nbscores; $j++) {
+ $scores[$j]+= $weights[$i]->[$j]* $epsilon;
+ }
+ }
+ if($phPenalty_idx!=-1) {
+ $scores[$phPenalty_idx]= $phPenalty;
+ }
+ }
+ if($debug) { print STDERR "..taking info from ptable_$firstPpair\n"; }
+ $additional_info= shift(@{$ra_ppairs->[$firstPpair]});
+ }
+ ### Union + feature concatenation
+ elsif($combination_mode eq "union") {
+ my $firstPpair=-1;
+ for(my $i=0; $i<$nbtables; $i++) {
+ if($ra_toRead->[$i]) {
+ if($firstPpair==-1) { $firstPpair=$i; }
+ $ppair= shift(@{$ra_ppairs->[$i]});
+ my @scoresPT= @{shift(@{$ra_ppairs->[$i]})};
+ if($phPenalty_idx!=-1) {
+# splice(@{$ra_ppairs->[$i]}, $phPenalty_idx, 1);
+ splice(@scoresPT, $phPenalty_idx, 1);
+ }
+# push(@scores, @{$ra_ppairs->[$i]});
+ push(@scores, @scoresPT);
+ }
+ else {
+ push(@scores, @epsilons);
+ }
+ }
+ if($phPenalty_idx!=-1) {
+ push(@scores, $phPenalty);
+ }
+ if($debug) { print STDERR "..taking info from ptable_$firstPpair\n"; }
+ $additional_info= shift(@{$ra_ppairs->[$firstPpair]});
+ }
+ ### Intersect + features from first table
+ elsif($combination_mode eq "intersect1") {
+ $to_print=0;
+ my $found_in_all=1;
+ for(my $i=0; $i<$nbtables; $i++) {
+ if(!$ra_toRead->[$i]) {
+ $found_in_all=0;
+ last;
+ }
+ }
+ if($found_in_all) {
+ $to_print=1;
+ $ppair= shift(@{$ra_ppairs->[0]});
+# @scores= @{$ra_ppairs->[0]};
+ @scores= @{shift(@{$ra_ppairs->[0]})};
+ $additional_info= shift(@{$ra_ppairs->[0]});
+ }
+ }
+ else {
+ die "$usage\nUnknown combination mode!\n";
+ }
+
+
+ if($to_print) {
+ if($additional_info eq "") {
+ print $ppair, join(" ", @scores), "\n";
+ }else {
+ print $ppair, join(" ", @scores), $delim, $additional_info, "\n";
+ }
+ }
+}
+
+####################################
+# Read lines from all filehandles given in FILES_REFARRAY,
+# or from the files whose indices are assigned 1 in the array TABLE_INDICES_REFARRAY
+# Parse each of them as a phrase pair entry and stores it to the corresponding position of PPAIRS_REFARRAY
+sub read_line_from_tables(FILES_REFARRAY, PPAIRS_REFARRAY, TABLE_INDICES_REFARRAY) {
+ my $ra_files=shift;
+ my $ra_ppairs=shift;
+
+ my $ra_toRead=shift;
+ my @toRead=((1) x $nbtables); # by default read from all files
+ if($ra_toRead ne "") {
+ @toRead=@$ra_toRead;
+ }
+
+ my $nbscores=-1;
+ my $key=""; my $additional_info="";
+ for(my $i=0; $i<$nbtables; $i++) {
+ next if($toRead[$i]==0);
+ my @ppair=();
+ my $file=$ra_files->[$i];
+ if(my $line = <$file>) {
+ chomp $line;
+ my @fields = split(/$delim_RE/, $line);
+ if(scalar(@fields)<3) {
+ die "Invalid phrase table entry:\n$line\n";
+ }
+ my @scores = split(/\s+/, $fields[2]);
+ foreach my $score (@scores) {
+ if($score<$MINSCORE) { $MINSCORE=$score; }
+ }
+ # Get nb of scores from the 1st table. Check that all tables provide the same nb of scores,
+ # unless mode is 'intersect' (then it doesn't matter as scores are taken only from 1st table)
+ if($nbscores==-1) {
+ $nbscores=scalar(@scores);
+ } elsif($nbscores!=scalar(@scores) && $combination_mode ne "intersect1") {
+ die "Wrong number of scores in table-$i! Should be $nbscores\n";
+ }
+ # Get additional fields if any (word alignment, phrase counts etc.)
+ if(scalar(@fields)>3) {
+ $additional_info=join($delim, splice(@fields,3));
+ #print STDOUT "additional_info:__{$additional_info}__\n";
+ }
+ my $key = "$fields[0]$delim$fields[1]$delim"; ## IMPORTANT: the | delimiter at the end of the phrase pair is crucial to preserve sorting!!
+ push(@ppair, $key, \@scores, $additional_info);
+ }
+ else {
+ push(@ppair, "");
+ }
+ $ra_ppairs->[$i]=\@ppair;
+ }
+
+ return $nbscores;
+}
+
+#########
+}
+
+
+&main;
diff --git a/contrib/eppex/configure b/contrib/eppex/configure
index 6aab92d7b..6aab92d7b 100644..100755
--- a/contrib/eppex/configure
+++ b/contrib/eppex/configure
diff --git a/contrib/eppex/depcomp b/contrib/eppex/depcomp
index 04701da53..04701da53 100644..100755
--- a/contrib/eppex/depcomp
+++ b/contrib/eppex/depcomp
diff --git a/contrib/eppex/install-sh b/contrib/eppex/install-sh
index 4d4a9519e..4d4a9519e 100644..100755
--- a/contrib/eppex/install-sh
+++ b/contrib/eppex/install-sh
diff --git a/contrib/eppex/missing b/contrib/eppex/missing
index 894e786e1..894e786e1 100644..100755
--- a/contrib/eppex/missing
+++ b/contrib/eppex/missing
diff --git a/contrib/fuzzy-match/Makefile b/contrib/fuzzy-match/Makefile
new file mode 100644
index 000000000..5bb884a51
--- /dev/null
+++ b/contrib/fuzzy-match/Makefile
@@ -0,0 +1,16 @@
+all: suffix-test fuzzy-match fuzzy-match2
+
+clean:
+ rm -f *.o
+
+.cpp.o:
+ g++ -O6 -g -c $<
+
+suffix-test: Vocabulary.o SuffixArray.o suffix-test.o
+ g++ Vocabulary.o SuffixArray.o suffix-test.o -o suffix-test
+
+fuzzy-match: Vocabulary.o SuffixArray.o old/fuzzy-match.o
+ g++ Vocabulary.o SuffixArray.o fuzzy-match.o -o fuzzy-match
+
+fuzzy-match2: Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o
+ g++ Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o -o fuzzy-match2
diff --git a/contrib/fuzzy-match/Match.h b/contrib/fuzzy-match/Match.h
new file mode 100644
index 000000000..6fc8bb42f
--- /dev/null
+++ b/contrib/fuzzy-match/Match.h
@@ -0,0 +1,29 @@
+//
+// Match.h
+// fuzzy-match
+//
+// Created by Hieu Hoang on 25/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_Match_h
+#define fuzzy_match_Match_h
+
+/* data structure for n-gram match between input and corpus */
+
+class Match {
+public:
+ int input_start;
+ int input_end;
+ int tm_start;
+ int tm_end;
+ int min_cost;
+ int max_cost;
+ int internal_cost;
+ Match( int is, int ie, int ts, int te, int min, int max, int i )
+ :input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
+ {}
+};
+
+
+#endif
diff --git a/contrib/fuzzy-match/SentenceAlignment.h b/contrib/fuzzy-match/SentenceAlignment.h
new file mode 100644
index 000000000..4d92fd635
--- /dev/null
+++ b/contrib/fuzzy-match/SentenceAlignment.h
@@ -0,0 +1,48 @@
+//
+// SentenceAlignment.h
+// fuzzy-match
+//
+// Created by Hieu Hoang on 25/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_SentenceAlignment_h
+#define fuzzy_match_SentenceAlignment_h
+
+#include <sstream>
+#include "Vocabulary.h"
+
+extern Vocabulary vocabulary;
+
+struct SentenceAlignment
+{
+ int count;
+ vector< WORD_ID > target;
+ vector< pair<int,int> > alignment;
+
+ SentenceAlignment()
+ {}
+
+ string getTargetString() const
+ {
+ stringstream strme;
+ for (size_t i = 0; i < target.size(); ++i) {
+ const WORD &word = vocabulary.GetWord(target[i]);
+ strme << word << " ";
+ }
+ return strme.str();
+ }
+
+ string getAlignmentString() const
+ {
+ stringstream strme;
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ const pair<int,int> &alignPair = alignment[i];
+ strme << alignPair.first << "-" << alignPair.second << " ";
+ }
+ return strme.str();
+ }
+
+};
+
+#endif
diff --git a/contrib/fuzzy-match/SuffixArray.cpp b/contrib/fuzzy-match/SuffixArray.cpp
new file mode 100644
index 000000000..e0aa3da91
--- /dev/null
+++ b/contrib/fuzzy-match/SuffixArray.cpp
@@ -0,0 +1,244 @@
+#include "SuffixArray.h"
+#include <string>
+#include <stdlib.h>
+#include <cstring>
+
+using namespace std;
+
+SuffixArray::SuffixArray( string fileName )
+{
+ m_vcb.StoreIfNew( "<uNk>" );
+ m_endOfSentence = m_vcb.StoreIfNew( "<s>" );
+
+ ifstream extractFile;
+ char line[LINE_MAX_LENGTH];
+
+ // count the number of words first;
+ extractFile.open(fileName.c_str());
+ istream *fileP = &extractFile;
+ m_size = 0;
+ size_t sentenceCount = 0;
+ while(!fileP->eof()) {
+ SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
+ if (fileP->eof()) break;
+ vector< WORD_ID > words = m_vcb.Tokenize( line );
+ m_size += words.size() + 1;
+ sentenceCount++;
+ }
+ extractFile.close();
+ cerr << m_size << " words (incl. sentence boundaries)" << endl;
+
+ // allocate memory
+ m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
+ m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
+ m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
+ m_sentence = (size_t*) calloc( sizeof( size_t ), m_size );
+ m_sentenceLength = (char*) calloc( sizeof( char ), sentenceCount );
+
+ // fill the array
+ int wordIndex = 0;
+ int sentenceId = 0;
+ extractFile.open(fileName.c_str());
+ fileP = &extractFile;
+ while(!fileP->eof()) {
+ SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
+ if (fileP->eof()) break;
+ vector< WORD_ID > words = m_vcb.Tokenize( line );
+ vector< WORD_ID >::const_iterator i;
+
+ for( i=words.begin(); i!=words.end(); i++)
+ {
+ m_index[ wordIndex ] = wordIndex;
+ m_sentence[ wordIndex ] = sentenceId;
+ m_wordInSentence[ wordIndex ] = i-words.begin();
+ m_array[ wordIndex++ ] = *i;
+ }
+ m_index[ wordIndex ] = wordIndex;
+ m_array[ wordIndex++ ] = m_endOfSentence;
+ m_sentenceLength[ sentenceId++ ] = words.size();
+ }
+ extractFile.close();
+ cerr << "done reading " << wordIndex << " words, " << sentenceId << " sentences." << endl;
+ // List(0,9);
+
+ // sort
+ m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
+ Sort( 0, m_size-1 );
+ free( m_buffer );
+ cerr << "done sorting" << endl;
+}
+
+// good ol' quick sort
+void SuffixArray::Sort(INDEX start, INDEX end) {
+ if (start == end) return;
+ INDEX mid = (start+end+1)/2;
+ Sort( start, mid-1 );
+ Sort( mid, end );
+
+ // merge
+ int i = start;
+ int j = mid;
+ int k = 0;
+ int length = end-start+1;
+ while( k<length )
+ {
+ if (i == mid )
+ {
+ m_buffer[ k++ ] = m_index[ j++ ];
+ }
+ else if (j > end )
+ {
+ m_buffer[ k++ ] = m_index[ i++ ];
+ }
+ else {
+ if (CompareIndex( m_index[i], m_index[j] ) < 0)
+ {
+ m_buffer[ k++ ] = m_index[ i++ ];
+ }
+ else
+ {
+ m_buffer[ k++ ] = m_index[ j++ ];
+ }
+ }
+ }
+
+ memcpy( ((char*)m_index) + sizeof( INDEX ) * start,
+ ((char*)m_buffer), sizeof( INDEX ) * (end-start+1) );
+}
+
+SuffixArray::~SuffixArray()
+{
+ free(m_index);
+ free(m_array);
+}
+
+int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
+{
+ // skip over identical words
+ INDEX offset = 0;
+ while( a+offset < m_size &&
+ b+offset < m_size &&
+ m_array[ a+offset ] == m_array[ b+offset ] )
+ { offset++; }
+
+ if( a+offset == m_size ) return -1;
+ if( b+offset == m_size ) return 1;
+ return CompareWord( m_array[ a+offset ], m_array[ b+offset ] );
+}
+
+inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
+{
+ // cerr << "c(" << m_vcb.GetWord(a) << ":" << m_vcb.GetWord(b) << ")=" << m_vcb.GetWord(a).compare( m_vcb.GetWord(b) ) << endl;
+ return m_vcb.GetWord(a).compare( m_vcb.GetWord(b) );
+}
+
+int SuffixArray::Count( const vector< WORD > &phrase )
+{
+ INDEX dummy;
+ return LimitedCount( phrase, m_size, dummy, dummy, 0, m_size-1 );
+}
+
+bool SuffixArray::MinCount( const vector< WORD > &phrase, INDEX min )
+{
+ INDEX dummy;
+ return LimitedCount( phrase, min, dummy, dummy, 0, m_size-1 ) >= min;
+}
+
+bool SuffixArray::Exists( const vector< WORD > &phrase )
+{
+ INDEX dummy;
+ return LimitedCount( phrase, 1, dummy, dummy, 0, m_size-1 ) == 1;
+}
+
+int SuffixArray::FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
+{
+ return LimitedCount( phrase, m_size, firstMatch, lastMatch, search_start, search_end );
+}
+
+int SuffixArray::LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
+{
+ // cerr << "FindFirst\n";
+ INDEX start = search_start;
+ INDEX end = (search_end == -1) ? (m_size-1) : search_end;
+ INDEX mid = FindFirst( phrase, start, end );
+ // cerr << "done\n";
+ if (mid == m_size) return 0; // no matches
+ if (min == 1) return 1; // only existence check
+
+ int matchCount = 1;
+
+ //cerr << "before...\n";
+ firstMatch = FindLast( phrase, mid, start, -1 );
+ matchCount += mid - firstMatch;
+
+ //cerr << "after...\n";
+ lastMatch = FindLast( phrase, mid, end, 1 );
+ matchCount += lastMatch - mid;
+
+ return matchCount;
+}
+
+SuffixArray::INDEX SuffixArray::FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction )
+{
+ end += direction;
+ while(true)
+ {
+ INDEX mid = ( start + end + (direction>0 ? 0 : 1) )/2;
+
+ int match = Match( phrase, mid );
+ int matchNext = Match( phrase, mid+direction );
+ //cerr << "\t" << start << ";" << mid << ";" << end << " -> " << match << "," << matchNext << endl;
+
+ if (match == 0 && matchNext != 0) return mid;
+
+ if (match == 0) // mid point is a match
+ start = mid;
+ else
+ end = mid;
+ }
+}
+
+SuffixArray::INDEX SuffixArray::FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end )
+{
+ while(true)
+ {
+ INDEX mid = ( start + end + 1 )/2;
+ //cerr << "FindFirst(" << start << ";" << mid << ";" << end << ")\n";
+ int match = Match( phrase, mid );
+
+ if (match == 0) return mid;
+ if (start >= end && match != 0 ) return m_size;
+
+ if (match > 0)
+ start = mid+1;
+ else
+ end = mid-1;
+ }
+}
+
+int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
+{
+ INDEX pos = m_index[ index ];
+ for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++)
+ {
+ int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
+ // cerr << "{" << index << "+" << i << "," << pos+i << ":" << match << "}" << endl;
+ if (match != 0)
+ return match;
+ }
+ return 0;
+}
+
+void SuffixArray::List(INDEX start, INDEX end)
+{
+ for(INDEX i=start; i<=end; i++)
+ {
+ INDEX pos = m_index[ i ];
+ // cerr << i << ":" << pos << "\t";
+ for(int j=0; j<5 && j+pos<m_size; j++)
+ {
+ cout << " " << m_vcb.GetWord( m_array[ pos+j ] );
+ }
+ // cerr << "\n";
+ }
+}
diff --git a/contrib/fuzzy-match/SuffixArray.h b/contrib/fuzzy-match/SuffixArray.h
new file mode 100644
index 000000000..2deed4c32
--- /dev/null
+++ b/contrib/fuzzy-match/SuffixArray.h
@@ -0,0 +1,45 @@
+#include "Vocabulary.h"
+
+#pragma once
+
+#define LINE_MAX_LENGTH 10000
+
+
+class SuffixArray
+{
+public:
+ typedef unsigned int INDEX;
+
+private:
+ WORD_ID *m_array;
+ INDEX *m_index;
+ INDEX *m_buffer;
+ char *m_wordInSentence;
+ size_t *m_sentence;
+ char *m_sentenceLength;
+ WORD_ID m_endOfSentence;
+ Vocabulary m_vcb;
+ INDEX m_size;
+
+public:
+ SuffixArray( string fileName );
+ ~SuffixArray();
+
+ void Sort(INDEX start, INDEX end);
+ int CompareIndex( INDEX a, INDEX b ) const;
+ inline int CompareWord( WORD_ID a, WORD_ID b ) const;
+ int Count( const vector< WORD > &phrase );
+ bool MinCount( const vector< WORD > &phrase, INDEX min );
+ bool Exists( const vector< WORD > &phrase );
+ int FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
+ int LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
+ INDEX FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end );
+ INDEX FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction );
+ int Match( const vector< WORD > &phrase, INDEX index );
+ void List( INDEX start, INDEX end );
+ inline INDEX GetPosition( INDEX index ) { return m_index[ index ]; }
+ inline size_t GetSentence( INDEX position ) { return m_sentence[position]; }
+ inline char GetWordInSentence( INDEX position ) { return m_wordInSentence[position]; }
+ inline char GetSentenceLength( size_t sentenceId ) { return m_sentenceLength[sentenceId]; }
+ inline INDEX GetSize() { return m_size; }
+};
diff --git a/contrib/fuzzy-match/Util.cpp b/contrib/fuzzy-match/Util.cpp
new file mode 100644
index 000000000..4d750791e
--- /dev/null
+++ b/contrib/fuzzy-match/Util.cpp
@@ -0,0 +1,147 @@
+//
+// Util.cpp
+// fuzzy-match
+//
+// Created by Hieu Hoang on 26/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include <stdio.h>
+#include "Util.h"
+#include "SentenceAlignment.h"
+#include "SuffixArray.h"
+
+void load_corpus( const char* fileName, vector< vector< WORD_ID > > &corpus )
+{ // source
+ ifstream fileStream;
+ fileStream.open(fileName);
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ cerr << "loading " << fileName << endl;
+
+ istream *fileStreamP = &fileStream;
+
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+ corpus.push_back( vocabulary.Tokenize( line ) );
+ }
+}
+
+void load_target( const char* fileName, vector< vector< SentenceAlignment > > &corpus)
+{
+ ifstream fileStream;
+ fileStream.open(fileName);
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ cerr << "loading " << fileName << endl;
+
+ istream *fileStreamP = &fileStream;
+
+ WORD_ID delimiter = vocabulary.StoreIfNew("|||");
+
+ int lineNum = 0;
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+
+ vector<WORD_ID> toks = vocabulary.Tokenize( line );
+
+ corpus.push_back(vector< SentenceAlignment >());
+ vector< SentenceAlignment > &vec = corpus.back();
+
+ vec.push_back(SentenceAlignment());
+ SentenceAlignment *sentence = &vec.back();
+
+ const WORD &countStr = vocabulary.GetWord(toks[0]);
+ sentence->count = atoi(countStr.c_str());
+
+ for (size_t i = 1; i < toks.size(); ++i) {
+ WORD_ID wordId = toks[i];
+
+ if (wordId == delimiter) {
+ // target and alignments can have multiple sentences.
+ vec.push_back(SentenceAlignment());
+ sentence = &vec.back();
+
+ // count
+ ++i;
+
+ const WORD &countStr = vocabulary.GetWord(toks[i]);
+ sentence->count = atoi(countStr.c_str());
+ }
+ else {
+ // just a normal word, add
+ sentence->target.push_back(wordId);
+ }
+ }
+
+ ++lineNum;
+
+ }
+
+}
+
+
+void load_alignment( const char* fileName, vector< vector< SentenceAlignment > > &corpus )
+{
+ ifstream fileStream;
+ fileStream.open(fileName);
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ cerr << "loading " << fileName << endl;
+
+ istream *fileStreamP = &fileStream;
+
+ string delimiter = "|||";
+
+ int lineNum = 0;
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+
+ vector< SentenceAlignment > &vec = corpus[lineNum];
+ size_t targetInd = 0;
+ SentenceAlignment *sentence = &vec[targetInd];
+
+ vector<string> toks = Tokenize(line);
+
+ for (size_t i = 0; i < toks.size(); ++i) {
+ string &tok = toks[i];
+
+ if (tok == delimiter) {
+ // target and alignments can have multiple sentences.
+ ++targetInd;
+ sentence = &vec[targetInd];
+
+ ++i;
+ }
+ else {
+ // just a normal alignment, add
+ vector<int> alignPoint = Tokenize<int>(tok, "-");
+ assert(alignPoint.size() == 2);
+ sentence->alignment.push_back(pair<int,int>(alignPoint[0], alignPoint[1]));
+ }
+ }
+
+ ++lineNum;
+
+ }
+}
+
+
+
+
diff --git a/contrib/fuzzy-match/Util.h b/contrib/fuzzy-match/Util.h
new file mode 100644
index 000000000..7bb13d032
--- /dev/null
+++ b/contrib/fuzzy-match/Util.h
@@ -0,0 +1,87 @@
+//
+// Util.h
+// fuzzy-match
+//
+// Created by Hieu Hoang on 25/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_Util_h
+#define fuzzy_match_Util_h
+
+#include <vector>
+#include <sstream>
+#include "Vocabulary.h"
+
+class SentenceAlignment;
+
+void load_corpus( const char* fileName, std::vector< std::vector< WORD_ID > > &corpus );
+void load_target( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus);
+void load_alignment( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus );
+
+/**
+ * Convert vector of type T to string
+ */
+template <typename T>
+std::string Join(const std::string& delimiter, const std::vector<T>& items)
+{
+ std::ostringstream outstr;
+ if(items.size() == 0) return "";
+ outstr << items[0];
+ for(unsigned int i = 1; i < items.size(); i++)
+ outstr << delimiter << items[i];
+ return outstr.str();
+}
+
+//! convert string to variable of type T. Used to read floats, ints etc. from files
+template<typename T>
+inline T Scan(const std::string &input)
+{
+ std::stringstream stream(input);
+ T ret;
+ stream >> ret;
+ return ret;
+}
+
+//! convert vectors of string to vectors of type T variables
+template<typename T>
+inline std::vector<T> Scan(const std::vector< std::string > &input)
+{
+ std::vector<T> output(input.size());
+ for (size_t i = 0 ; i < input.size() ; i++) {
+ output[i] = Scan<T>( input[i] );
+ }
+ return output;
+}
+
+inline std::vector<std::string> Tokenize(const std::string& str,
+ const std::string& delimiters = " \t")
+{
+ std::vector<std::string> tokens;
+ // Skip delimiters at beginning.
+ std::string::size_type lastPos = str.find_first_not_of(delimiters, 0);
+ // Find first "non-delimiter".
+ std::string::size_type pos = str.find_first_of(delimiters, lastPos);
+
+ while (std::string::npos != pos || std::string::npos != lastPos) {
+ // Found a token, add it to the vector.
+ tokens.push_back(str.substr(lastPos, pos - lastPos));
+ // Skip delimiters. Note the "not_of"
+ lastPos = str.find_first_not_of(delimiters, pos);
+ // Find next "non-delimiter"
+ pos = str.find_first_of(delimiters, lastPos);
+ }
+
+ return tokens;
+}
+
+template<typename T>
+inline std::vector<T> Tokenize( const std::string &input
+ , const std::string& delimiters = " \t")
+{
+ std::vector<std::string> stringVector = Tokenize(input, delimiters);
+ return Scan<T>( stringVector );
+}
+
+
+#endif
diff --git a/contrib/fuzzy-match/Vocabulary.cpp b/contrib/fuzzy-match/Vocabulary.cpp
new file mode 100644
index 000000000..4492eec95
--- /dev/null
+++ b/contrib/fuzzy-match/Vocabulary.cpp
@@ -0,0 +1,45 @@
+// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
+#include "Vocabulary.h"
+
+// as in beamdecoder/tables.cpp
+vector<WORD_ID> Vocabulary::Tokenize( const char input[] ) {
+ vector< WORD_ID > token;
+ bool betweenWords = true;
+ int start=0;
+ int i=0;
+ for(; input[i] != '\0'; i++) {
+ bool isSpace = (input[i] == ' ' || input[i] == '\t');
+
+ if (!isSpace && betweenWords) {
+ start = i;
+ betweenWords = false;
+ }
+ else if (isSpace && !betweenWords) {
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ betweenWords = true;
+ }
+ }
+ if (!betweenWords)
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ return token;
+}
+
+WORD_ID Vocabulary::StoreIfNew( const WORD& word ) {
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+
+ if( i != lookup.end() )
+ return i->second;
+
+ WORD_ID id = vocab.size();
+ vocab.push_back( word );
+ lookup[ word ] = id;
+ return id;
+}
+
+WORD_ID Vocabulary::GetWordID( const WORD &word ) {
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+ if( i == lookup.end() )
+ return 0;
+ WORD_ID w= (WORD_ID) i->second;
+ return w;
+}
diff --git a/contrib/fuzzy-match/Vocabulary.h b/contrib/fuzzy-match/Vocabulary.h
new file mode 100644
index 000000000..3e48847a7
--- /dev/null
+++ b/contrib/fuzzy-match/Vocabulary.h
@@ -0,0 +1,40 @@
+// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
+
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <assert.h>
+#include <stdlib.h>
+#include <string>
+#include <queue>
+#include <map>
+#include <cmath>
+
+using namespace std;
+
+#define MAX_LENGTH 10000
+
+#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
+ _IS.getline(_LINE, _SIZE, _DELIM); \
+ if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
+ if (_IS.gcount() == _SIZE-1) { \
+ cerr << "Line too long! Buffer overflow. Delete lines >=" \
+ << _SIZE << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
+ << endl; \
+ exit(1); \
+ } \
+ }
+
+typedef string WORD;
+typedef unsigned int WORD_ID;
+
+class Vocabulary {
+ public:
+ map<WORD, WORD_ID> lookup;
+ vector< WORD > vocab;
+ WORD_ID StoreIfNew( const WORD& );
+ WORD_ID GetWordID( const WORD& );
+ vector<WORD_ID> Tokenize( const char[] );
+ inline WORD &GetWord( WORD_ID id ) const { WORD &i = (WORD&) vocab[ id ]; return i; }
+};
diff --git a/contrib/fuzzy-match/fuzzy-match2.cpp b/contrib/fuzzy-match/fuzzy-match2.cpp
new file mode 100644
index 000000000..c1252aa03
--- /dev/null
+++ b/contrib/fuzzy-match/fuzzy-match2.cpp
@@ -0,0 +1,460 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <map>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <cstring>
+#include <time.h>
+#include <fstream>
+#include "SentenceAlignment.h"
+#include "fuzzy-match2.h"
+#include "SuffixArray.h"
+
+/** This implementation is explained in
+ Koehn and Senellart: "Fast Approximate String Matching
+ with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
+
+using namespace std;
+
+int main(int argc, char* argv[])
+{
+ vector< vector< WORD_ID > > source, input;
+ vector< vector< SentenceAlignment > > targetAndAlignment;
+
+
+ while(1) {
+ static struct option long_options[] = {
+ {"basic", no_argument, &basic_flag, 1},
+ {"word", no_argument, &lsed_flag, 0},
+ {"unrefined", no_argument, &refined_flag, 0},
+ {"nolengthfilter", no_argument, &length_filter_flag, 0},
+ {"noparse", no_argument, &parse_flag, 0},
+ {"multiple", no_argument, &multiple_flag, 1},
+ {"minmatch", required_argument, 0, 'm'},
+ {0, 0, 0, 0}
+ };
+ int option_index = 0;
+ int c = getopt_long (argc, argv, "m:", long_options, &option_index);
+ if (c == -1) break;
+ switch (c) {
+ case 0:
+// if (long_options[option_index].flag != 0)
+// break;
+// printf ("option %s", long_options[option_index].name);
+// if (optarg)
+// printf (" with arg %s", optarg);
+// printf ("\n");
+ break;
+ case 'm':
+ min_match = atoi(optarg);
+ if (min_match < 1 || min_match > 100) {
+ cerr << "error: --minmatch must have value in range 1..100\n";
+ exit(1);
+ }
+ cerr << "setting min match to " << min_match << endl;
+ break;
+ default:
+ cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
+ exit(1);
+ }
+ }
+ if (lsed_flag) { cerr << "lsed\n"; }
+ if (basic_flag) { cerr << "basic\n"; }
+ if (refined_flag) { cerr << "refined\n"; }
+ if (length_filter_flag) { cerr << "length filter\n"; }
+ if (parse_flag) { cerr << "parse\n"; }
+// exit(1);
+
+
+ if (optind+4 != argc) {
+ cerr << "syntax: ./fuzzy-match input source target alignment [--basic] [--word] [--minmatch 1..100]\n";
+ exit(1);
+ }
+
+ load_corpus(argv[optind], input);
+ load_corpus(argv[optind+1], source);
+ load_target(argv[optind+2], targetAndAlignment);
+ load_alignment(argv[optind+3], targetAndAlignment);
+
+ // ./fuzzy-match input corpus [-basic]
+
+// load_corpus("../corpus/tm.truecased.4.en", source);
+// load_corpus("../corpus/tm.truecased.4.it", target);
+// load_corpus("../evaluation/test.input.tc.4", input);
+
+// load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
+// load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
+
+// load_corpus("../corpus/tm.truecased.16.en", source);
+// load_corpus("../evaluation/test.input.tc.16", input);
+
+ if (basic_flag) {
+ cerr << "using basic method\n";
+ clock_t start_main_clock2 = clock();
+ basic_fuzzy_match( source, input );
+ cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
+ exit(1);
+ }
+
+ cerr << "number of input sentences " << input.size() << endl;
+
+ cerr << "creating suffix array...\n";
+// SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
+// SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
+ SuffixArray suffixArray( argv[optind+1] );
+
+ clock_t start_main_clock = clock();
+
+ // looping through all input sentences...
+ cerr << "looping...\n";
+ for(unsigned int sentenceInd = 0; sentenceInd < input.size(); sentenceInd++)
+ {
+ clock_t start_clock = clock();
+ // if (i % 10 == 0) cerr << ".";
+
+ // establish some basic statistics
+
+ // int input_length = compute_length( input[i] );
+ int input_length = input[sentenceInd].size();
+ int best_cost = input_length * (100-min_match) / 100 + 1;
+
+ int match_count = 0; // how many substring matches to be considered
+ //cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
+
+ // find match ranges in suffix array
+ vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
+ for(size_t start=0;start<input[sentenceInd].size();start++)
+ {
+ SuffixArray::INDEX prior_first_match = 0;
+ SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
+ vector< string > substring;
+ bool stillMatched = true;
+ vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
+ //cerr << "start: " << start;
+ for(int word=start; stillMatched && word<input[sentenceInd].size(); word++)
+ {
+ substring.push_back( vocabulary.GetWord( input[sentenceInd][word] ) );
+
+ // only look up, if needed (i.e. no unnecessary short gram lookups)
+// if (! word-start+1 <= short_match_max_length( input_length ) )
+ // {
+ SuffixArray::INDEX first_match, last_match;
+ stillMatched = false;
+ if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
+ {
+ stillMatched = true;
+ matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
+ //cerr << " (" << first_match << "," << last_match << ")";
+ //cerr << " " << ( last_match - first_match + 1 );
+ prior_first_match = first_match;
+ prior_last_match = last_match;
+ }
+ //}
+ }
+ //cerr << endl;
+ match_range.push_back( matchedAtThisStart );
+ }
+
+ clock_t clock_range = clock();
+
+ map< int, vector< Match > > sentence_match;
+ map< int, int > sentence_match_word_count;
+
+ // go through all matches, longest first
+ for(int length = input[sentenceInd].size(); length >= 1; length--)
+ {
+ // do not create matches, if these are handled by the short match function
+ if (length <= short_match_max_length( input_length ) )
+ {
+ continue;
+ }
+
+ unsigned int count = 0;
+ for(int start = 0; start <= input[sentenceInd].size() - length; start++)
+ {
+ if (match_range[start].size() >= length)
+ {
+ pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
+ // cerr << " (" << range.first << "," << range.second << ")";
+ count += range.second - range.first + 1;
+
+ for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
+ {
+ int position = suffixArray.GetPosition( i );
+
+ // sentence length mismatch
+ size_t sentence_id = suffixArray.GetSentence( position );
+ int sentence_length = suffixArray.GetSentenceLength( sentence_id );
+ int diff = abs( (int)sentence_length - (int)input_length );
+ // cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
+ //if (length <= 2 && input_length>=5 &&
+ // sentence_match.find( sentence_id ) == sentence_match.end())
+ // continue;
+
+ if (diff > best_cost)
+ continue;
+
+ // compute minimal cost
+ int start_pos = suffixArray.GetWordInSentence( position );
+ int end_pos = start_pos + length-1;
+ // cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
+ // << start << "-" << (start+length-1) << " (" << input_length << ")";
+ // different number of prior words -> cost is at least diff
+ int min_cost = abs( start - start_pos );
+
+ // same number of words, but not sent. start -> cost is at least 1
+ if (start == start_pos && start>0)
+ min_cost++;
+
+ // different number of remaining words -> cost is at least diff
+ min_cost += abs( ( sentence_length-1 - end_pos ) -
+ ( input_length-1 - (start+length-1) ) );
+
+ // same number of words, but not sent. end -> cost is at least 1
+ if ( sentence_length-1 - end_pos ==
+ input_length-1 - (start+length-1)
+ && end_pos != sentence_length-1 )
+ min_cost++;
+
+ // cerr << " -> min_cost " << min_cost;
+ if (min_cost > best_cost)
+ continue;
+
+ // valid match
+ match_count++;
+
+ // compute maximal cost
+ int max_cost = max( start, start_pos )
+ + max( sentence_length-1 - end_pos,
+ input_length-1 - (start+length-1) );
+ // cerr << ", max_cost " << max_cost;
+
+ Match m = Match( start, start+length-1,
+ start_pos, start_pos+length-1,
+ min_cost, max_cost, 0);
+ sentence_match[ sentence_id ].push_back( m );
+ sentence_match_word_count[ sentence_id ] += length;
+
+ if (max_cost < best_cost)
+ {
+ best_cost = max_cost;
+ if (best_cost == 0) break;
+ }
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ }
+ // cerr << endl;
+ if (best_cost == 0) break;
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ // cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
+
+ if (best_cost == 0) break;
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
+
+ clock_t clock_matches = clock();
+
+ // consider each sentence for which we have matches
+ int old_best_cost = best_cost;
+ int tm_count_word_match = 0;
+ int tm_count_word_match2 = 0;
+ int pruned_match_count = 0;
+ if (short_match_max_length( input_length ))
+ {
+ init_short_matches( input[sentenceInd] );
+ }
+ vector< int > best_tm;
+ typedef map< int, vector< Match > >::iterator I;
+
+ clock_t clock_validation_sum = 0;
+
+ for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
+ {
+ int tmID = tm->first;
+ int tm_length = suffixArray.GetSentenceLength(tmID);
+ vector< Match > &match = tm->second;
+ add_short_matches( match, source[tmID], input_length, best_cost );
+
+ //cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
+
+ // quick look: how many words are matched
+ int words_matched = 0;
+ for(int m=0;m<match.size();m++) {
+
+ if (match[m].min_cost <= best_cost) // makes no difference
+ words_matched += match[m].input_end - match[m].input_start + 1;
+ }
+ if (max(input_length,tm_length) - words_matched > best_cost)
+ {
+ if (length_filter_flag) continue;
+ }
+ tm_count_word_match++;
+
+ // prune, check again how many words are matched
+ vector< Match > pruned = prune_matches( match, best_cost );
+ words_matched = 0;
+ for(int p=0;p<pruned.size();p++) {
+ words_matched += pruned[p].input_end - pruned[p].input_start + 1;
+ }
+ if (max(input_length,tm_length) - words_matched > best_cost)
+ {
+ if (length_filter_flag) continue;
+ }
+ tm_count_word_match2++;
+
+ pruned_match_count += pruned.size();
+ int prior_best_cost = best_cost;
+ int cost;
+
+ clock_t clock_validation_start = clock();
+ if (! parse_flag ||
+ pruned.size()>=10) // to prevent worst cases
+ {
+ string path;
+ cost = sed( input[sentenceInd], source[tmID], path, false );
+ if (cost < best_cost)
+ {
+ best_cost = cost;
+ }
+ }
+
+ else
+ {
+ cost = parse_matches( pruned, input_length, tm_length, best_cost );
+ if (prior_best_cost != best_cost)
+ {
+ best_tm.clear();
+ }
+ }
+ clock_validation_sum += clock() - clock_validation_start;
+ if (cost == best_cost)
+ {
+ best_tm.push_back( tmID );
+ }
+ }
+ cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
+ cerr << "tm considered: " << sentence_match.size()
+ << " word-matched: " << tm_count_word_match
+ << " word-matched2: " << tm_count_word_match2
+ << " best: " << best_tm.size() << endl;
+
+ cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
+
+ // create xml and extract files
+ string inputStr, sourceStr;
+ for (size_t pos = 0; pos < input_length; ++pos) {
+ inputStr += vocabulary.GetWord(input[sentenceInd][pos]) + " ";
+ }
+
+ // do not try to find the best ... report multiple matches
+ if (multiple_flag) {
+ int input_letter_length = compute_length( input[sentenceInd] );
+ for(int si=0; si<best_tm.size(); si++) {
+ int s = best_tm[si];
+ string path;
+ unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
+ // do not report multiple identical sentences, but just their count
+ cout << sentenceInd << " "; // sentence number
+ cout << letter_cost << "/" << input_letter_length << " ";
+ cout << "(" << best_cost <<"/" << input_length <<") ";
+ cout << "||| " << s << " ||| " << path << endl;
+
+ vector<WORD_ID> &sourceSentence = source[s];
+ vector<SentenceAlignment> &targets = targetAndAlignment[s];
+ create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, path);
+
+ }
+ } // if (multiple_flag)
+ else {
+
+ // find the best matches according to letter sed
+ string best_path = "";
+ int best_match = -1;
+ int best_letter_cost;
+ if (lsed_flag) {
+ best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
+ for(int si=0; si<best_tm.size(); si++)
+ {
+ int s = best_tm[si];
+ string path;
+ unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
+ if (letter_cost < best_letter_cost)
+ {
+ best_letter_cost = letter_cost;
+ best_path = path;
+ best_match = s;
+ }
+ }
+ }
+ // if letter sed turned off, just compute path for first match
+ else {
+ if (best_tm.size() > 0) {
+ string path;
+ sed( input[sentenceInd], source[best_tm[0]], path, false );
+ best_path = path;
+ best_match = best_tm[0];
+ }
+ }
+ cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
+ << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
+ << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
+ << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
+ << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
+ << " )" << endl;
+ if (lsed_flag) {
+ cout << best_letter_cost << "/" << compute_length( input[sentenceInd] ) << " (";
+ }
+ cout << best_cost <<"/" << input_length;
+ if (lsed_flag) cout << ")";
+ cout << " ||| " << best_match << " ||| " << best_path << endl;
+
+      // create xml & extract files
+ vector<WORD_ID> &sourceSentence = source[best_match];
+ vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
+ create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, best_path);
+
+ } // else if (multiple_flag)
+
+
+ }
+ cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
+
+}
+
+void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path)
+{
+ string sourceStr;
+ for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
+ WORD_ID wordId = sourceSentence[pos];
+ sourceStr += vocabulary.GetWord(wordId) + " ";
+ }
+
+ char *inputFileName = tmpnam(NULL);
+ ofstream inputFile(inputFileName);
+
+ for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
+ const SentenceAlignment &sentenceAlignment = targets[targetInd];
+ string targetStr = sentenceAlignment.getTargetString();
+ string alignStr = sentenceAlignment.getAlignmentString();
+
+ inputFile
+ << sentenceInd << endl
+ << cost << endl
+ << sourceStr << endl
+ << inputStr << endl
+ << targetStr << endl
+ << alignStr << endl
+ << path << endl
+ << sentenceAlignment.count << endl;
+
+ }
+
+ string cmd = string("perl create_xml.perl < ") + inputFileName;
+ cerr << cmd << endl;
+ inputFile.close();
+
+}
diff --git a/contrib/fuzzy-match/fuzzy-match2.h b/contrib/fuzzy-match/fuzzy-match2.h
new file mode 100644
index 000000000..614bf971f
--- /dev/null
+++ b/contrib/fuzzy-match/fuzzy-match2.h
@@ -0,0 +1,561 @@
+//
+// fuzzy-match2.h
+// fuzzy-match
+//
+// Created by Hieu Hoang on 25/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_fuzzy_match2_h
+#define fuzzy_match_fuzzy_match2_h
+
+#include <string>
+#include <sstream>
+#include <vector>
+#include "Vocabulary.h"
+#include "SuffixArray.h"
+#include "Util.h"
+#include "Match.h"
+
+#define MAX_MATCH_COUNT 10000000
+
+Vocabulary vocabulary;
+
+int basic_flag = false;
+int lsed_flag = true;
+int refined_flag = true;
+int length_filter_flag = true;
+int parse_flag = true;
+int min_match = 70;
+int multiple_flag = false;
+int multiple_slack = 0;
+int multiple_max = 100;
+map< WORD_ID,vector< int > > single_word_index;
+// global cache for word pairs
+map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
+
+void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path);
+
+
+
+/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
+
+unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
+{
+ // check if already computed -> lookup in cache
+ pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
+ map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
+ if (lookup != lsed.end())
+ {
+ return (lookup->second);
+ }
+
+ // get surface strings for word indices
+ const string &a = vocabulary.GetWord( aIdx );
+ const string &b = vocabulary.GetWord( bIdx );
+
+ // initialize cost matrix
+ unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
+ cost[i][0] = i;
+ }
+ for( unsigned int j=0; j<=b.size(); j++ ) {
+ cost[0][j] = j;
+ }
+
+ // core string edit distance loop
+ for( unsigned int i=1; i<=a.size(); i++ ) {
+ for( unsigned int j=1; j<=b.size(); j++ ) {
+
+ unsigned int ins = cost[i-1][j] + 1;
+ unsigned int del = cost[i][j-1] + 1;
+ bool match = (a.substr(i-1,1).compare( b.substr(j-1,1) ) == 0);
+ unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);
+
+ unsigned int min = (ins < del) ? ins : del;
+ min = (diag < min) ? diag : min;
+
+ cost[i][j] = min;
+ }
+ }
+
+ // clear out memory
+ unsigned int final = cost[a.size()][b.size()];
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ free( cost[i] );
+ }
+ free( cost );
+
+ // cache and return result
+ lsed[ pIdx ] = final;
+ return final;
+}
+
+/* string edit distance implementation */
+
+unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
+
+ // initialize cost and path matrices
+ unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
+ char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
+
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
+ path[i] = (char*) calloc( sizeof(char), b.size()+1 );
+ if (i>0)
+ {
+ cost[i][0] = cost[i-1][0];
+ if (use_letter_sed)
+ {
+ cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
+ }
+ else
+ {
+ cost[i][0]++;
+ }
+ }
+ else
+ {
+ cost[i][0] = 0;
+ }
+ path[i][0] = 'I';
+ }
+
+ for( unsigned int j=0; j<=b.size(); j++ ) {
+ if (j>0)
+ {
+ cost[0][j] = cost[0][j-1];
+ if (use_letter_sed)
+ {
+ cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
+ }
+ else
+ {
+ cost[0][j]++;
+ }
+ }
+ else
+ {
+ cost[0][j] = 0;
+ }
+ path[0][j] = 'D';
+ }
+
+ // core string edit distance algorithm
+ for( unsigned int i=1; i<=a.size(); i++ ) {
+ for( unsigned int j=1; j<=b.size(); j++ ) {
+ unsigned int ins = cost[i-1][j];
+ unsigned int del = cost[i][j-1];
+ unsigned int match;
+ if (use_letter_sed)
+ {
+ ins += vocabulary.GetWord( a[i-1] ).size();
+ del += vocabulary.GetWord( b[j-1] ).size();
+ match = letter_sed( a[i-1], b[j-1] );
+ }
+ else
+ {
+ ins++;
+ del++;
+ match = ( a[i-1] == b[j-1] ) ? 0 : 1;
+ }
+ unsigned int diag = cost[i-1][j-1] + match;
+
+ char action = (ins < del) ? 'I' : 'D';
+ unsigned int min = (ins < del) ? ins : del;
+ if (diag < min)
+ {
+ action = (match>0) ? 'S' : 'M';
+ min = diag;
+ }
+
+ cost[i][j] = min;
+ path[i][j] = action;
+ }
+ }
+
+ // construct string for best path
+ unsigned int i = a.size();
+ unsigned int j = b.size();
+ best_path = "";
+ while( i>0 || j>0 )
+ {
+ best_path = path[i][j] + best_path;
+ if (path[i][j] == 'I')
+ {
+ i--;
+ }
+ else if (path[i][j] == 'D')
+ {
+ j--;
+ }
+ else
+ {
+ i--;
+ j--;
+ }
+ }
+
+
+ // clear out memory
+ unsigned int final = cost[a.size()][b.size()];
+
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ free( cost[i] );
+ free( path[i] );
+ }
+ free( cost );
+ free( path );
+
+ // return result
+ return final;
+}
+
+/* utility function: compute length of sentence in characters
+ (spaces do not count) */
+
+unsigned int compute_length( const vector< WORD_ID > &sentence )
+{
+ unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
+ {
+ length += vocabulary.GetWord( sentence[i] ).size();
+ }
+ return length;
+}
+
+/* brute force method: compare input to all corpus sentences */
+
+int basic_fuzzy_match( vector< vector< WORD_ID > > source,
+ vector< vector< WORD_ID > > input )
+{
+ // go through input set...
+ for(unsigned int i=0;i<input.size();i++)
+ {
+ bool use_letter_sed = false;
+
+ // compute sentence length and worst allowed cost
+ unsigned int input_length;
+ if (use_letter_sed)
+ {
+ input_length = compute_length( input[i] );
+ }
+ else
+ {
+ input_length = input[i].size();
+ }
+ unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
+ string best_path = "";
+ int best_match = -1;
+
+ // go through all corpus sentences
+ for(unsigned int s=0;s<source.size();s++)
+ {
+ int source_length;
+ if (use_letter_sed)
+ {
+ source_length = compute_length( source[s] );
+ }
+ else
+ {
+ source_length = source[s].size();
+ }
+ int diff = abs((int)source_length - (int)input_length);
+ if (length_filter_flag && (diff >= best_cost))
+ {
+ continue;
+ }
+
+ // compute string edit distance
+ string path;
+ unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
+
+ // update if new best
+ if (cost < best_cost)
+ {
+ best_cost = cost;
+ best_path = path;
+ best_match = s;
+ }
+ }
+ cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
+ }
+}
+
+/* definition of short matches
+ very short n-gram matches (1-grams) will not be looked up in
+ the suffix array, since there are too many matches
+ and for longer sentences, at least one 2-gram match must occur */
+
+inline int short_match_max_length( int input_length )
+{
+ if ( ! refined_flag )
+ return 0;
+ if ( input_length >= 5 )
+ return 1;
+ return 0;
+}
+
+/* if we have non-short matches in a sentence, we need to
+ take a closer look at it.
+ this function creates a hash map for all input words and their positions
+ (to be used by the next function)
+   (done here, because this has to be done only once for an input sentence) */
+
+void init_short_matches( const vector< WORD_ID > &input )
+{
+ int max_length = short_match_max_length( input.size() );
+ if (max_length == 0)
+ return;
+
+ single_word_index.clear();
+
+ // store input words and their positions in hash map
+ for(int i=0; i<input.size(); i++)
+ {
+ if (single_word_index.find( input[i] ) == single_word_index.end())
+ {
+ vector< int > position_vector;
+ single_word_index[ input[i] ] = position_vector;
+ }
+ single_word_index[ input[i] ].push_back( i );
+ }
+}
+
+/* add all short matches to list of matches for a sentence */
+
+void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
+{
+ int max_length = short_match_max_length( input_length );
+ if (max_length == 0)
+ return;
+
+ int tm_length = tm.size();
+ map< WORD_ID,vector< int > >::iterator input_word_hit;
+ for(int t_pos=0; t_pos<tm.size(); t_pos++)
+ {
+ input_word_hit = single_word_index.find( tm[t_pos] );
+ if (input_word_hit != single_word_index.end())
+ {
+ vector< int > &position_vector = input_word_hit->second;
+ for(int j=0; j<position_vector.size(); j++)
+ {
+ int &i_pos = position_vector[j];
+
+ // before match
+ int max_cost = max( i_pos , t_pos );
+ int min_cost = abs( i_pos - t_pos );
+ if ( i_pos>0 && i_pos == t_pos )
+ min_cost++;
+
+ // after match
+ max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
+ min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
+ if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
+ min_cost++;
+
+ if (min_cost <= best_cost)
+ {
+ Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
+ match.push_back( new_match );
+ }
+ }
+ }
+ }
+}
+
+/* remove matches that are subsumed by a larger match */
+
+vector< Match > prune_matches( const vector< Match > &match, int best_cost )
+{
+ //cerr << "\tpruning";
+ vector< Match > pruned;
+ for(int i=match.size()-1; i>=0; i--)
+ {
+ //cerr << " (" << match[i].input_start << "," << match[i].input_end
+ // << " ; " << match[i].tm_start << "," << match[i].tm_end
+ // << " * " << match[i].min_cost << ")";
+
+ //if (match[i].min_cost > best_cost)
+ // continue;
+
+ bool subsumed = false;
+ for(int j=match.size()-1; j>=0; j--)
+ {
+ if (i!=j // do not compare match with itself
+ && ( match[i].input_end - match[i].input_start <=
+ match[j].input_end - match[j].input_start ) // i shorter than j
+ && ((match[i].input_start == match[j].input_start &&
+ match[i].tm_start == match[j].tm_start ) ||
+ (match[i].input_end == match[j].input_end &&
+ match[i].tm_end == match[j].tm_end) ) )
+ {
+ subsumed = true;
+ }
+ }
+ if (! subsumed && match[i].min_cost <= best_cost)
+ {
+ //cerr << "*";
+ pruned.push_back( match[i] );
+ }
+ }
+ //cerr << endl;
+ return pruned;
+}
+
+/* A* parsing method to compute string edit distance */
+
+int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
+{
+ // cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
+
+ if (match.size() == 1)
+ return match[0].max_cost;
+ if (match.size() == 0)
+ return input_length+tm_length;
+
+ int this_best_cost = input_length + tm_length;
+ for(int i=0;i<match.size();i++)
+ {
+ this_best_cost = min( this_best_cost, match[i].max_cost );
+ }
+ // cerr << "\tthis best cost: " << this_best_cost << endl;
+
+ // bottom up combination of spans
+ vector< vector< Match > > multi_match;
+ multi_match.push_back( match );
+
+ int match_level = 1;
+ while(multi_match[ match_level-1 ].size()>0)
+ {
+ // init vector
+ vector< Match > empty;
+ multi_match.push_back( empty );
+
+ for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
+ {
+ int second_level = match_level - first_level -1;
+ //cerr << "\tcombining level " << first_level << " and " << second_level << endl;
+
+ vector< Match > &first_match = multi_match[ first_level ];
+ vector< Match > &second_match = multi_match[ second_level ];
+
+ for(int i1 = 0; i1 < first_match.size(); i1++) {
+ for(int i2 = 0; i2 < second_match.size(); i2++) {
+
+ // do not combine the same pair twice
+ if (first_level == second_level && i2 <= i1)
+ {
+ continue;
+ }
+
+ // get sorted matches (first is before second)
+ Match *first, *second;
+ if (first_match[i1].input_start < second_match[i2].input_start )
+ {
+ first = &first_match[i1];
+ second = &second_match[i2];
+ }
+ else
+ {
+ second = &first_match[i1];
+ first = &second_match[i2];
+ }
+
+ //cerr << "\tcombining "
+ // << "(" << first->input_start << "," << first->input_end << "), "
+ // << first->tm_start << " [" << first->internal_cost << "]"
+ // << " with "
+ // << "(" << second->input_start << "," << second->input_end << "), "
+ // << second->tm_start<< " [" << second->internal_cost << "]"
+ // << endl;
+
+ // do not process overlapping matches
+ if (first->input_end >= second->input_start)
+ {
+ continue;
+ }
+
+ // no overlap / mismatch in tm
+ if (first->tm_end >= second->tm_start)
+ {
+ continue;
+ }
+
+ // compute cost
+ int min_cost = 0;
+ int max_cost = 0;
+
+ // initial
+ min_cost += abs( first->input_start - first->tm_start );
+ max_cost += max( first->input_start, first->tm_start );
+
+ // same number of words, but not sent. start -> cost is at least 1
+ if (first->input_start == first->tm_start && first->input_start > 0)
+ {
+ min_cost++;
+ }
+
+ // in-between
+ int skipped_words = second->input_start - first->input_end -1;
+ int skipped_words_tm = second->tm_start - first->tm_end -1;
+ int internal_cost = max( skipped_words, skipped_words_tm );
+ internal_cost += first->internal_cost + second->internal_cost;
+ min_cost += internal_cost;
+ max_cost += internal_cost;
+
+ // final
+ min_cost += abs( (tm_length-1 - second->tm_end) -
+ (input_length-1 - second->input_end) );
+ max_cost += max( (tm_length-1 - second->tm_end),
+ (input_length-1 - second->input_end) );
+
+ // same number of words, but not sent. end -> cost is at least 1
+ if ( ( input_length-1 - second->input_end
+ == tm_length-1 - second->tm_end )
+ && input_length-1 != second->input_end )
+ {
+ min_cost++;
+ }
+
+ // cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
+
+ // if worst than best cost, forget it
+ if (min_cost > best_cost)
+ {
+ continue;
+ }
+
+ // add match
+ Match new_match( first->input_start,
+ second->input_end,
+ first->tm_start,
+ second->tm_end,
+ min_cost,
+ max_cost,
+ internal_cost);
+ multi_match[ match_level ].push_back( new_match );
+ // cerr << "\tstored\n";
+
+ // possibly updating this_best_cost
+ if (max_cost < this_best_cost)
+ {
+ // cerr << "\tupdating this best cost to " << max_cost << "\n";
+ this_best_cost = max_cost;
+
+ // possibly updating best_cost
+ if (max_cost < best_cost)
+ {
+ // cerr << "\tupdating best cost to " << max_cost << "\n";
+ best_cost = max_cost;
+ }
+ }
+ }
+ }
+ }
+ match_level++;
+ }
+ return this_best_cost;
+}
+
+#endif
diff --git a/contrib/fuzzy-match/make-xml-from-match.perl b/contrib/fuzzy-match/make-xml-from-match.perl
new file mode 100644
index 000000000..b5c213a3d
--- /dev/null
+++ b/contrib/fuzzy-match/make-xml-from-match.perl
@@ -0,0 +1,214 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $DEBUG = 1;
+
+my $match_file = "tm/BEST.acquis-xml-escaped.4.uniq";
+my $source_file = "data/acquis.truecased.4.en.uniq";
+my $target_file = "data/acquis.truecased.4.fr.uniq.most-frequent";
+my $alignment_file = "data/acquis.truecased.4.align.uniq.most-frequent";
+my $out_file = "data/ac-test.input.xml.4.uniq";
+my $in_file = "evaluation/ac-test.input.tc.4";
+
+#my $match_file = "tm/BEST.acquis-xml-escaped.4";
+#my $source_file = "corpus/acquis.truecased.4.en";
+#my $target_file = "corpus/acquis.truecased.4.fr";
+#my $alignment_file = "model/aligned.4.grow-diag-final-and";
+#my $out_file = "data/ac-test.input.xml.4";
+#my $in_file = "evaluation/ac-test.input.tc.4";
+
+#my $match_file = "tm/BEST.acquis.with";
+#my $source_file = "../acquis-truecase/corpus/acquis.truecased.190.en";
+#my $target_file = "../acquis-truecase/corpus/acquis.truecased.190.fr";
+#my $alignment_file = "../acquis-truecase/model/aligned.190.grow-diag-final-and";
+#my $out_file = "data/ac-test.input.xml";
+#my $in_file = "evaluation/ac-test.input.tc.1";
+
+my @INPUT = `cat $in_file`; chop(@INPUT);
+my @SOURCE = `cat $source_file`; chop(@SOURCE);
+my @TARGET = `cat $target_file`; chop(@TARGET);
+my @ALIGNMENT = `cat $alignment_file`; chop(@ALIGNMENT);
+
+open(MATCH,$match_file);
+open(FRAME,">$out_file");
+for(my $i=0;$i<4107;$i++) {
+
+ # get match data
+ my $match = <MATCH>;
+ chop($match);
+ my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
+
+ # construct frame
+ if ($sentence < 1e9 && $sentence >= 0) {
+ my $frame = &create_xml($SOURCE[$sentence],
+ $INPUT[$i],
+ $TARGET[$sentence],
+ $ALIGNMENT[$sentence],
+ $path);
+ print FRAME $frame."\n";
+ }
+
+ # no frame -> output source
+ else {
+ print FRAME $INPUT[$i]."\n";
+ }
+}
+close(FRAME);
+close(MATCH);
+
+sub create_xml {
+ my ($source,$input,$target,$alignment,$path) = @_;
+
+ my @INPUT = split(/ /,$input);
+ my @SOURCE = split(/ /,$source);
+ my @TARGET = split(/ /,$target);
+ my %ALIGN = &create_alignment($alignment);
+
+ my %FRAME_INPUT;
+ my @TARGET_BITMAP;
+ foreach (@TARGET) { push @TARGET_BITMAP,1 }
+
+ ### STEP 1: FIND MISMATCHES
+
+ my ($s,$i) = (0,0);
+ my $currently_matching = 0;
+ my ($start_s,$start_i) = (0,0);
+
+ $path .= "X"; # indicate end
+ print "$input\n$source\n$target\n$path\n";
+ for(my $p=0;$p<length($path);$p++) {
+ my $action = substr($path,$p,1);
+
+ # beginning of a mismatch
+ if ($currently_matching && $action ne "M" && $action ne "X") {
+ $start_i = $i;
+ $start_s = $s;
+ $currently_matching = 0;
+ }
+
+ # end of a mismatch
+ elsif (!$currently_matching &&
+ ($action eq "M" || $action eq "X")) {
+
+ # remove use of affected target words
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $TARGET_BITMAP[$tt] = 0;
+ }
+
+ # also remove enclosed unaligned words?
+ }
+
+ # are there input words that need to be inserted ?
+ print "($start_i<$i)?\n";
+ if ($start_i<$i) {
+
+ # take note of input words to be inserted
+ my $insertion = "";
+ for(my $ii = $start_i; $ii<$i; $ii++) {
+ $insertion .= $INPUT[$ii]." ";
+ }
+
+ # find position for inserted input words
+
+ # find first removed target word
+ my $start_t = 1000;
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt < $start_t;
+ }
+ }
+
+ # end of sentence? add to end
+ if ($start_t == 1000 && $i > $#INPUT) {
+ $start_t = $#TARGET;
+ }
+
+ # backtrack to previous words if unaligned
+ if ($start_t == 1000) {
+ $start_t = -1;
+ for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt > $start_t;
+ }
+ }
+ }
+ $FRAME_INPUT{$start_t} .= $insertion;
+ }
+
+ $currently_matching = 1;
+ }
+
+ print "$action $s $i ($start_s $start_i) $currently_matching";
+ if ($action ne "I") {
+ print " ->";
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
+ print " ".$tt;
+ }
+ }
+ print "\n";
+ $s++ unless $action eq "I";
+ $i++ unless $action eq "D";
+ }
+
+
+ print $target."\n";
+ foreach (@TARGET_BITMAP) { print $_; } print "\n";
+ foreach (sort keys %FRAME_INPUT) {
+ print "$_: $FRAME_INPUT{$_}\n";
+ }
+
+ ### STEP 2: BUILD FRAME
+
+ # modify frame
+ my $frame = "";
+ $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
+
+ my $currently_included = 0;
+ my $start_t = -1;
+ push @TARGET_BITMAP,0; # indicate end
+
+ for(my $t=0;$t<=scalar(@TARGET);$t++) {
+
+ # beginning of tm target inclusion
+ if (!$currently_included && $TARGET_BITMAP[$t]) {
+ $start_t = $t;
+ $currently_included = 1;
+ }
+
+ # end of tm target inclusion (not included word or inserted input)
+ elsif ($currently_included &&
+ (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
+ # add xml (unless change is at the beginning of the sentence
+ if ($start_t >= 0) {
+ my $target = "";
+ print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
+ for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
+ $target .= $TARGET[$tt] . " ";
+ }
+ chop($target);
+ $frame .= "<xml translation=\"$target\"> x </xml> ";
+ }
+ $currently_included = 0;
+ }
+
+ $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
+ print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
+ }
+
+ print $frame."\n-------------------------------------\n";
+ return $frame;
+}
+
+sub create_alignment {
+ my ($line) = @_;
+ my (@ALIGNED_TO_S,@ALIGNED_TO_T);
+ foreach my $point (split(/ /,$line)) {
+ my ($s,$t) = split(/\-/,$point);
+ $ALIGNED_TO_S[$s]{$t}++;
+ $ALIGNED_TO_T[$t]{$s}++;
+ }
+ my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
+ return %ALIGNMENT;
+}
diff --git a/contrib/fuzzy-match/old/fuzzy-match.cpp b/contrib/fuzzy-match/old/fuzzy-match.cpp
new file mode 100644
index 000000000..76c69e246
--- /dev/null
+++ b/contrib/fuzzy-match/old/fuzzy-match.cpp
@@ -0,0 +1,982 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <vector>
+#include <map>
+#include <string>
+#include <algorithm>
+#include <iostream>
+#include <fstream>
+#include <cstring>
+#include <time.h>
+
+#include "Vocabulary.h"
+#include "SuffixArray.h"
+
+/** This implementation is explained in
+ Koehn and Senellart: "Fast Approximate String Matching
+ with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
+
+using namespace std;
+
+Vocabulary vocabulary;
+
+int basic_flag = false;
+int lsed_flag = true;
+int refined_flag = true;
+int length_filter_flag = true;
+int parse_flag = true;
+int min_match = 70;
+int multiple_flag = false;
+int multiple_slack = 0;
+int multiple_max = 100;
+
+void load_corpus( char* fileName, vector< vector< WORD_ID > > &corpus )
+{
+ ifstream fileStream;
+ fileStream.open(fileName);
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ istream *fileStreamP = &fileStream;
+
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+ corpus.push_back( vocabulary.Tokenize( line ) );
+ }
+}
+
+
+/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
+
+// global cache for word pairs
+map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
+
+unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
+{
+ // check if already computed -> lookup in cache
+ pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
+ map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
+ if (lookup != lsed.end())
+ {
+ return (lookup->second);
+ }
+
+ // get surface strings for word indices
+ const string &a = vocabulary.GetWord( aIdx );
+ const string &b = vocabulary.GetWord( bIdx );
+
+ // initialize cost matrix
+ unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
+ cost[i][0] = i;
+ }
+ for( unsigned int j=0; j<=b.size(); j++ ) {
+ cost[0][j] = j;
+ }
+
+ // core string edit distance loop
+ for( unsigned int i=1; i<=a.size(); i++ ) {
+ for( unsigned int j=1; j<=b.size(); j++ ) {
+
+ unsigned int ins = cost[i-1][j] + 1;
+ unsigned int del = cost[i][j-1] + 1;
+ bool match = (a.substr(i-1,1).compare( b.substr(j-1,1) ) == 0);
+ unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);
+
+ unsigned int min = (ins < del) ? ins : del;
+ min = (diag < min) ? diag : min;
+
+ cost[i][j] = min;
+ }
+ }
+
+ // clear out memory
+ unsigned int final = cost[a.size()][b.size()];
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ free( cost[i] );
+ }
+ free( cost );
+
+ // cache and return result
+ lsed[ pIdx ] = final;
+ return final;
+}
+
+/* string edit distance implementation */
+
+unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
+
+ // initialize cost and path matrices
+ unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
+ char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
+
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
+ path[i] = (char*) calloc( sizeof(char), b.size()+1 );
+ if (i>0)
+ {
+ cost[i][0] = cost[i-1][0];
+ if (use_letter_sed)
+ {
+ cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
+ }
+ else
+ {
+ cost[i][0]++;
+ }
+ }
+ else
+ {
+ cost[i][0] = 0;
+ }
+ path[i][0] = 'I';
+ }
+
+ for( unsigned int j=0; j<=b.size(); j++ ) {
+ if (j>0)
+ {
+ cost[0][j] = cost[0][j-1];
+ if (use_letter_sed)
+ {
+ cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
+ }
+ else
+ {
+ cost[0][j]++;
+ }
+ }
+ else
+ {
+ cost[0][j] = 0;
+ }
+ path[0][j] = 'D';
+ }
+
+ // core string edit distance algorithm
+ for( unsigned int i=1; i<=a.size(); i++ ) {
+ for( unsigned int j=1; j<=b.size(); j++ ) {
+ unsigned int ins = cost[i-1][j];
+ unsigned int del = cost[i][j-1];
+ unsigned int match;
+ if (use_letter_sed)
+ {
+ ins += vocabulary.GetWord( a[i-1] ).size();
+ del += vocabulary.GetWord( b[j-1] ).size();
+ match = letter_sed( a[i-1], b[j-1] );
+ }
+ else
+ {
+ ins++;
+ del++;
+ match = ( a[i-1] == b[j-1] ) ? 0 : 1;
+ }
+ unsigned int diag = cost[i-1][j-1] + match;
+
+ char action = (ins < del) ? 'I' : 'D';
+ unsigned int min = (ins < del) ? ins : del;
+ if (diag < min)
+ {
+ action = (match>0) ? 'S' : 'M';
+ min = diag;
+ }
+
+ cost[i][j] = min;
+ path[i][j] = action;
+ }
+ }
+
+ // construct string for best path
+ unsigned int i = a.size();
+ unsigned int j = b.size();
+ best_path = "";
+ while( i>0 || j>0 )
+ {
+ best_path = path[i][j] + best_path;
+ if (path[i][j] == 'I')
+ {
+ i--;
+ }
+ else if (path[i][j] == 'D')
+ {
+ j--;
+ }
+ else
+ {
+ i--;
+ j--;
+ }
+ }
+
+
+ // clear out memory
+ unsigned int final = cost[a.size()][b.size()];
+
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ free( cost[i] );
+ free( path[i] );
+ }
+ free( cost );
+ free( path );
+
+ // return result
+ return final;
+}
+
+/* utlility function: compute length of sentence in characters
+ (spaces do not count) */
+
+unsigned int compute_length( const vector< WORD_ID > &sentence )
+{
+ unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
+ {
+ length += vocabulary.GetWord( sentence[i] ).size();
+ }
+ return length;
+}
+
+/* brute force method: compare input to all corpus sentences */
+
+int basic_fuzzy_match( vector< vector< WORD_ID > > source,
+ vector< vector< WORD_ID > > input )
+{
+ // go through input set...
+ for(unsigned int i=0;i<input.size();i++)
+ {
+ bool use_letter_sed = false;
+
+ // compute sentence length and worst allowed cost
+ unsigned int input_length;
+ if (use_letter_sed)
+ {
+ input_length = compute_length( input[i] );
+ }
+ else
+ {
+ input_length = input[i].size();
+ }
+ unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
+ string best_path = "";
+ int best_match = -1;
+
+ // go through all corpus sentences
+ for(unsigned int s=0;s<source.size();s++)
+ {
+ int source_length;
+ if (use_letter_sed)
+ {
+ source_length = compute_length( source[s] );
+ }
+ else
+ {
+ source_length = source[s].size();
+ }
+ int diff = abs((int)source_length - (int)input_length);
+ if (length_filter_flag && (diff >= best_cost))
+ {
+ continue;
+ }
+
+ // compute string edit distance
+ string path;
+ unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
+
+ // update if new best
+ if (cost < best_cost)
+ {
+ best_cost = cost;
+ best_path = path;
+ best_match = s;
+ }
+ }
+ cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
+ }
+}
+
+#define MAX_MATCH_COUNT 10000000
+
+/* data structure for n-gram match between input and corpus */
+
+class Match {
+public:
+ int input_start;
+ int input_end;
+ int tm_start;
+ int tm_end;
+ int min_cost;
+ int max_cost;
+ int internal_cost;
+ Match( int is, int ie, int ts, int te, int min, int max, int i )
+ :input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
+ {}
+};
+
+map< WORD_ID,vector< int > > single_word_index;
+
+/* definition of short matches
+ very short n-gram matches (1-grams) will not be looked up in
+ the suffix array, since there are too many matches
+ and for longer sentences, at least one 2-gram match must occur */
+
+inline int short_match_max_length( int input_length )
+{
+ if ( ! refined_flag )
+ return 0;
+ if ( input_length >= 5 )
+ return 1;
+ return 0;
+}
+
+/* if we have non-short matches in a sentence, we need to
+ take a closer look at it.
+ this function creates a hash map for all input words and their positions
+ (to be used by the next function)
+ (done here, because this has be done only once for an input sentence) */
+
+void init_short_matches( const vector< WORD_ID > &input )
+{
+ int max_length = short_match_max_length( input.size() );
+ if (max_length == 0)
+ return;
+
+ single_word_index.clear();
+
+ // store input words and their positions in hash map
+ for(int i=0; i<input.size(); i++)
+ {
+ if (single_word_index.find( input[i] ) == single_word_index.end())
+ {
+ vector< int > position_vector;
+ single_word_index[ input[i] ] = position_vector;
+ }
+ single_word_index[ input[i] ].push_back( i );
+ }
+}
+
+/* add all short matches to list of matches for a sentence */
+
+void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
+{
+ int max_length = short_match_max_length( input_length );
+ if (max_length == 0)
+ return;
+
+ int tm_length = tm.size();
+ map< WORD_ID,vector< int > >::iterator input_word_hit;
+ for(int t_pos=0; t_pos<tm.size(); t_pos++)
+ {
+ input_word_hit = single_word_index.find( tm[t_pos] );
+ if (input_word_hit != single_word_index.end())
+ {
+ vector< int > &position_vector = input_word_hit->second;
+ for(int j=0; j<position_vector.size(); j++)
+ {
+ int &i_pos = position_vector[j];
+
+ // before match
+ int max_cost = max( i_pos , t_pos );
+ int min_cost = abs( i_pos - t_pos );
+ if ( i_pos>0 && i_pos == t_pos )
+ min_cost++;
+
+ // after match
+ max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
+ min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
+ if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
+ min_cost++;
+
+ if (min_cost <= best_cost)
+ {
+ Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
+ match.push_back( new_match );
+ }
+ }
+ }
+ }
+}
+
+/* remove matches that are subsumed by a larger match */
+
+vector< Match > prune_matches( const vector< Match > &match, int best_cost )
+{
+ //cerr << "\tpruning";
+ vector< Match > pruned;
+ for(int i=match.size()-1; i>=0; i--)
+ {
+ //cerr << " (" << match[i].input_start << "," << match[i].input_end
+ // << " ; " << match[i].tm_start << "," << match[i].tm_end
+ // << " * " << match[i].min_cost << ")";
+
+ //if (match[i].min_cost > best_cost)
+ // continue;
+
+ bool subsumed = false;
+ for(int j=match.size()-1; j>=0; j--)
+ {
+ if (i!=j // do not compare match with itself
+ && ( match[i].input_end - match[i].input_start <=
+ match[j].input_end - match[j].input_start ) // i shorter than j
+ && ((match[i].input_start == match[j].input_start &&
+ match[i].tm_start == match[j].tm_start ) ||
+ (match[i].input_end == match[j].input_end &&
+ match[i].tm_end == match[j].tm_end) ) )
+ {
+ subsumed = true;
+ }
+ }
+ if (! subsumed && match[i].min_cost <= best_cost)
+ {
+ //cerr << "*";
+ pruned.push_back( match[i] );
+ }
+ }
+ //cerr << endl;
+ return pruned;
+}
+
+/* A* parsing method to compute string edit distance */
+
+int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
+{
+ // cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
+
+ if (match.size() == 1)
+ return match[0].max_cost;
+ if (match.size() == 0)
+ return input_length+tm_length;
+
+ int this_best_cost = input_length + tm_length;
+ for(int i=0;i<match.size();i++)
+ {
+ this_best_cost = min( this_best_cost, match[i].max_cost );
+ }
+ // cerr << "\tthis best cost: " << this_best_cost << endl;
+
+ // bottom up combination of spans
+ vector< vector< Match > > multi_match;
+ multi_match.push_back( match );
+
+ int match_level = 1;
+ while(multi_match[ match_level-1 ].size()>0)
+ {
+ // init vector
+ vector< Match > empty;
+ multi_match.push_back( empty );
+
+ for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
+ {
+ int second_level = match_level - first_level -1;
+ //cerr << "\tcombining level " << first_level << " and " << second_level << endl;
+
+ vector< Match > &first_match = multi_match[ first_level ];
+ vector< Match > &second_match = multi_match[ second_level ];
+
+ for(int i1 = 0; i1 < first_match.size(); i1++) {
+ for(int i2 = 0; i2 < second_match.size(); i2++) {
+
+ // do not combine the same pair twice
+ if (first_level == second_level && i2 <= i1)
+ {
+ continue;
+ }
+
+ // get sorted matches (first is before second)
+ Match *first, *second;
+ if (first_match[i1].input_start < second_match[i2].input_start )
+ {
+ first = &first_match[i1];
+ second = &second_match[i2];
+ }
+ else
+ {
+ second = &first_match[i1];
+ first = &second_match[i2];
+ }
+
+ //cerr << "\tcombining "
+ // << "(" << first->input_start << "," << first->input_end << "), "
+ // << first->tm_start << " [" << first->internal_cost << "]"
+ // << " with "
+ // << "(" << second->input_start << "," << second->input_end << "), "
+ // << second->tm_start<< " [" << second->internal_cost << "]"
+ // << endl;
+
+ // do not process overlapping matches
+ if (first->input_end >= second->input_start)
+ {
+ continue;
+ }
+
+ // no overlap / mismatch in tm
+ if (first->tm_end >= second->tm_start)
+ {
+ continue;
+ }
+
+ // compute cost
+ int min_cost = 0;
+ int max_cost = 0;
+
+ // initial
+ min_cost += abs( first->input_start - first->tm_start );
+ max_cost += max( first->input_start, first->tm_start );
+
+ // same number of words, but not sent. start -> cost is at least 1
+ if (first->input_start == first->tm_start && first->input_start > 0)
+ {
+ min_cost++;
+ }
+
+ // in-between
+ int skipped_words = second->input_start - first->input_end -1;
+ int skipped_words_tm = second->tm_start - first->tm_end -1;
+ int internal_cost = max( skipped_words, skipped_words_tm );
+ internal_cost += first->internal_cost + second->internal_cost;
+ min_cost += internal_cost;
+ max_cost += internal_cost;
+
+ // final
+ min_cost += abs( (tm_length-1 - second->tm_end) -
+ (input_length-1 - second->input_end) );
+ max_cost += max( (tm_length-1 - second->tm_end),
+ (input_length-1 - second->input_end) );
+
+ // same number of words, but not sent. end -> cost is at least 1
+ if ( ( input_length-1 - second->input_end
+ == tm_length-1 - second->tm_end )
+ && input_length-1 != second->input_end )
+ {
+ min_cost++;
+ }
+
+ // cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
+
+ // if worst than best cost, forget it
+ if (min_cost > best_cost)
+ {
+ continue;
+ }
+
+ // add match
+ Match new_match( first->input_start,
+ second->input_end,
+ first->tm_start,
+ second->tm_end,
+ min_cost,
+ max_cost,
+ internal_cost);
+ multi_match[ match_level ].push_back( new_match );
+ // cerr << "\tstored\n";
+
+ // possibly updating this_best_cost
+ if (max_cost < this_best_cost)
+ {
+ // cerr << "\tupdating this best cost to " << max_cost << "\n";
+ this_best_cost = max_cost;
+
+ // possibly updating best_cost
+ if (max_cost < best_cost)
+ {
+ // cerr << "\tupdating best cost to " << max_cost << "\n";
+ best_cost = max_cost;
+ }
+ }
+ }
+ }
+ }
+ match_level++;
+ }
+ return this_best_cost;
+}
+
+int main(int argc, char* argv[])
+{
+ vector< vector< WORD_ID > > source, input;
+
+ while(1) {
+ static struct option long_options[] = {
+ {"basic", no_argument, &basic_flag, 1},
+ {"word", no_argument, &lsed_flag, 0},
+ {"unrefined", no_argument, &refined_flag, 0},
+ {"nolengthfilter", no_argument, &length_filter_flag, 0},
+ {"noparse", no_argument, &parse_flag, 0},
+ {"multiple", no_argument, &multiple_flag, 1},
+ {"minmatch", required_argument, 0, 'm'},
+ {0, 0, 0, 0}
+ };
+ int option_index = 0;
+ int c = getopt_long (argc, argv, "m:", long_options, &option_index);
+ if (c == -1) break;
+ switch (c) {
+ case 0:
+// if (long_options[option_index].flag != 0)
+// break;
+// printf ("option %s", long_options[option_index].name);
+// if (optarg)
+// printf (" with arg %s", optarg);
+// printf ("\n");
+ break;
+ case 'm':
+ min_match = atoi(optarg);
+ if (min_match < 1 || min_match > 100) {
+ cerr << "error: --minmatch must have value in range 1..100\n";
+ exit(1);
+ }
+ cerr << "setting min match to " << min_match << endl;
+ break;
+ default:
+ cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
+ exit(1);
+ }
+ }
+ if (lsed_flag) { cerr << "lsed\n"; }
+ if (basic_flag) { cerr << "basic\n"; }
+ if (refined_flag) { cerr << "refined\n"; }
+ if (length_filter_flag) { cerr << "length filter\n"; }
+ if (parse_flag) { cerr << "parse\n"; }
+// exit(1);
+
+
+ if (optind+2 != argc) {
+ cerr << "syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
+ exit(1);
+ }
+
+ cerr << "loading corpus...\n";
+
+ load_corpus(argv[optind], input);
+ load_corpus(argv[optind+1], source);
+
+ // ./fuzzy-match input corpus [-basic]
+
+// load_corpus("../corpus/tm.truecased.4.en", source);
+// load_corpus("../corpus/tm.truecased.4.it", target);
+// load_corpus("../evaluation/test.input.tc.4", input);
+
+// load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
+// load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
+
+// load_corpus("../corpus/tm.truecased.16.en", source);
+// load_corpus("../evaluation/test.input.tc.16", input);
+
+ if (basic_flag) {
+ cerr << "using basic method\n";
+ clock_t start_main_clock2 = clock();
+ basic_fuzzy_match( source, input );
+ cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
+ exit(1);
+ }
+
+ cerr << "number of input sentences " << input.size() << endl;
+
+ cerr << "creating suffix array...\n";
+// SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
+// SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
+ SuffixArray suffixArray( argv[optind+1] );
+
+ clock_t start_main_clock = clock();
+
+ // looping through all input sentences...
+ cerr << "looping...\n";
+ for(unsigned int i=0;i<input.size();i++)
+ {
+ clock_t start_clock = clock();
+ // if (i % 10 == 0) cerr << ".";
+ int input_id = i; // clean up this mess!
+
+ // establish some basic statistics
+
+ // int input_length = compute_length( input[i] );
+ int input_length = input[i].size();
+ int best_cost = input_length * (100-min_match) / 100 + 1;
+
+ int match_count = 0; // how many substring matches to be considered
+ //cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
+
+ // find match ranges in suffix array
+ vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
+ for(size_t start=0;start<input[i].size();start++)
+ {
+ SuffixArray::INDEX prior_first_match = 0;
+ SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
+ vector< string > substring;
+ bool stillMatched = true;
+ vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
+ //cerr << "start: " << start;
+ for(int word=start; stillMatched && word<input[i].size(); word++)
+ {
+ substring.push_back( vocabulary.GetWord( input[i][word] ) );
+
+ // only look up, if needed (i.e. no unnecessary short gram lookups)
+// if (! word-start+1 <= short_match_max_length( input_length ) )
+ // {
+ SuffixArray::INDEX first_match, last_match;
+ stillMatched = false;
+ if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
+ {
+ stillMatched = true;
+ matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
+ //cerr << " (" << first_match << "," << last_match << ")";
+ //cerr << " " << ( last_match - first_match + 1 );
+ prior_first_match = first_match;
+ prior_last_match = last_match;
+ }
+ //}
+ }
+ //cerr << endl;
+ match_range.push_back( matchedAtThisStart );
+ }
+
+ clock_t clock_range = clock();
+
+ map< int, vector< Match > > sentence_match;
+ map< int, int > sentence_match_word_count;
+
+ // go through all matches, longest first
+ for(int length = input[i].size(); length >= 1; length--)
+ {
+ // do not create matches, if these are handled by the short match function
+ if (length <= short_match_max_length( input_length ) )
+ {
+ continue;
+ }
+
+ unsigned int count = 0;
+ for(int start = 0; start <= input[i].size() - length; start++)
+ {
+ if (match_range[start].size() >= length)
+ {
+ pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
+ // cerr << " (" << range.first << "," << range.second << ")";
+ count += range.second - range.first + 1;
+
+ for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
+ {
+ int position = suffixArray.GetPosition( i );
+
+ // sentence length mismatch
+ size_t sentence_id = suffixArray.GetSentence( position );
+ int sentence_length = suffixArray.GetSentenceLength( sentence_id );
+ int diff = abs( (int)sentence_length - (int)input_length );
+ // cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
+ //if (length <= 2 && input_length>=5 &&
+ // sentence_match.find( sentence_id ) == sentence_match.end())
+ // continue;
+
+ if (diff > best_cost)
+ continue;
+
+ // compute minimal cost
+ int start_pos = suffixArray.GetWordInSentence( position );
+ int end_pos = start_pos + length-1;
+ // cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
+ // << start << "-" << (start+length-1) << " (" << input_length << ")";
+ // different number of prior words -> cost is at least diff
+ int min_cost = abs( start - start_pos );
+
+ // same number of words, but not sent. start -> cost is at least 1
+ if (start == start_pos && start>0)
+ min_cost++;
+
+ // different number of remaining words -> cost is at least diff
+ min_cost += abs( ( sentence_length-1 - end_pos ) -
+ ( input_length-1 - (start+length-1) ) );
+
+ // same number of words, but not sent. end -> cost is at least 1
+ if ( sentence_length-1 - end_pos ==
+ input_length-1 - (start+length-1)
+ && end_pos != sentence_length-1 )
+ min_cost++;
+
+ // cerr << " -> min_cost " << min_cost;
+ if (min_cost > best_cost)
+ continue;
+
+ // valid match
+ match_count++;
+
+ // compute maximal cost
+ int max_cost = max( start, start_pos )
+ + max( sentence_length-1 - end_pos,
+ input_length-1 - (start+length-1) );
+ // cerr << ", max_cost " << max_cost;
+
+ Match m = Match( start, start+length-1,
+ start_pos, start_pos+length-1,
+ min_cost, max_cost, 0);
+ sentence_match[ sentence_id ].push_back( m );
+ sentence_match_word_count[ sentence_id ] += length;
+
+ if (max_cost < best_cost)
+ {
+ best_cost = max_cost;
+ if (best_cost == 0) break;
+ }
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ }
+ // cerr << endl;
+ if (best_cost == 0) break;
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ // cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
+
+ if (best_cost == 0) break;
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
+
+ clock_t clock_matches = clock();
+
+ // consider each sentence for which we have matches
+ int old_best_cost = best_cost;
+ int tm_count_word_match = 0;
+ int tm_count_word_match2 = 0;
+ int pruned_match_count = 0;
+ if (short_match_max_length( input_length ))
+ {
+ init_short_matches( input[i] );
+ }
+ vector< int > best_tm;
+ typedef map< int, vector< Match > >::iterator I;
+
+ clock_t clock_validation_sum = 0;
+
+ for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
+ {
+ int tmID = tm->first;
+ int tm_length = suffixArray.GetSentenceLength(tmID);
+ vector< Match > &match = tm->second;
+ add_short_matches( match, source[tmID], input_length, best_cost );
+
+ //cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
+
+ // quick look: how many words are matched
+ int words_matched = 0;
+ for(int m=0;m<match.size();m++) {
+
+ if (match[m].min_cost <= best_cost) // makes no difference
+ words_matched += match[m].input_end - match[m].input_start + 1;
+ }
+ if (max(input_length,tm_length) - words_matched > best_cost)
+ {
+ if (length_filter_flag) continue;
+ }
+ tm_count_word_match++;
+
+ // prune, check again how many words are matched
+ vector< Match > pruned = prune_matches( match, best_cost );
+ words_matched = 0;
+ for(int p=0;p<pruned.size();p++) {
+ words_matched += pruned[p].input_end - pruned[p].input_start + 1;
+ }
+ if (max(input_length,tm_length) - words_matched > best_cost)
+ {
+ if (length_filter_flag) continue;
+ }
+ tm_count_word_match2++;
+
+ pruned_match_count += pruned.size();
+ int prior_best_cost = best_cost;
+ int cost;
+
+ clock_t clock_validation_start = clock();
+ if (! parse_flag ||
+ pruned.size()>=10) // to prevent worst cases
+ {
+ string path;
+ cost = sed( input[input_id], source[tmID], path, false );
+ if (cost < best_cost)
+ {
+ best_cost = cost;
+ }
+ }
+
+ else
+ {
+ cost = parse_matches( pruned, input_length, tm_length, best_cost );
+ if (prior_best_cost != best_cost)
+ {
+ best_tm.clear();
+ }
+ }
+ clock_validation_sum += clock() - clock_validation_start;
+ if (cost == best_cost)
+ {
+ best_tm.push_back( tmID );
+ }
+ }
+ cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
+ cerr << "tm considered: " << sentence_match.size()
+ << " word-matched: " << tm_count_word_match
+ << " word-matched2: " << tm_count_word_match2
+ << " best: " << best_tm.size() << endl;
+
+ cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
+
+ // do not try to find the best ... report multiple matches
+ if (multiple_flag) {
+ int input_letter_length = compute_length( input[input_id] );
+ for(int si=0; si<best_tm.size(); si++) {
+ int s = best_tm[si];
+ string path;
+ unsigned int letter_cost = sed( input[input_id], source[s], path, true );
+ // do not report multiple identical sentences, but just their count
+ cout << i << " "; // sentence number
+ cout << letter_cost << "/" << input_letter_length << " ";
+ cout << "(" << best_cost <<"/" << input_length <<") ";
+ cout << "||| " << s << " ||| " << path << endl;
+ }
+ continue;
+ }
+
+ // find the best matches according to letter sed
+ string best_path = "";
+ int best_match = -1;
+ int best_letter_cost;
+ if (lsed_flag) {
+ best_letter_cost = compute_length( input[input_id] ) * min_match / 100 + 1;
+ for(int si=0; si<best_tm.size(); si++)
+ {
+ int s = best_tm[si];
+ string path;
+ unsigned int letter_cost = sed( input[input_id], source[s], path, true );
+ if (letter_cost < best_letter_cost)
+ {
+ best_letter_cost = letter_cost;
+ best_path = path;
+ best_match = s;
+ }
+ }
+ }
+ // if letter sed turned off, just compute path for first match
+ else {
+ if (best_tm.size() > 0) {
+ string path;
+ sed( input[input_id], source[best_tm[0]], path, false );
+ best_path = path;
+ best_match = best_tm[0];
+ }
+ }
+ cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
+ << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
+ << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
+ << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
+ << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
+ << " )" << endl;
+ if (lsed_flag) {
+ cout << best_letter_cost << "/" << compute_length( input[input_id] ) << " (";
+ }
+ cout << best_cost <<"/" << input_length;
+ if (lsed_flag) cout << ")";
+ cout << " ||| " << best_match << " ||| " << best_path << endl;
+ }
+ cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
+
+
+}
diff --git a/contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl b/contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl
new file mode 100644
index 000000000..49e9ce1ec
--- /dev/null
+++ b/contrib/fuzzy-match/old/get-multiple-translations-for-uniq-sources.perl
@@ -0,0 +1,58 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $src_in = "corpus/acquis.truecased.4.en";
+my $tgt_in = "corpus/acquis.truecased.4.fr";
+my $align_in = "model/aligned.4.grow-diag-final-and";
+
+my $src_out = "data/acquis.truecased.4.en.uniq";
+my $tgt_out = "data/acquis.truecased.4.fr.uniq";
+my $tgt_mf = "data/acquis.truecased.4.fr.uniq.most-frequent";
+my $align_out = "data/acquis.truecased.4.align.uniq";
+my $align_mf = "data/acquis.truecased.4.align.uniq.most-frequent";
+
+my (%TRANS,%ALIGN);
+
+open(SRC,$src_in);
+open(TGT,$tgt_in);
+open(ALIGN,$align_in);
+while(my $src = <SRC>) {
+ my $tgt = <TGT>;
+ my $align = <ALIGN>;
+ chop($tgt);
+ chop($align);
+ $TRANS{$src}{$tgt}++;
+ $ALIGN{$src}{$tgt} = $align;
+}
+close(SRC);
+close(TGT);
+
+open(SRC_OUT,">$src_out");
+open(TGT_OUT,">$tgt_out");
+open(TGT_MF, ">$tgt_mf");
+open(ALIGN_OUT,">$align_out");
+open(ALIGN_MF, ">$align_mf");
+foreach my $src (keys %TRANS) {
+ print SRC_OUT $src;
+ my $first = 1;
+ my ($max,$best) = (0);
+ foreach my $tgt (keys %{$TRANS{$src}}) {
+ print TGT_OUT " ||| " unless $first;
+ print TGT_OUT $TRANS{$src}{$tgt}." ".$tgt;
+ print ALIGN_OUT " ||| " unless $first;
+ print ALIGN_OUT $ALIGN{$src}{$tgt};
+ if ($TRANS{$src}{$tgt} > $max) {
+ $max = $TRANS{$src}{$tgt};
+ $best = $tgt;
+ }
+ $first = 0;
+ }
+ print TGT_OUT "\n";
+ print ALIGN_OUT "\n";
+ print TGT_MF $best."\n";
+ print ALIGN_MF $ALIGN{$src}{$best}."\n";
+}
+close(SRC_OUT);
+close(TGT_OUT);
+
diff --git a/contrib/fuzzy-match/old/make-pt-from-tm.perl b/contrib/fuzzy-match/old/make-pt-from-tm.perl
new file mode 100755
index 000000000..6bdb2fa93
--- /dev/null
+++ b/contrib/fuzzy-match/old/make-pt-from-tm.perl
@@ -0,0 +1,308 @@
+#!/usr/bin/perl -w
+
+use strict;
+use FindBin qw($RealBin);
+use File::Basename;
+
+my $DEBUG = 1;
+my $OUTPUT_RULES = 1;
+
+#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
+my $in_file = $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
+my $source_file = $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
+my $target_file = $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
+my $alignment_file = $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
+my $lex_file = $ARGV[4]; #$data_root/in/lex.4;
+my $pt_file = $ARGV[5]; #"$data_root/out/pt";
+
+my $cmd;
+
+my $TMPDIR=dirname($pt_file) ."/tmp.$$";
+$cmd = "mkdir -p $TMPDIR";
+`$cmd`;
+
+my $match_file = "$TMPDIR/match";
+
+# suffix array creation and extraction
+$cmd = "$RealBin/fuzzy-match --multiple $in_file $source_file > $match_file";
+print STDERR "$cmd \n";
+`$cmd`;
+
+# make into xml and pt
+my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";
+
+my @INPUT = `cat $in_file`; chop(@INPUT);
+my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
+my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
+my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);
+
+open(MATCH,$match_file);
+open(FRAME,">$out_file");
+open(RULE,">$out_file.extract") if $OUTPUT_RULES;
+open(RULE_INV,">$out_file.extract.inv") if $OUTPUT_RULES;
+open(INFO,">$out_file.info");
+while( my $match = <MATCH> ) {
+ chop($match);
+ my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
+
+ $score =~ /^(\d+) (.+)/ || die;
+ my ($i,$match_score) = ($1,$2);
+ print STDERR "i=$i match_score=$match_score\n";
+
+ # construct frame
+ if ($sentence < 1e9 && $sentence >= 0) {
+ my $SOURCE = $ALL_SOURCE[$sentence];
+ my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
+ my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
+
+ for(my $j=0;$j<scalar(@TARGET);$j++) {
+ $TARGET[$j] =~ /^(\d+) (.+)$/ || die;
+ my ($target_count,$target) = ($1,$2);
+ my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
+ &create_xml($SOURCE,
+ $INPUT[$i],
+ $target,
+ $ALIGNMENT[$j],
+ $path);
+ print FRAME $frame."\n";
+ print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
+ print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
+ print INFO "$i ||| $match_score ||| $target_count\n";
+ }
+ }
+}
+close(FRAME);
+close(MATCH);
+close(RULE) if $OUTPUT_RULES;
+close(RULE_INV) if $OUTPUT_RULES;
+
+`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
+`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;
+
+if ($OUTPUT_RULES)
+{
+ $cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $lex_file -phrase-translation-table $pt_file";
+ print STDERR "Executing: $cmd \n";
+ `$cmd`;
+}
+
+#$cmd = "rm -rf $TMPDIR";
+#`$cmd`;
+
+#######################################################
+sub create_xml {
+ my ($source,$input,$target,$alignment,$path) = @_;
+
+ print STDERR " HIEU \n $source \n $input \n $target \n $alignment \n $path \n";
+
+ my @INPUT = split(/ /,$input);
+ my @SOURCE = split(/ /,$source);
+ my @TARGET = split(/ /,$target);
+ my %ALIGN = &create_alignment($alignment);
+
+ my %FRAME_INPUT;
+ my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
+ foreach (@TARGET) { push @TARGET_BITMAP,1 }
+
+ ### STEP 1: FIND MISMATCHES
+
+ my ($s,$i) = (0,0);
+ my $currently_matching = 0;
+ my ($start_s,$start_i) = (0,0);
+
+ $path .= "X"; # indicate end
+ print STDERR "$input\n$source\n$target\n$path\n";
+ for(my $p=0;$p<length($path);$p++) {
+ my $action = substr($path,$p,1);
+
+ # beginning of a mismatch
+ if ($currently_matching && $action ne "M" && $action ne "X") {
+ $start_i = $i;
+ $start_s = $s;
+ $currently_matching = 0;
+ }
+
+ # end of a mismatch
+ elsif (!$currently_matching &&
+ ($action eq "M" || $action eq "X")) {
+
+ # remove use of affected target words
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $TARGET_BITMAP[$tt] = 0;
+ }
+
+ # also remove enclosed unaligned words?
+ }
+
+ # are there input words that need to be inserted ?
+ print STDERR "($start_i<$i)?\n";
+ if ($start_i<$i) {
+
+ # take note of input words to be inserted
+ my $insertion = "";
+ for(my $ii = $start_i; $ii<$i; $ii++) {
+ $insertion .= $INPUT[$ii]." ";
+ }
+
+ # find position for inserted input words
+
+ # find first removed target word
+ my $start_t = 1000;
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt < $start_t;
+ }
+ }
+
+ # end of sentence? add to end
+ if ($start_t == 1000 && $i > $#INPUT) {
+ $start_t = $#TARGET;
+ }
+
+ # backtrack to previous words if unaligned
+ if ($start_t == 1000) {
+ $start_t = -1;
+ for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt > $start_t;
+ }
+ }
+ }
+ $FRAME_INPUT{$start_t} .= $insertion;
+ my %NT = ("start_t" => $start_t,
+ "start_i" => $start_i );
+ push @NT,\%NT;
+ }
+ $currently_matching = 1;
+ }
+
+ print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
+ if ($action ne "I") {
+ print STDERR " ->";
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
+ print STDERR " ".$tt;
+ }
+ }
+ print STDERR "\n";
+ $s++ unless $action eq "I";
+ $i++ unless $action eq "D";
+ $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
+ push @INPUT_BITMAP, 1 if $action eq "M";
+ push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
+ }
+
+
+ print STDERR $target."\n";
+ foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
+ foreach (sort keys %FRAME_INPUT) {
+ print STDERR "$_: $FRAME_INPUT{$_}\n";
+ }
+
+ ### STEP 2: BUILD RULE AND FRAME
+
+ # hierarchical rule
+ my $rule_s = "";
+ my $rule_pos_s = 0;
+ my %RULE_ALIGNMENT_S;
+ for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
+ if ($INPUT_BITMAP[$i]) {
+ $rule_s .= $INPUT[$i]." ";
+ $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
+ }
+ foreach my $NT (@NT) {
+ if ($i == $$NT{"start_i"}) {
+ $rule_s .= "[X][X] ";
+ $$NT{"rule_pos_s"} = $rule_pos_s++;
+ }
+ }
+ }
+
+ my $rule_t = "";
+ my $rule_pos_t = 0;
+ my %RULE_ALIGNMENT_T;
+ for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
+ if ($t>=0 && $TARGET_BITMAP[$t]) {
+ $rule_t .= $TARGET[$t]." ";
+ $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
+ }
+ foreach my $NT (@NT) {
+ if ($t == $$NT{"start_t"}) {
+ $rule_t .= "[X][X] ";
+ $$NT{"rule_pos_t"} = $rule_pos_t++;
+ }
+ }
+ }
+
+ my $rule_alignment = "";
+ foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
+ foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
+ next unless defined($RULE_ALIGNMENT_T{$t});
+ $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
+ }
+ }
+ foreach my $NT (@NT) {
+ $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
+ }
+
+ chop($rule_s);
+ chop($rule_t);
+ chop($rule_alignment);
+
+ my $rule_alignment_inv = "";
+ foreach (split(/ /,$rule_alignment)) {
+ /^(\d+)\-(\d+)$/;
+ $rule_alignment_inv .= "$2-$1 ";
+ }
+ chop($rule_alignment_inv);
+
+ # frame
+ my $frame = "";
+ $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
+
+ my $currently_included = 0;
+ my $start_t = -1;
+ push @TARGET_BITMAP,0; # indicate end
+
+ for(my $t=0;$t<=scalar(@TARGET);$t++) {
+ # beginning of tm target inclusion
+ if (!$currently_included && $TARGET_BITMAP[$t]) {
+ $start_t = $t;
+ $currently_included = 1;
+ }
+
+ # end of tm target inclusion (not included word or inserted input)
+ elsif ($currently_included &&
+ (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
+ # add xml (unless change is at the beginning of the sentence
+ if ($start_t >= 0) {
+ my $target = "";
+ print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
+ for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
+ $target .= $TARGET[$tt] . " ";
+ }
+ chop($target);
+ $frame .= "<xml translation=\"$target\"> x </xml> ";
+ }
+ $currently_included = 0;
+ }
+
+ $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
+ print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
+ }
+
+ print STDERR $frame."\n-------------------------------------\n";
+ return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
+}
+
+sub create_alignment {
+ my ($line) = @_;
+ my (@ALIGNED_TO_S,@ALIGNED_TO_T);
+ foreach my $point (split(/ /,$line)) {
+ my ($s,$t) = split(/\-/,$point);
+ $ALIGNED_TO_S[$s]{$t}++;
+ $ALIGNED_TO_T[$t]{$s}++;
+ }
+ my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
+ return %ALIGNMENT;
+}
diff --git a/contrib/fuzzy-match/old/make-pt-from-tm2.perl b/contrib/fuzzy-match/old/make-pt-from-tm2.perl
new file mode 100755
index 000000000..3a5fa4171
--- /dev/null
+++ b/contrib/fuzzy-match/old/make-pt-from-tm2.perl
@@ -0,0 +1,300 @@
+#!/usr/bin/perl -w -d
+
+use strict;
+use FindBin qw($RealBin);
+use File::Basename;
+
+my $DEBUG = 1;
+my $OUTPUT_RULES = 1;
+
+#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
+my $in_file = $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
+my $source_file = $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
+my $target_file = $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
+my $alignment_file = $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
+my $lex_file = $ARGV[4]; #$data_root/in/lex.4;
+my $pt_file = $ARGV[5]; #"$data_root/out/pt";
+
+my $cmd;
+
+my $TMPDIR= "/tmp/tmp.$$";
+$cmd = "mkdir -p $TMPDIR";
+`$cmd`;
+$TMPDIR = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/out/tmp.3196";
+
+my $match_file = "$TMPDIR/match";
+
+# suffix array creation and extraction
+$cmd = "$RealBin/fuzzy-match --multiple $in_file $source_file > $match_file";
+`$cmd`;
+
+# make into xml and pt
+my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";
+
+open(MATCH,$match_file);
+open(FRAME,">$out_file");
+open(RULE,">$out_file.extract") if $OUTPUT_RULES;
+open(RULE_INV,">$out_file.extract.inv") if $OUTPUT_RULES;
+open(INFO,">$out_file.info");
+while( my $match = <MATCH> ) {
+ chop($match);
+ my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
+
+ $score =~ /^(\d+) (.+)/ || die;
+ my ($i,$match_score) = ($1,$2);
+
+ # construct frame
+ if ($sentence < 1e9 && $sentence >= 0) {
+ my $SOURCE = $ALL_SOURCE[$sentence];
+ my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
+ my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
+
+ for(my $j=0;$j<scalar(@TARGET);$j++) {
+ $TARGET[$j] =~ /^(\d+) (.+)$/ || die;
+ my ($target_count,$target) = ($1,$2);
+ my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
+ &create_xml($SOURCE,
+ $INPUT[$i],
+ $target,
+ $ALIGNMENT[$j],
+ $path);
+ print FRAME $frame."\n";
+ print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
+ print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
+ print INFO "$i ||| $match_score ||| $target_count\n";
+ }
+ }
+}
+close(FRAME);
+close(MATCH);
+close(RULE) if $OUTPUT_RULES;
+close(RULE_INV) if $OUTPUT_RULES;
+
+`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
+`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;
+
+if ($OUTPUT_RULES)
+{
+ $cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $lex_file -phrase-translation-table $pt_file";
+ print STDERR "Executing: $cmd \n";
+ `$cmd`;
+}
+
+#$cmd = "rm -rf $TMPDIR";
+#`$cmd`;
+
+#######################################################
+sub create_xml {
+ my ($source,$input,$target,$alignment,$path) = @_;
+
+ my @INPUT = split(/ /,$input);
+ my @SOURCE = split(/ /,$source);
+ my @TARGET = split(/ /,$target);
+ my %ALIGN = &create_alignment($alignment);
+
+ my %FRAME_INPUT;
+ my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
+ foreach (@TARGET) { push @TARGET_BITMAP,1 }
+
+ ### STEP 1: FIND MISMATCHES
+
+ my ($s,$i) = (0,0);
+ my $currently_matching = 0;
+ my ($start_s,$start_i) = (0,0);
+
+ $path .= "X"; # indicate end
+ print STDERR "$input\n$source\n$target\n$path\n";
+ for(my $p=0;$p<length($path);$p++) {
+ my $action = substr($path,$p,1);
+
+ # beginning of a mismatch
+ if ($currently_matching && $action ne "M" && $action ne "X") {
+ $start_i = $i;
+ $start_s = $s;
+ $currently_matching = 0;
+ }
+
+ # end of a mismatch
+ elsif (!$currently_matching &&
+ ($action eq "M" || $action eq "X")) {
+
+ # remove use of affected target words
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $TARGET_BITMAP[$tt] = 0;
+ }
+
+ # also remove enclosed unaligned words?
+ }
+
+ # are there input words that need to be inserted ?
+ print STDERR "($start_i<$i)?\n";
+ if ($start_i<$i) {
+
+ # take note of input words to be inserted
+ my $insertion = "";
+ for(my $ii = $start_i; $ii<$i; $ii++) {
+ $insertion .= $INPUT[$ii]." ";
+ }
+
+ # find position for inserted input words
+
+ # find first removed target word
+ my $start_t = 1000;
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt < $start_t;
+ }
+ }
+
+ # end of sentence? add to end
+ if ($start_t == 1000 && $i > $#INPUT) {
+ $start_t = $#TARGET;
+ }
+
+ # backtrack to previous words if unaligned
+ if ($start_t == 1000) {
+ $start_t = -1;
+ for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt > $start_t;
+ }
+ }
+ }
+ $FRAME_INPUT{$start_t} .= $insertion;
+ my %NT = ("start_t" => $start_t,
+ "start_i" => $start_i );
+ push @NT,\%NT;
+ }
+ $currently_matching = 1;
+ }
+
+ print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
+ if ($action ne "I") {
+ print STDERR " ->";
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
+ print STDERR " ".$tt;
+ }
+ }
+ print STDERR "\n";
+ $s++ unless $action eq "I";
+ $i++ unless $action eq "D";
+ $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
+ push @INPUT_BITMAP, 1 if $action eq "M";
+ push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
+ }
+
+
+ print STDERR $target."\n";
+ foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
+ foreach (sort keys %FRAME_INPUT) {
+ print STDERR "$_: $FRAME_INPUT{$_}\n";
+ }
+
+ ### STEP 2: BUILD RULE AND FRAME
+
+ # hierarchical rule
+ my $rule_s = "";
+ my $rule_pos_s = 0;
+ my %RULE_ALIGNMENT_S;
+ for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
+ if ($INPUT_BITMAP[$i]) {
+ $rule_s .= $INPUT[$i]." ";
+ $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
+ }
+ foreach my $NT (@NT) {
+ if ($i == $$NT{"start_i"}) {
+ $rule_s .= "[X][X] ";
+ $$NT{"rule_pos_s"} = $rule_pos_s++;
+ }
+ }
+ }
+
+ my $rule_t = "";
+ my $rule_pos_t = 0;
+ my %RULE_ALIGNMENT_T;
+ for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
+ if ($t>=0 && $TARGET_BITMAP[$t]) {
+ $rule_t .= $TARGET[$t]." ";
+ $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
+ }
+ foreach my $NT (@NT) {
+ if ($t == $$NT{"start_t"}) {
+ $rule_t .= "[X][X] ";
+ $$NT{"rule_pos_t"} = $rule_pos_t++;
+ }
+ }
+ }
+
+ my $rule_alignment = "";
+ foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
+ foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
+ next unless defined($RULE_ALIGNMENT_T{$t});
+ $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
+ }
+ }
+ foreach my $NT (@NT) {
+ $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
+ }
+
+ chop($rule_s);
+ chop($rule_t);
+ chop($rule_alignment);
+
+ my $rule_alignment_inv = "";
+ foreach (split(/ /,$rule_alignment)) {
+ /^(\d+)\-(\d+)$/;
+ $rule_alignment_inv .= "$2-$1 ";
+ }
+ chop($rule_alignment_inv);
+
+ # frame
+ my $frame = "";
+ $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
+
+ my $currently_included = 0;
+ my $start_t = -1;
+ push @TARGET_BITMAP,0; # indicate end
+
+ for(my $t=0;$t<=scalar(@TARGET);$t++) {
+ # beginning of tm target inclusion
+ if (!$currently_included && $TARGET_BITMAP[$t]) {
+ $start_t = $t;
+ $currently_included = 1;
+ }
+
+ # end of tm target inclusion (not included word or inserted input)
+ elsif ($currently_included &&
+ (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
+ # add xml (unless change is at the beginning of the sentence
+ if ($start_t >= 0) {
+ my $target = "";
+ print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
+ for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
+ $target .= $TARGET[$tt] . " ";
+ }
+ chop($target);
+ $frame .= "<xml translation=\"$target\"> x </xml> ";
+ }
+ $currently_included = 0;
+ }
+
+ $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
+ print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
+ }
+
+ print STDERR $frame."\n-------------------------------------\n";
+ return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
+}
+
+sub create_alignment {
+ my ($line) = @_;
+ my (@ALIGNED_TO_S,@ALIGNED_TO_T);
+ foreach my $point (split(/ /,$line)) {
+ my ($s,$t) = split(/\-/,$point);
+ $ALIGNED_TO_S[$s]{$t}++;
+ $ALIGNED_TO_T[$t]{$s}++;
+ }
+ my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
+ return %ALIGNMENT;
+}
diff --git a/contrib/fuzzy-match/old/make-xml-from-match-multiple.perl b/contrib/fuzzy-match/old/make-xml-from-match-multiple.perl
new file mode 100755
index 000000000..e16c9de75
--- /dev/null
+++ b/contrib/fuzzy-match/old/make-xml-from-match-multiple.perl
@@ -0,0 +1,288 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+my $DEBUG = 1;
+my $OUTPUT_RULES = 1;
+
+my $scripts_root_dir = "/Users/hieuhoang/workspace/github/hieuhoang/scripts";
+
+my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
+#my $match_file = "$data_root/in/BEST.acquis-xml-escaped.4.uniq.multi.tuning";
+my $match_file = "$data_root/out/BEST";
+my $source_file = "$data_root/in/acquis.truecased.4.en.uniq";
+my $target_file = "$data_root/in/acquis.truecased.4.fr.uniq";
+my $alignment_file = "$data_root/in/acquis.truecased.4.align.uniq";
+my $out_file = "$data_root/out/ac-test.input.xml.4.uniq.multi.tuning";
+my $in_file = "$data_root/in/ac-test.input.tc.4";
+
+#my $match_file = "tm/BEST.acquis-xml-escaped.4.uniq.multi";
+#my $source_file = "data/acquis.truecased.4.en.uniq";
+#my $target_file = "data/acquis.truecased.4.fr.uniq";
+#my $alignment_file = "data/acquis.truecased.4.align.uniq";
+#my $out_file = "data/ac-test.input.xml.4.uniq.multi.xxx";
+#my $in_file = "evaluation/ac-test.input.tc.4";
+
+my @INPUT = `cat $in_file`; chop(@INPUT);
+my @ALL_SOURCE = `cat $source_file`; chop(@ALL_SOURCE);
+my @ALL_TARGET = `cat $target_file`; chop(@ALL_TARGET);
+my @ALL_ALIGNMENT = `cat $alignment_file`; chop(@ALL_ALIGNMENT);
+
+open(MATCH,$match_file);
+open(FRAME,">$out_file");
+open(RULE,">$out_file.extract") if $OUTPUT_RULES;
+open(RULE_INV,">$out_file.extract.inv") if $OUTPUT_RULES;
+open(INFO,">$out_file.info");
+while( my $match = <MATCH> ) {
+ chop($match);
+ my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
+
+ $score =~ /^(\d+) (.+)/ || die;
+ my ($i,$match_score) = ($1,$2);
+
+ # construct frame
+ if ($sentence < 1e9 && $sentence >= 0) {
+ my $SOURCE = $ALL_SOURCE[$sentence];
+ my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
+ my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);
+
+ for(my $j=0;$j<scalar(@TARGET);$j++) {
+ $TARGET[$j] =~ /^(\d+) (.+)$/ || die;
+ my ($target_count,$target) = ($1,$2);
+ my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
+ &create_xml($SOURCE,
+ $INPUT[$i],
+ $target,
+ $ALIGNMENT[$j],
+ $path);
+ print FRAME $frame."\n";
+ print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
+ print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
+ print INFO "$i ||| $match_score ||| $target_count\n";
+ }
+ }
+}
+close(FRAME);
+close(MATCH);
+close(RULE) if $OUTPUT_RULES;
+close(RULE_INV) if $OUTPUT_RULES;
+
+`LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
+`LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;
+
+`$scripts_root_dir/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $data_root/in/lex.4 -phrase-translation-table $out_file.phrase-table` if $OUTPUT_RULES;
+
+sub create_xml {
+ my ($source,$input,$target,$alignment,$path) = @_;
+
+ my @INPUT = split(/ /,$input);
+ my @SOURCE = split(/ /,$source);
+ my @TARGET = split(/ /,$target);
+ my %ALIGN = &create_alignment($alignment);
+
+ my %FRAME_INPUT;
+ my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
+ foreach (@TARGET) { push @TARGET_BITMAP,1 }
+
+ ### STEP 1: FIND MISMATCHES
+
+ my ($s,$i) = (0,0);
+ my $currently_matching = 0;
+ my ($start_s,$start_i) = (0,0);
+
+ $path .= "X"; # indicate end
+ print "$input\n$source\n$target\n$path\n";
+ for(my $p=0;$p<length($path);$p++) {
+ my $action = substr($path,$p,1);
+
+ # beginning of a mismatch
+ if ($currently_matching && $action ne "M" && $action ne "X") {
+ $start_i = $i;
+ $start_s = $s;
+ $currently_matching = 0;
+ }
+
+ # end of a mismatch
+ elsif (!$currently_matching &&
+ ($action eq "M" || $action eq "X")) {
+
+ # remove use of affected target words
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $TARGET_BITMAP[$tt] = 0;
+ }
+
+ # also remove enclosed unaligned words?
+ }
+
+ # are there input words that need to be inserted ?
+ print "($start_i<$i)?\n";
+ if ($start_i<$i) {
+
+ # take note of input words to be inserted
+ my $insertion = "";
+ for(my $ii = $start_i; $ii<$i; $ii++) {
+ $insertion .= $INPUT[$ii]." ";
+ }
+
+ # find position for inserted input words
+
+ # find first removed target word
+ my $start_t = 1000;
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt < $start_t;
+ }
+ }
+
+ # end of sentence? add to end
+ if ($start_t == 1000 && $i > $#INPUT) {
+ $start_t = $#TARGET;
+ }
+
+ # backtrack to previous words if unaligned
+ if ($start_t == 1000) {
+ $start_t = -1;
+ for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt > $start_t;
+ }
+ }
+ }
+ $FRAME_INPUT{$start_t} .= $insertion;
+ my %NT = ("start_t" => $start_t,
+ "start_i" => $start_i );
+ push @NT,\%NT;
+ }
+ $currently_matching = 1;
+ }
+
+ print "$action $s $i ($start_s $start_i) $currently_matching";
+ if ($action ne "I") {
+ print " ->";
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
+ print " ".$tt;
+ }
+ }
+ print "\n";
+ $s++ unless $action eq "I";
+ $i++ unless $action eq "D";
+ $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
+ push @INPUT_BITMAP, 1 if $action eq "M";
+ push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
+ }
+
+
+ print $target."\n";
+ foreach (@TARGET_BITMAP) { print $_; } print "\n";
+ foreach (sort keys %FRAME_INPUT) {
+ print "$_: $FRAME_INPUT{$_}\n";
+ }
+
+ ### STEP 2: BUILD RULE AND FRAME
+
+ # hierarchical rule
+ my $rule_s = "";
+ my $rule_pos_s = 0;
+ my %RULE_ALIGNMENT_S;
+ for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
+ if ($INPUT_BITMAP[$i]) {
+ $rule_s .= $INPUT[$i]." ";
+ $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
+ }
+ foreach my $NT (@NT) {
+ if ($i == $$NT{"start_i"}) {
+ $rule_s .= "[X][X] ";
+ $$NT{"rule_pos_s"} = $rule_pos_s++;
+ }
+ }
+ }
+
+ my $rule_t = "";
+ my $rule_pos_t = 0;
+ my %RULE_ALIGNMENT_T;
+ for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
+ if ($t>=0 && $TARGET_BITMAP[$t]) {
+ $rule_t .= $TARGET[$t]." ";
+ $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
+ }
+ foreach my $NT (@NT) {
+ if ($t == $$NT{"start_t"}) {
+ $rule_t .= "[X][X] ";
+ $$NT{"rule_pos_t"} = $rule_pos_t++;
+ }
+ }
+ }
+
+ my $rule_alignment = "";
+ foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
+ foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
+ next unless defined($RULE_ALIGNMENT_T{$t});
+ $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
+ }
+ }
+ foreach my $NT (@NT) {
+ $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
+ }
+
+ chop($rule_s);
+ chop($rule_t);
+ chop($rule_alignment);
+
+ my $rule_alignment_inv = "";
+ foreach (split(/ /,$rule_alignment)) {
+ /^(\d+)\-(\d+)$/;
+ $rule_alignment_inv .= "$2-$1 ";
+ }
+ chop($rule_alignment_inv);
+
+ # frame
+ my $frame = "";
+ $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
+
+ my $currently_included = 0;
+ my $start_t = -1;
+ push @TARGET_BITMAP,0; # indicate end
+
+ for(my $t=0;$t<=scalar(@TARGET);$t++) {
+ # beginning of tm target inclusion
+ if (!$currently_included && $TARGET_BITMAP[$t]) {
+ $start_t = $t;
+ $currently_included = 1;
+ }
+
+ # end of tm target inclusion (not included word or inserted input)
+ elsif ($currently_included &&
+ (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
+ # add xml (unless change is at the beginning of the sentence
+ if ($start_t >= 0) {
+ my $target = "";
+ print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
+ for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
+ $target .= $TARGET[$tt] . " ";
+ }
+ chop($target);
+ $frame .= "<xml translation=\"$target\"> x </xml> ";
+ }
+ $currently_included = 0;
+ }
+
+ $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
+ print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
+ }
+
+ print $frame."\n-------------------------------------\n";
+ return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
+}
+
+sub create_alignment {
+ my ($line) = @_;
+ my (@ALIGNED_TO_S,@ALIGNED_TO_T);
+ foreach my $point (split(/ /,$line)) {
+ my ($s,$t) = split(/\-/,$point);
+ $ALIGNED_TO_S[$s]{$t}++;
+ $ALIGNED_TO_T[$t]{$s}++;
+ }
+ my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
+ return %ALIGNMENT;
+}
diff --git a/contrib/fuzzy-match/suffix-test.cpp b/contrib/fuzzy-match/suffix-test.cpp
new file mode 100644
index 000000000..01b722fb4
--- /dev/null
+++ b/contrib/fuzzy-match/suffix-test.cpp
@@ -0,0 +1,27 @@
+#include "SuffixArray.h"
+
+using namespace std;
+
+int main(int argc, char* argv[])
+{
+ SuffixArray suffixArray( "/home/pkoehn/syntax/grammars/wmt09-de-en/corpus.1k.de" );
+ //suffixArray.List(10,20);
+ vector< string > der;
+ der.push_back("der");
+ vector< string > inDer;
+ inDer.push_back("in");
+ inDer.push_back("der");
+ vector< string > zzz;
+ zzz.push_back("zzz");
+ vector< string > derDer;
+ derDer.push_back("der");
+ derDer.push_back("der");
+
+ cout << "count of 'der' " << suffixArray.Count( der ) << endl;
+ cout << "limited count of 'der' " << suffixArray.MinCount( der, 2 ) << endl;
+ cout << "count of 'in der' " << suffixArray.Count( inDer ) << endl;
+ cout << "count of 'der der' " << suffixArray.Count( derDer ) << endl;
+ cout << "limited count of 'der der' " << suffixArray.MinCount( derDer, 1 ) << endl;
+ // cout << "count of 'zzz' " << suffixArray.Count( zzz ) << endl;
+ // cout << "limited count of 'zzz' " << suffixArray.LimitedCount( zzz, 1 ) << endl;
+}
diff --git a/contrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.docx b/contrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.docx
new file mode 100755
index 000000000..74a22178c
--- /dev/null
+++ b/contrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.docx
Binary files differ
diff --git a/contrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.pdf b/contrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.pdf
new file mode 100755
index 000000000..e05ab4214
--- /dev/null
+++ b/contrib/iSenWeb/Introduction/iSenWeb A Web-based Machine Translation System to Translate Sentences.pdf
Binary files differ
diff --git a/contrib/iSenWeb/index.html b/contrib/iSenWeb/index.html
new file mode 100755
index 000000000..673e8d519
--- /dev/null
+++ b/contrib/iSenWeb/index.html
@@ -0,0 +1,129 @@
+ <!DOCTYPE html>
+ <HTML>
+ <head>
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+ <title>Moses Translation System</title>
+ <script type="text/javascript" src="jquery-1.7.2.js"></script>
+ <link href="./themes/styles/common.css" rel="stylesheet" type="text/css" />
+ <link href="./themes/styles/search.css" rel="stylesheet" type="text/css"/>
+ <link href="./themes/styles/fanyi.css" rel="stylesheet" type="text/css" />
+ </head>
+<script language="javascript">
+$(document).ready(function()
+{
+
+ var targetDiv = $("#outputText");
+ var input = $("#inputText");
+
+ $("#transForm").submit(function()
+ {
+ $.ajax(
+ {
+ type: "POST", url: 'trans_result.php',data: {input1: input.val()},
+ complete: function(data)
+ {
+ targetDiv.html('');
+ targetDiv.append(data.responseText);
+
+ }
+ });
+ return false;
+ });
+});
+
+</script>
+ <body>
+ <div class="topWrap">
+ <div class="top">
+ <div class="logo"><a href="/" title="English Chinese Translation Based on Moses">Home</a></div>
+
+ </div>
+ <!-- top end -->
+ </div>
+ <div class="ConBox">
+ <div class="hd">
+ <div id="inputMod" class="column fl">
+ <div class="wrapper">
+<!--
+ <form action="trans_result.php" method="post" id="transForm" name="transForm">-->
+ <form action="" method="post" id="transForm" name="transForm">
+ <div class="row desc">
+ Source Text:
+ <input type="reset" name="clear" value="Clear"/>
+ </div>
+ <div class="row border content">
+ <textarea id="inputText" class="text" dir="ltr" tabindex="1" wrap="SOFT" name="inputText"></textarea>
+
+ </div>
+ <div class="row">
+ <select>
+ <option value ="en-cn">English >> Chinese </option>
+ </select>
+ <input type="submit" value="Translation"/>
+ </div>
+ </form>
+ </div>
+ <!-- end of wrapper -->
+ </div>
+ <!-- end of div inputMod -->
+ <div id="outputMod" class="column fr">
+ <div class="wrapper">
+ <div id="translated" style="display: block;">
+ <div class="row desc"><span id="outputLang">en->ch</span></div>
+ <div class="row">
+ <div id="outputText" class="row">
+ <div class="translated_result">
+
+
+ </div>
+
+ </div>
+ </div>
+
+ </div>
+
+ <!-- end of entryList -->
+ <!-- end translated -->
+ </div>
+ <!-- end of wrapper -->
+
+
+ <div class="row cf" id="addons">
+ <a id="feedback_link" target="_blank" href="#" class="fr">Feedback</a>
+ <span id="suggestYou">
+ 选择<a data-pos="web.o.leftbottom" class="clog-js" data-clog="FUFEI_CLICK" href="http://nlp2ct.sftw.umac.mo/" target="_blank">人工翻译æœåŠ¡</a>,获得更专业的翻译结果。
+ </span>
+ </div>
+ </div>
+ <div id="errorHolder"><span class="error_text"></span></div>
+ </div>
+ <div style="clear:both"></div>
+ <script type="text/javascript">
+ var global = {};
+ global.sessionFrom = "http://dict.youdao.com/";
+ </script>
+ <script type="text/javascript" src="http://impservice.dictweb.youdao.com/imp/dict_req_web_1.0.js"></script>
+ <script data-main="fanyi" type="text/javascript" src="./themes/fanyi/v2.1.3.1/scripts/fanyi.js"></script>
+ <div id="transBtnTip">
+ <div id="transBtnTipInner">
+ 点击翻译按钮继续,查看网页翻译结果。
+ <p class="ar">
+ <a href="#" id="transBtnTipOK">I have known</a>
+ </p>
+ <b id="transBtnTipArrow"></b>
+ </div>
+ </div>
+
+ <div class="Feedback"><a href="http://nlp2ct.sftw.umac.mo/" target="_blank">å馈信æ¯ç»™æˆ‘们</a></div>
+
+
+ <div class="footer" style="clear:both">
+ <p><a href="http://nlp2ct.sftw.umac.mo/" target="_blank">Conect with us</a> <span>|</span>
+ <a href="http://nlp2ct.sftw.umac.mo/" target="_blank">Mosese Translated system</a> <span>|</span>
+ Copyright© &nbsp;&nbsp;2012-2012 NLP2CT All Right to Moses Group
+ </p>
+ <p>More</p>
+ </div>
+</div>
+ </body>
+ </HTML>
diff --git a/contrib/iSenWeb/jquery-1.7.2.js b/contrib/iSenWeb/jquery-1.7.2.js
new file mode 100755
index 000000000..668b14a92
--- /dev/null
+++ b/contrib/iSenWeb/jquery-1.7.2.js
@@ -0,0 +1,9405 @@
+/*!
+ * jQuery JavaScript Library v1.7.2
+ * http://jquery.com/
+ *
+ * Copyright 2011, John Resig
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * Includes Sizzle.js
+ * http://sizzlejs.com/
+ * Copyright 2011, The Dojo Foundation
+ * Released under the MIT, BSD, and GPL Licenses.
+ *
+ * Date: Wed Mar 21 12:46:34 2012 -0700
+ */
+(function( window, undefined ) {
+
+// Use the correct document accordingly with window argument (sandbox)
+var document = window.document,
+ navigator = window.navigator,
+ location = window.location;
+var jQuery = (function() {
+
+// Define a local copy of jQuery
+var jQuery = function( selector, context ) {
+ // The jQuery object is actually just the init constructor 'enhanced'
+ return new jQuery.fn.init( selector, context, rootjQuery );
+ },
+
+ // Map over jQuery in case of overwrite
+ _jQuery = window.jQuery,
+
+ // Map over the $ in case of overwrite
+ _$ = window.$,
+
+ // A central reference to the root jQuery(document)
+ rootjQuery,
+
+ // A simple way to check for HTML strings or ID strings
+ // Prioritize #id over <tag> to avoid XSS via location.hash (#9521)
+ quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,
+
+ // Check if a string has a non-whitespace character in it
+ rnotwhite = /\S/,
+
+ // Used for trimming whitespace
+ trimLeft = /^\s+/,
+ trimRight = /\s+$/,
+
+ // Match a standalone tag
+ rsingleTag = /^<(\w+)\s*\/?>(?:<\/\1>)?$/,
+
+ // JSON RegExp
+ rvalidchars = /^[\],:{}\s]*$/,
+ rvalidescape = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,
+ rvalidtokens = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,
+ rvalidbraces = /(?:^|:|,)(?:\s*\[)+/g,
+
+ // Useragent RegExp
+ rwebkit = /(webkit)[ \/]([\w.]+)/,
+ ropera = /(opera)(?:.*version)?[ \/]([\w.]+)/,
+ rmsie = /(msie) ([\w.]+)/,
+ rmozilla = /(mozilla)(?:.*? rv:([\w.]+))?/,
+
+ // Matches dashed string for camelizing
+ rdashAlpha = /-([a-z]|[0-9])/ig,
+ rmsPrefix = /^-ms-/,
+
+ // Used by jQuery.camelCase as callback to replace()
+ fcamelCase = function( all, letter ) {
+ return ( letter + "" ).toUpperCase();
+ },
+
+ // Keep a UserAgent string for use with jQuery.browser
+ userAgent = navigator.userAgent,
+
+ // For matching the engine and version of the browser
+ browserMatch,
+
+ // The deferred used on DOM ready
+ readyList,
+
+ // The ready event handler
+ DOMContentLoaded,
+
+ // Save a reference to some core methods
+ toString = Object.prototype.toString,
+ hasOwn = Object.prototype.hasOwnProperty,
+ push = Array.prototype.push,
+ slice = Array.prototype.slice,
+ trim = String.prototype.trim,
+ indexOf = Array.prototype.indexOf,
+
+ // [[Class]] -> type pairs
+ class2type = {};
+
+jQuery.fn = jQuery.prototype = {
+ constructor: jQuery,
+ init: function( selector, context, rootjQuery ) {
+ var match, elem, ret, doc;
+
+ // Handle $(""), $(null), or $(undefined)
+ if ( !selector ) {
+ return this;
+ }
+
+ // Handle $(DOMElement)
+ if ( selector.nodeType ) {
+ this.context = this[0] = selector;
+ this.length = 1;
+ return this;
+ }
+
+ // The body element only exists once, optimize finding it
+ if ( selector === "body" && !context && document.body ) {
+ this.context = document;
+ this[0] = document.body;
+ this.selector = selector;
+ this.length = 1;
+ return this;
+ }
+
+ // Handle HTML strings
+ if ( typeof selector === "string" ) {
+ // Are we dealing with HTML string or an ID?
+ if ( selector.charAt(0) === "<" && selector.charAt( selector.length - 1 ) === ">" && selector.length >= 3 ) {
+ // Assume that strings that start and end with <> are HTML and skip the regex check
+ match = [ null, selector, null ];
+
+ } else {
+ match = quickExpr.exec( selector );
+ }
+
+ // Verify a match, and that no context was specified for #id
+ if ( match && (match[1] || !context) ) {
+
+ // HANDLE: $(html) -> $(array)
+ if ( match[1] ) {
+ context = context instanceof jQuery ? context[0] : context;
+ doc = ( context ? context.ownerDocument || context : document );
+
+ // If a single string is passed in and it's a single tag
+ // just do a createElement and skip the rest
+ ret = rsingleTag.exec( selector );
+
+ if ( ret ) {
+ if ( jQuery.isPlainObject( context ) ) {
+ selector = [ document.createElement( ret[1] ) ];
+ jQuery.fn.attr.call( selector, context, true );
+
+ } else {
+ selector = [ doc.createElement( ret[1] ) ];
+ }
+
+ } else {
+ ret = jQuery.buildFragment( [ match[1] ], [ doc ] );
+ selector = ( ret.cacheable ? jQuery.clone(ret.fragment) : ret.fragment ).childNodes;
+ }
+
+ return jQuery.merge( this, selector );
+
+ // HANDLE: $("#id")
+ } else {
+ elem = document.getElementById( match[2] );
+
+ // Check parentNode to catch when Blackberry 4.6 returns
+ // nodes that are no longer in the document #6963
+ if ( elem && elem.parentNode ) {
+ // Handle the case where IE and Opera return items
+ // by name instead of ID
+ if ( elem.id !== match[2] ) {
+ return rootjQuery.find( selector );
+ }
+
+ // Otherwise, we inject the element directly into the jQuery object
+ this.length = 1;
+ this[0] = elem;
+ }
+
+ this.context = document;
+ this.selector = selector;
+ return this;
+ }
+
+ // HANDLE: $(expr, $(...))
+ } else if ( !context || context.jquery ) {
+ return ( context || rootjQuery ).find( selector );
+
+ // HANDLE: $(expr, context)
+ // (which is just equivalent to: $(context).find(expr)
+ } else {
+ return this.constructor( context ).find( selector );
+ }
+
+ // HANDLE: $(function)
+ // Shortcut for document ready
+ } else if ( jQuery.isFunction( selector ) ) {
+ return rootjQuery.ready( selector );
+ }
+
+ if ( selector.selector !== undefined ) {
+ this.selector = selector.selector;
+ this.context = selector.context;
+ }
+
+ return jQuery.makeArray( selector, this );
+ },
+
+ // Start with an empty selector
+ selector: "",
+
+ // The current version of jQuery being used
+ jquery: "1.7.2",
+
+ // The default length of a jQuery object is 0
+ length: 0,
+
+ // The number of elements contained in the matched element set
+ size: function() {
+ return this.length;
+ },
+
+ toArray: function() {
+ return slice.call( this, 0 );
+ },
+
+ // Get the Nth element in the matched element set OR
+ // Get the whole matched element set as a clean array
+ get: function( num ) {
+ return num == null ?
+
+ // Return a 'clean' array
+ this.toArray() :
+
+ // Return just the object
+ ( num < 0 ? this[ this.length + num ] : this[ num ] );
+ },
+
+ // Take an array of elements and push it onto the stack
+ // (returning the new matched element set)
+ pushStack: function( elems, name, selector ) {
+ // Build a new jQuery matched element set
+ var ret = this.constructor();
+
+ if ( jQuery.isArray( elems ) ) {
+ push.apply( ret, elems );
+
+ } else {
+ jQuery.merge( ret, elems );
+ }
+
+ // Add the old object onto the stack (as a reference)
+ ret.prevObject = this;
+
+ ret.context = this.context;
+
+ if ( name === "find" ) {
+ ret.selector = this.selector + ( this.selector ? " " : "" ) + selector;
+ } else if ( name ) {
+ ret.selector = this.selector + "." + name + "(" + selector + ")";
+ }
+
+ // Return the newly-formed element set
+ return ret;
+ },
+
+ // Execute a callback for every element in the matched set.
+ // (You can seed the arguments with an array of args, but this is
+ // only used internally.)
+ each: function( callback, args ) {
+ return jQuery.each( this, callback, args );
+ },
+
+ ready: function( fn ) {
+ // Attach the listeners
+ jQuery.bindReady();
+
+ // Add the callback
+ readyList.add( fn );
+
+ return this;
+ },
+
+ eq: function( i ) {
+ i = +i;
+ return i === -1 ?
+ this.slice( i ) :
+ this.slice( i, i + 1 );
+ },
+
+ first: function() {
+ return this.eq( 0 );
+ },
+
+ last: function() {
+ return this.eq( -1 );
+ },
+
+ slice: function() {
+ return this.pushStack( slice.apply( this, arguments ),
+ "slice", slice.call(arguments).join(",") );
+ },
+
+ map: function( callback ) {
+ return this.pushStack( jQuery.map(this, function( elem, i ) {
+ return callback.call( elem, i, elem );
+ }));
+ },
+
+ end: function() {
+ return this.prevObject || this.constructor(null);
+ },
+
+ // For internal use only.
+ // Behaves like an Array's method, not like a jQuery method.
+ push: push,
+ sort: [].sort,
+ splice: [].splice
+};
+
+// Give the init function the jQuery prototype for later instantiation
+jQuery.fn.init.prototype = jQuery.fn;
+
+jQuery.extend = jQuery.fn.extend = function() {
+ var options, name, src, copy, copyIsArray, clone,
+ target = arguments[0] || {},
+ i = 1,
+ length = arguments.length,
+ deep = false;
+
+ // Handle a deep copy situation
+ if ( typeof target === "boolean" ) {
+ deep = target;
+ target = arguments[1] || {};
+ // skip the boolean and the target
+ i = 2;
+ }
+
+ // Handle case when target is a string or something (possible in deep copy)
+ if ( typeof target !== "object" && !jQuery.isFunction(target) ) {
+ target = {};
+ }
+
+ // extend jQuery itself if only one argument is passed
+ if ( length === i ) {
+ target = this;
+ --i;
+ }
+
+ for ( ; i < length; i++ ) {
+ // Only deal with non-null/undefined values
+ if ( (options = arguments[ i ]) != null ) {
+ // Extend the base object
+ for ( name in options ) {
+ src = target[ name ];
+ copy = options[ name ];
+
+ // Prevent never-ending loop
+ if ( target === copy ) {
+ continue;
+ }
+
+ // Recurse if we're merging plain objects or arrays
+ if ( deep && copy && ( jQuery.isPlainObject(copy) || (copyIsArray = jQuery.isArray(copy)) ) ) {
+ if ( copyIsArray ) {
+ copyIsArray = false;
+ clone = src && jQuery.isArray(src) ? src : [];
+
+ } else {
+ clone = src && jQuery.isPlainObject(src) ? src : {};
+ }
+
+ // Never move original objects, clone them
+ target[ name ] = jQuery.extend( deep, clone, copy );
+
+ // Don't bring in undefined values
+ } else if ( copy !== undefined ) {
+ target[ name ] = copy;
+ }
+ }
+ }
+ }
+
+ // Return the modified object
+ return target;
+};
+
+jQuery.extend({
+ noConflict: function( deep ) {
+ if ( window.$ === jQuery ) {
+ window.$ = _$;
+ }
+
+ if ( deep && window.jQuery === jQuery ) {
+ window.jQuery = _jQuery;
+ }
+
+ return jQuery;
+ },
+
+ // Is the DOM ready to be used? Set to true once it occurs.
+ isReady: false,
+
+ // A counter to track how many items to wait for before
+ // the ready event fires. See #6781
+ readyWait: 1,
+
+ // Hold (or release) the ready event
+ holdReady: function( hold ) {
+ if ( hold ) {
+ jQuery.readyWait++;
+ } else {
+ jQuery.ready( true );
+ }
+ },
+
+ // Handle when the DOM is ready
+ ready: function( wait ) {
+ // Either a released hold or an DOMready/load event and not yet ready
+ if ( (wait === true && !--jQuery.readyWait) || (wait !== true && !jQuery.isReady) ) {
+ // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443).
+ if ( !document.body ) {
+ return setTimeout( jQuery.ready, 1 );
+ }
+
+ // Remember that the DOM is ready
+ jQuery.isReady = true;
+
+ // If a normal DOM Ready event fired, decrement, and wait if need be
+ if ( wait !== true && --jQuery.readyWait > 0 ) {
+ return;
+ }
+
+ // If there are functions bound, to execute
+ readyList.fireWith( document, [ jQuery ] );
+
+ // Trigger any bound ready events
+ if ( jQuery.fn.trigger ) {
+ jQuery( document ).trigger( "ready" ).off( "ready" );
+ }
+ }
+ },
+
+ bindReady: function() {
+ if ( readyList ) {
+ return;
+ }
+
+ readyList = jQuery.Callbacks( "once memory" );
+
+ // Catch cases where $(document).ready() is called after the
+ // browser event has already occurred.
+ if ( document.readyState === "complete" ) {
+ // Handle it asynchronously to allow scripts the opportunity to delay ready
+ return setTimeout( jQuery.ready, 1 );
+ }
+
+ // Mozilla, Opera and webkit nightlies currently support this event
+ if ( document.addEventListener ) {
+ // Use the handy event callback
+ document.addEventListener( "DOMContentLoaded", DOMContentLoaded, false );
+
+ // A fallback to window.onload, that will always work
+ window.addEventListener( "load", jQuery.ready, false );
+
+ // If IE event model is used
+ } else if ( document.attachEvent ) {
+ // ensure firing before onload,
+ // maybe late but safe also for iframes
+ document.attachEvent( "onreadystatechange", DOMContentLoaded );
+
+ // A fallback to window.onload, that will always work
+ window.attachEvent( "onload", jQuery.ready );
+
+ // If IE and not a frame
+ // continually check to see if the document is ready
+ var toplevel = false;
+
+ try {
+ toplevel = window.frameElement == null;
+ } catch(e) {}
+
+ if ( document.documentElement.doScroll && toplevel ) {
+ doScrollCheck();
+ }
+ }
+ },
+
+ // See test/unit/core.js for details concerning isFunction.
+ // Since version 1.3, DOM methods and functions like alert
+ // aren't supported. They return false on IE (#2968).
+ isFunction: function( obj ) {
+ return jQuery.type(obj) === "function";
+ },
+
+ isArray: Array.isArray || function( obj ) {
+ return jQuery.type(obj) === "array";
+ },
+
+ isWindow: function( obj ) {
+ return obj != null && obj == obj.window;
+ },
+
+ isNumeric: function( obj ) {
+ return !isNaN( parseFloat(obj) ) && isFinite( obj );
+ },
+
+ type: function( obj ) {
+ return obj == null ?
+ String( obj ) :
+ class2type[ toString.call(obj) ] || "object";
+ },
+
+ isPlainObject: function( obj ) {
+ // Must be an Object.
+ // Because of IE, we also have to check the presence of the constructor property.
+ // Make sure that DOM nodes and window objects don't pass through, as well
+ if ( !obj || jQuery.type(obj) !== "object" || obj.nodeType || jQuery.isWindow( obj ) ) {
+ return false;
+ }
+
+ try {
+ // Not own constructor property must be Object
+ if ( obj.constructor &&
+ !hasOwn.call(obj, "constructor") &&
+ !hasOwn.call(obj.constructor.prototype, "isPrototypeOf") ) {
+ return false;
+ }
+ } catch ( e ) {
+ // IE8,9 Will throw exceptions on certain host objects #9897
+ return false;
+ }
+
+ // Own properties are enumerated firstly, so to speed up,
+ // if last one is own, then all properties are own.
+
+ var key;
+ for ( key in obj ) {}
+
+ return key === undefined || hasOwn.call( obj, key );
+ },
+
+ isEmptyObject: function( obj ) {
+ for ( var name in obj ) {
+ return false;
+ }
+ return true;
+ },
+
+ error: function( msg ) {
+ throw new Error( msg );
+ },
+
+ parseJSON: function( data ) {
+ if ( typeof data !== "string" || !data ) {
+ return null;
+ }
+
+ // Make sure leading/trailing whitespace is removed (IE can't handle it)
+ data = jQuery.trim( data );
+
+ // Attempt to parse using the native JSON parser first
+ if ( window.JSON && window.JSON.parse ) {
+ return window.JSON.parse( data );
+ }
+
+ // Make sure the incoming data is actual JSON
+ // Logic borrowed from http://json.org/json2.js
+ if ( rvalidchars.test( data.replace( rvalidescape, "@" )
+ .replace( rvalidtokens, "]" )
+ .replace( rvalidbraces, "")) ) {
+
+ return ( new Function( "return " + data ) )();
+
+ }
+ jQuery.error( "Invalid JSON: " + data );
+ },
+
+ // Cross-browser xml parsing
+ parseXML: function( data ) {
+ if ( typeof data !== "string" || !data ) {
+ return null;
+ }
+ var xml, tmp;
+ try {
+ if ( window.DOMParser ) { // Standard
+ tmp = new DOMParser();
+ xml = tmp.parseFromString( data , "text/xml" );
+ } else { // IE
+ xml = new ActiveXObject( "Microsoft.XMLDOM" );
+ xml.async = "false";
+ xml.loadXML( data );
+ }
+ } catch( e ) {
+ xml = undefined;
+ }
+ if ( !xml || !xml.documentElement || xml.getElementsByTagName( "parsererror" ).length ) {
+ jQuery.error( "Invalid XML: " + data );
+ }
+ return xml;
+ },
+
+ noop: function() {},
+
+ // Evaluates a script in a global context
+ // Workarounds based on findings by Jim Driscoll
+ // http://weblogs.java.net/blog/driscoll/archive/2009/09/08/eval-javascript-global-context
+ globalEval: function( data ) {
+ if ( data && rnotwhite.test( data ) ) {
+ // We use execScript on Internet Explorer
+ // We use an anonymous function so that context is window
+ // rather than jQuery in Firefox
+ ( window.execScript || function( data ) {
+ window[ "eval" ].call( window, data );
+ } )( data );
+ }
+ },
+
+ // Convert dashed to camelCase; used by the css and data modules
+ // Microsoft forgot to hump their vendor prefix (#9572)
+ camelCase: function( string ) {
+ return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase );
+ },
+
+ nodeName: function( elem, name ) {
+ return elem.nodeName && elem.nodeName.toUpperCase() === name.toUpperCase();
+ },
+
+ // args is for internal usage only
+ each: function( object, callback, args ) {
+ var name, i = 0,
+ length = object.length,
+ isObj = length === undefined || jQuery.isFunction( object );
+
+ if ( args ) {
+ if ( isObj ) {
+ for ( name in object ) {
+ if ( callback.apply( object[ name ], args ) === false ) {
+ break;
+ }
+ }
+ } else {
+ for ( ; i < length; ) {
+ if ( callback.apply( object[ i++ ], args ) === false ) {
+ break;
+ }
+ }
+ }
+
+ // A special, fast, case for the most common use of each
+ } else {
+ if ( isObj ) {
+ for ( name in object ) {
+ if ( callback.call( object[ name ], name, object[ name ] ) === false ) {
+ break;
+ }
+ }
+ } else {
+ for ( ; i < length; ) {
+ if ( callback.call( object[ i ], i, object[ i++ ] ) === false ) {
+ break;
+ }
+ }
+ }
+ }
+
+ return object;
+ },
+
+ // Use native String.trim function wherever possible
+ trim: trim ?
+ function( text ) {
+ return text == null ?
+ "" :
+ trim.call( text );
+ } :
+
+ // Otherwise use our own trimming functionality
+ function( text ) {
+ return text == null ?
+ "" :
+ text.toString().replace( trimLeft, "" ).replace( trimRight, "" );
+ },
+
+ // results is for internal usage only
+ makeArray: function( array, results ) {
+ var ret = results || [];
+
+ if ( array != null ) {
+ // The window, strings (and functions) also have 'length'
+ // Tweaked logic slightly to handle Blackberry 4.7 RegExp issues #6930
+ var type = jQuery.type( array );
+
+ if ( array.length == null || type === "string" || type === "function" || type === "regexp" || jQuery.isWindow( array ) ) {
+ push.call( ret, array );
+ } else {
+ jQuery.merge( ret, array );
+ }
+ }
+
+ return ret;
+ },
+
+ inArray: function( elem, array, i ) {
+ var len;
+
+ if ( array ) {
+ if ( indexOf ) {
+ return indexOf.call( array, elem, i );
+ }
+
+ len = array.length;
+ i = i ? i < 0 ? Math.max( 0, len + i ) : i : 0;
+
+ for ( ; i < len; i++ ) {
+ // Skip accessing in sparse arrays
+ if ( i in array && array[ i ] === elem ) {
+ return i;
+ }
+ }
+ }
+
+ return -1;
+ },
+
+ merge: function( first, second ) {
+ var i = first.length,
+ j = 0;
+
+ if ( typeof second.length === "number" ) {
+ for ( var l = second.length; j < l; j++ ) {
+ first[ i++ ] = second[ j ];
+ }
+
+ } else {
+ while ( second[j] !== undefined ) {
+ first[ i++ ] = second[ j++ ];
+ }
+ }
+
+ first.length = i;
+
+ return first;
+ },
+
+ grep: function( elems, callback, inv ) {
+ var ret = [], retVal;
+ inv = !!inv;
+
+ // Go through the array, only saving the items
+ // that pass the validator function
+ for ( var i = 0, length = elems.length; i < length; i++ ) {
+ retVal = !!callback( elems[ i ], i );
+ if ( inv !== retVal ) {
+ ret.push( elems[ i ] );
+ }
+ }
+
+ return ret;
+ },
+
+ // arg is for internal usage only
+ map: function( elems, callback, arg ) {
+ var value, key, ret = [],
+ i = 0,
+ length = elems.length,
+ // jquery objects are treated as arrays
+ isArray = elems instanceof jQuery || length !== undefined && typeof length === "number" && ( ( length > 0 && elems[ 0 ] && elems[ length -1 ] ) || length === 0 || jQuery.isArray( elems ) ) ;
+
+ // Go through the array, translating each of the items to their
+ if ( isArray ) {
+ for ( ; i < length; i++ ) {
+ value = callback( elems[ i ], i, arg );
+
+ if ( value != null ) {
+ ret[ ret.length ] = value;
+ }
+ }
+
+ // Go through every key on the object,
+ } else {
+ for ( key in elems ) {
+ value = callback( elems[ key ], key, arg );
+
+ if ( value != null ) {
+ ret[ ret.length ] = value;
+ }
+ }
+ }
+
+ // Flatten any nested arrays
+ return ret.concat.apply( [], ret );
+ },
+
+ // A global GUID counter for objects
+ guid: 1,
+
+ // Bind a function to a context, optionally partially applying any
+ // arguments.
+ proxy: function( fn, context ) {
+ if ( typeof context === "string" ) {
+ var tmp = fn[ context ];
+ context = fn;
+ fn = tmp;
+ }
+
+ // Quick check to determine if target is callable, in the spec
+ // this throws a TypeError, but we will just return undefined.
+ if ( !jQuery.isFunction( fn ) ) {
+ return undefined;
+ }
+
+ // Simulated bind
+ var args = slice.call( arguments, 2 ),
+ proxy = function() {
+ return fn.apply( context, args.concat( slice.call( arguments ) ) );
+ };
+
+ // Set the guid of unique handler to the same of original handler, so it can be removed
+ proxy.guid = fn.guid = fn.guid || proxy.guid || jQuery.guid++;
+
+ return proxy;
+ },
+
+ // Mutifunctional method to get and set values to a collection
+ // The value/s can optionally be executed if it's a function
+ access: function( elems, fn, key, value, chainable, emptyGet, pass ) {
+ var exec,
+ bulk = key == null,
+ i = 0,
+ length = elems.length;
+
+ // Sets many values
+ if ( key && typeof key === "object" ) {
+ for ( i in key ) {
+ jQuery.access( elems, fn, i, key[i], 1, emptyGet, value );
+ }
+ chainable = 1;
+
+ // Sets one value
+ } else if ( value !== undefined ) {
+ // Optionally, function values get executed if exec is true
+ exec = pass === undefined && jQuery.isFunction( value );
+
+ if ( bulk ) {
+ // Bulk operations only iterate when executing function values
+ if ( exec ) {
+ exec = fn;
+ fn = function( elem, key, value ) {
+ return exec.call( jQuery( elem ), value );
+ };
+
+ // Otherwise they run against the entire set
+ } else {
+ fn.call( elems, value );
+ fn = null;
+ }
+ }
+
+ if ( fn ) {
+ for (; i < length; i++ ) {
+ fn( elems[i], key, exec ? value.call( elems[i], i, fn( elems[i], key ) ) : value, pass );
+ }
+ }
+
+ chainable = 1;
+ }
+
+ return chainable ?
+ elems :
+
+ // Gets
+ bulk ?
+ fn.call( elems ) :
+ length ? fn( elems[0], key ) : emptyGet;
+ },
+
+ now: function() {
+ return ( new Date() ).getTime();
+ },
+
+ // Use of jQuery.browser is frowned upon.
+ // More details: http://docs.jquery.com/Utilities/jQuery.browser
+ uaMatch: function( ua ) {
+ ua = ua.toLowerCase();
+
+ var match = rwebkit.exec( ua ) ||
+ ropera.exec( ua ) ||
+ rmsie.exec( ua ) ||
+ ua.indexOf("compatible") < 0 && rmozilla.exec( ua ) ||
+ [];
+
+ return { browser: match[1] || "", version: match[2] || "0" };
+ },
+
+ sub: function() {
+ function jQuerySub( selector, context ) {
+ return new jQuerySub.fn.init( selector, context );
+ }
+ jQuery.extend( true, jQuerySub, this );
+ jQuerySub.superclass = this;
+ jQuerySub.fn = jQuerySub.prototype = this();
+ jQuerySub.fn.constructor = jQuerySub;
+ jQuerySub.sub = this.sub;
+ jQuerySub.fn.init = function init( selector, context ) {
+ if ( context && context instanceof jQuery && !(context instanceof jQuerySub) ) {
+ context = jQuerySub( context );
+ }
+
+ return jQuery.fn.init.call( this, selector, context, rootjQuerySub );
+ };
+ jQuerySub.fn.init.prototype = jQuerySub.fn;
+ var rootjQuerySub = jQuerySub(document);
+ return jQuerySub;
+ },
+
+ browser: {}
+});
+
+// Populate the class2type map
+jQuery.each("Boolean Number String Function Array Date RegExp Object".split(" "), function(i, name) {
+ class2type[ "[object " + name + "]" ] = name.toLowerCase();
+});
+
+browserMatch = jQuery.uaMatch( userAgent );
+if ( browserMatch.browser ) {
+ jQuery.browser[ browserMatch.browser ] = true;
+ jQuery.browser.version = browserMatch.version;
+}
+
+// Deprecated, use jQuery.browser.webkit instead
+if ( jQuery.browser.webkit ) {
+ jQuery.browser.safari = true;
+}
+
+// IE doesn't match non-breaking spaces with \s
+if ( rnotwhite.test( "\xA0" ) ) {
+ trimLeft = /^[\s\xA0]+/;
+ trimRight = /[\s\xA0]+$/;
+}
+
+// All jQuery objects should point back to these
+rootjQuery = jQuery(document);
+
+// Cleanup functions for the document ready method
+if ( document.addEventListener ) {
+ DOMContentLoaded = function() {
+ document.removeEventListener( "DOMContentLoaded", DOMContentLoaded, false );
+ jQuery.ready();
+ };
+
+} else if ( document.attachEvent ) {
+ DOMContentLoaded = function() {
+ // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443).
+ if ( document.readyState === "complete" ) {
+ document.detachEvent( "onreadystatechange", DOMContentLoaded );
+ jQuery.ready();
+ }
+ };
+}
+
+// The DOM ready check for Internet Explorer
+function doScrollCheck() {
+ if ( jQuery.isReady ) {
+ return;
+ }
+
+ try {
+ // If IE is used, use the trick by Diego Perini
+ // http://javascript.nwbox.com/IEContentLoaded/
+ document.documentElement.doScroll("left");
+ } catch(e) {
+ setTimeout( doScrollCheck, 1 );
+ return;
+ }
+
+ // and execute any waiting functions
+ jQuery.ready();
+}
+
+return jQuery;
+
+})();
+
+
+// String to Object flags format cache
+var flagsCache = {};
+
+// Convert String-formatted flags into Object-formatted ones and store in cache
+function createFlags( flags ) {
+ var object = flagsCache[ flags ] = {},
+ i, length;
+ flags = flags.split( /\s+/ );
+ for ( i = 0, length = flags.length; i < length; i++ ) {
+ object[ flags[i] ] = true;
+ }
+ return object;
+}
+
+/*
+ * Create a callback list using the following parameters:
+ *
+ * flags: an optional list of space-separated flags that will change how
+ * the callback list behaves
+ *
+ * By default a callback list will act like an event callback list and can be
+ * "fired" multiple times.
+ *
+ * Possible flags:
+ *
+ * once: will ensure the callback list can only be fired once (like a Deferred)
+ *
+ * memory: will keep track of previous values and will call any callback added
+ * after the list has been fired right away with the latest "memorized"
+ * values (like a Deferred)
+ *
+ * unique: will ensure a callback can only be added once (no duplicate in the list)
+ *
+ * stopOnFalse: interrupt callings when a callback returns false
+ *
+ */
+jQuery.Callbacks = function( flags ) {
+
+ // Convert flags from String-formatted to Object-formatted
+ // (we check in cache first)
+ flags = flags ? ( flagsCache[ flags ] || createFlags( flags ) ) : {};
+
+ var // Actual callback list
+ list = [],
+ // Stack of fire calls for repeatable lists
+ stack = [],
+ // Last fire value (for non-forgettable lists)
+ memory,
+ // Flag to know if list was already fired
+ fired,
+ // Flag to know if list is currently firing
+ firing,
+ // First callback to fire (used internally by add and fireWith)
+ firingStart,
+ // End of the loop when firing
+ firingLength,
+ // Index of currently firing callback (modified by remove if needed)
+ firingIndex,
+ // Add one or several callbacks to the list
+ add = function( args ) {
+ var i,
+ length,
+ elem,
+ type,
+ actual;
+ for ( i = 0, length = args.length; i < length; i++ ) {
+ elem = args[ i ];
+ type = jQuery.type( elem );
+ if ( type === "array" ) {
+ // Inspect recursively
+ add( elem );
+ } else if ( type === "function" ) {
+ // Add if not in unique mode and callback is not in
+ if ( !flags.unique || !self.has( elem ) ) {
+ list.push( elem );
+ }
+ }
+ }
+ },
+ // Fire callbacks
+ fire = function( context, args ) {
+ args = args || [];
+ memory = !flags.memory || [ context, args ];
+ fired = true;
+ firing = true;
+ firingIndex = firingStart || 0;
+ firingStart = 0;
+ firingLength = list.length;
+ for ( ; list && firingIndex < firingLength; firingIndex++ ) {
+ if ( list[ firingIndex ].apply( context, args ) === false && flags.stopOnFalse ) {
+ memory = true; // Mark as halted
+ break;
+ }
+ }
+ firing = false;
+ if ( list ) {
+ if ( !flags.once ) {
+ if ( stack && stack.length ) {
+ memory = stack.shift();
+ self.fireWith( memory[ 0 ], memory[ 1 ] );
+ }
+ } else if ( memory === true ) {
+ self.disable();
+ } else {
+ list = [];
+ }
+ }
+ },
+ // Actual Callbacks object
+ self = {
+ // Add a callback or a collection of callbacks to the list
+ add: function() {
+ if ( list ) {
+ var length = list.length;
+ add( arguments );
+ // Do we need to add the callbacks to the
+ // current firing batch?
+ if ( firing ) {
+ firingLength = list.length;
+ // With memory, if we're not firing then
+ // we should call right away, unless previous
+ // firing was halted (stopOnFalse)
+ } else if ( memory && memory !== true ) {
+ firingStart = length;
+ fire( memory[ 0 ], memory[ 1 ] );
+ }
+ }
+ return this;
+ },
+ // Remove a callback from the list
+ remove: function() {
+ if ( list ) {
+ var args = arguments,
+ argIndex = 0,
+ argLength = args.length;
+ for ( ; argIndex < argLength ; argIndex++ ) {
+ for ( var i = 0; i < list.length; i++ ) {
+ if ( args[ argIndex ] === list[ i ] ) {
+ // Handle firingIndex and firingLength
+ if ( firing ) {
+ if ( i <= firingLength ) {
+ firingLength--;
+ if ( i <= firingIndex ) {
+ firingIndex--;
+ }
+ }
+ }
+ // Remove the element
+ list.splice( i--, 1 );
+ // If we have some unicity property then
+ // we only need to do this once
+ if ( flags.unique ) {
+ break;
+ }
+ }
+ }
+ }
+ }
+ return this;
+ },
+ // Control if a given callback is in the list
+ has: function( fn ) {
+ if ( list ) {
+ var i = 0,
+ length = list.length;
+ for ( ; i < length; i++ ) {
+ if ( fn === list[ i ] ) {
+ return true;
+ }
+ }
+ }
+ return false;
+ },
+ // Remove all callbacks from the list
+ empty: function() {
+ list = [];
+ return this;
+ },
+ // Have the list do nothing anymore
+ disable: function() {
+ list = stack = memory = undefined;
+ return this;
+ },
+ // Is it disabled?
+ disabled: function() {
+ return !list;
+ },
+ // Lock the list in its current state
+ lock: function() {
+ stack = undefined;
+ if ( !memory || memory === true ) {
+ self.disable();
+ }
+ return this;
+ },
+ // Is it locked?
+ locked: function() {
+ return !stack;
+ },
+ // Call all callbacks with the given context and arguments
+ fireWith: function( context, args ) {
+ if ( stack ) {
+ if ( firing ) {
+ if ( !flags.once ) {
+ stack.push( [ context, args ] );
+ }
+ } else if ( !( flags.once && memory ) ) {
+ fire( context, args );
+ }
+ }
+ return this;
+ },
+ // Call all the callbacks with the given arguments
+ fire: function() {
+ self.fireWith( this, arguments );
+ return this;
+ },
+ // To know if the callbacks have already been called at least once
+ fired: function() {
+ return !!fired;
+ }
+ };
+
+ return self;
+};
+
+
+
+
+var // Static reference to slice
+ sliceDeferred = [].slice;
+
+jQuery.extend({
+
+ Deferred: function( func ) {
+ var doneList = jQuery.Callbacks( "once memory" ),
+ failList = jQuery.Callbacks( "once memory" ),
+ progressList = jQuery.Callbacks( "memory" ),
+ state = "pending",
+ lists = {
+ resolve: doneList,
+ reject: failList,
+ notify: progressList
+ },
+ promise = {
+ done: doneList.add,
+ fail: failList.add,
+ progress: progressList.add,
+
+ state: function() {
+ return state;
+ },
+
+ // Deprecated
+ isResolved: doneList.fired,
+ isRejected: failList.fired,
+
+ then: function( doneCallbacks, failCallbacks, progressCallbacks ) {
+ deferred.done( doneCallbacks ).fail( failCallbacks ).progress( progressCallbacks );
+ return this;
+ },
+ always: function() {
+ deferred.done.apply( deferred, arguments ).fail.apply( deferred, arguments );
+ return this;
+ },
+ pipe: function( fnDone, fnFail, fnProgress ) {
+ return jQuery.Deferred(function( newDefer ) {
+ jQuery.each( {
+ done: [ fnDone, "resolve" ],
+ fail: [ fnFail, "reject" ],
+ progress: [ fnProgress, "notify" ]
+ }, function( handler, data ) {
+ var fn = data[ 0 ],
+ action = data[ 1 ],
+ returned;
+ if ( jQuery.isFunction( fn ) ) {
+ deferred[ handler ](function() {
+ returned = fn.apply( this, arguments );
+ if ( returned && jQuery.isFunction( returned.promise ) ) {
+ returned.promise().then( newDefer.resolve, newDefer.reject, newDefer.notify );
+ } else {
+ newDefer[ action + "With" ]( this === deferred ? newDefer : this, [ returned ] );
+ }
+ });
+ } else {
+ deferred[ handler ]( newDefer[ action ] );
+ }
+ });
+ }).promise();
+ },
+ // Get a promise for this deferred
+ // If obj is provided, the promise aspect is added to the object
+ promise: function( obj ) {
+ if ( obj == null ) {
+ obj = promise;
+ } else {
+ for ( var key in promise ) {
+ obj[ key ] = promise[ key ];
+ }
+ }
+ return obj;
+ }
+ },
+ deferred = promise.promise({}),
+ key;
+
+ for ( key in lists ) {
+ deferred[ key ] = lists[ key ].fire;
+ deferred[ key + "With" ] = lists[ key ].fireWith;
+ }
+
+ // Handle state
+ deferred.done( function() {
+ state = "resolved";
+ }, failList.disable, progressList.lock ).fail( function() {
+ state = "rejected";
+ }, doneList.disable, progressList.lock );
+
+ // Call given func if any
+ if ( func ) {
+ func.call( deferred, deferred );
+ }
+
+ // All done!
+ return deferred;
+ },
+
+ // Deferred helper
+ when: function( firstParam ) {
+ var args = sliceDeferred.call( arguments, 0 ),
+ i = 0,
+ length = args.length,
+ pValues = new Array( length ),
+ count = length,
+ pCount = length,
+ deferred = length <= 1 && firstParam && jQuery.isFunction( firstParam.promise ) ?
+ firstParam :
+ jQuery.Deferred(),
+ promise = deferred.promise();
+ function resolveFunc( i ) {
+ return function( value ) {
+ args[ i ] = arguments.length > 1 ? sliceDeferred.call( arguments, 0 ) : value;
+ if ( !( --count ) ) {
+ deferred.resolveWith( deferred, args );
+ }
+ };
+ }
+ function progressFunc( i ) {
+ return function( value ) {
+ pValues[ i ] = arguments.length > 1 ? sliceDeferred.call( arguments, 0 ) : value;
+ deferred.notifyWith( promise, pValues );
+ };
+ }
+ if ( length > 1 ) {
+ for ( ; i < length; i++ ) {
+ if ( args[ i ] && args[ i ].promise && jQuery.isFunction( args[ i ].promise ) ) {
+ args[ i ].promise().then( resolveFunc(i), deferred.reject, progressFunc(i) );
+ } else {
+ --count;
+ }
+ }
+ if ( !count ) {
+ deferred.resolveWith( deferred, args );
+ }
+ } else if ( deferred !== firstParam ) {
+ deferred.resolveWith( deferred, length ? [ firstParam ] : [] );
+ }
+ return promise;
+ }
+});
+
+
+
+
+jQuery.support = (function() {
+
+ var support,
+ all,
+ a,
+ select,
+ opt,
+ input,
+ fragment,
+ tds,
+ events,
+ eventName,
+ i,
+ isSupported,
+ div = document.createElement( "div" ),
+ documentElement = document.documentElement;
+
+ // Preliminary tests
+ div.setAttribute("className", "t");
+ div.innerHTML = " <link/><table></table><a href='/a' style='top:1px;float:left;opacity:.55;'>a</a><input type='checkbox'/>";
+
+ all = div.getElementsByTagName( "*" );
+ a = div.getElementsByTagName( "a" )[ 0 ];
+
+ // Can't get basic test support
+ if ( !all || !all.length || !a ) {
+ return {};
+ }
+
+ // First batch of supports tests
+ select = document.createElement( "select" );
+ opt = select.appendChild( document.createElement("option") );
+ input = div.getElementsByTagName( "input" )[ 0 ];
+
+ support = {
+ // IE strips leading whitespace when .innerHTML is used
+ leadingWhitespace: ( div.firstChild.nodeType === 3 ),
+
+ // Make sure that tbody elements aren't automatically inserted
+ // IE will insert them into empty tables
+ tbody: !div.getElementsByTagName("tbody").length,
+
+ // Make sure that link elements get serialized correctly by innerHTML
+ // This requires a wrapper element in IE
+ htmlSerialize: !!div.getElementsByTagName("link").length,
+
+ // Get the style information from getAttribute
+ // (IE uses .cssText instead)
+ style: /top/.test( a.getAttribute("style") ),
+
+ // Make sure that URLs aren't manipulated
+ // (IE normalizes it by default)
+ hrefNormalized: ( a.getAttribute("href") === "/a" ),
+
+ // Make sure that element opacity exists
+ // (IE uses filter instead)
+ // Use a regex to work around a WebKit issue. See #5145
+ opacity: /^0.55/.test( a.style.opacity ),
+
+ // Verify style float existence
+ // (IE uses styleFloat instead of cssFloat)
+ cssFloat: !!a.style.cssFloat,
+
+ // Make sure that if no value is specified for a checkbox
+ // that it defaults to "on".
+ // (WebKit defaults to "" instead)
+ checkOn: ( input.value === "on" ),
+
+ // Make sure that a selected-by-default option has a working selected property.
+ // (WebKit defaults to false instead of true, IE too, if it's in an optgroup)
+ optSelected: opt.selected,
+
+ // Test setAttribute on camelCase class. If it works, we need attrFixes when doing get/setAttribute (ie6/7)
+ getSetAttribute: div.className !== "t",
+
+ // Tests for enctype support on a form(#6743)
+ enctype: !!document.createElement("form").enctype,
+
+ // Makes sure cloning an html5 element does not cause problems
+ // Where outerHTML is undefined, this still works
+ html5Clone: document.createElement("nav").cloneNode( true ).outerHTML !== "<:nav></:nav>",
+
+ // Will be defined later
+ submitBubbles: true,
+ changeBubbles: true,
+ focusinBubbles: false,
+ deleteExpando: true,
+ noCloneEvent: true,
+ inlineBlockNeedsLayout: false,
+ shrinkWrapBlocks: false,
+ reliableMarginRight: true,
+ pixelMargin: true
+ };
+
+ // jQuery.boxModel DEPRECATED in 1.3, use jQuery.support.boxModel instead
+ jQuery.boxModel = support.boxModel = (document.compatMode === "CSS1Compat");
+
+ // Make sure checked status is properly cloned
+ input.checked = true;
+ support.noCloneChecked = input.cloneNode( true ).checked;
+
+ // Make sure that the options inside disabled selects aren't marked as disabled
+ // (WebKit marks them as disabled)
+ select.disabled = true;
+ support.optDisabled = !opt.disabled;
+
+ // Test to see if it's possible to delete an expando from an element
+ // Fails in Internet Explorer
+ try {
+ delete div.test;
+ } catch( e ) {
+ support.deleteExpando = false;
+ }
+
+ if ( !div.addEventListener && div.attachEvent && div.fireEvent ) {
+ div.attachEvent( "onclick", function() {
+ // Cloning a node shouldn't copy over any
+ // bound event handlers (IE does this)
+ support.noCloneEvent = false;
+ });
+ div.cloneNode( true ).fireEvent( "onclick" );
+ }
+
+ // Check if a radio maintains its value
+ // after being appended to the DOM
+ input = document.createElement("input");
+ input.value = "t";
+ input.setAttribute("type", "radio");
+ support.radioValue = input.value === "t";
+
+ input.setAttribute("checked", "checked");
+
+ // #11217 - WebKit loses check when the name is after the checked attribute
+ input.setAttribute( "name", "t" );
+
+ div.appendChild( input );
+ fragment = document.createDocumentFragment();
+ fragment.appendChild( div.lastChild );
+
+ // WebKit doesn't clone checked state correctly in fragments
+ support.checkClone = fragment.cloneNode( true ).cloneNode( true ).lastChild.checked;
+
+ // Check if a disconnected checkbox will retain its checked
+ // value of true after appended to the DOM (IE6/7)
+ support.appendChecked = input.checked;
+
+ fragment.removeChild( input );
+ fragment.appendChild( div );
+
+ // Technique from Juriy Zaytsev
+ // http://perfectionkills.com/detecting-event-support-without-browser-sniffing/
+ // We only care about the case where non-standard event systems
+ // are used, namely in IE. Short-circuiting here helps us to
+ // avoid an eval call (in setAttribute) which can cause CSP
+ // to go haywire. See: https://developer.mozilla.org/en/Security/CSP
+ if ( div.attachEvent ) {
+ for ( i in {
+ submit: 1,
+ change: 1,
+ focusin: 1
+ }) {
+ eventName = "on" + i;
+ isSupported = ( eventName in div );
+ if ( !isSupported ) {
+ div.setAttribute( eventName, "return;" );
+ isSupported = ( typeof div[ eventName ] === "function" );
+ }
+ support[ i + "Bubbles" ] = isSupported;
+ }
+ }
+
+ fragment.removeChild( div );
+
+ // Null elements to avoid leaks in IE
+ fragment = select = opt = div = input = null;
+
+ // Run tests that need a body at doc ready
+ jQuery(function() {
+ var container, outer, inner, table, td, offsetSupport,
+ marginDiv, conMarginTop, style, html, positionTopLeftWidthHeight,
+ paddingMarginBorderVisibility, paddingMarginBorder,
+ body = document.getElementsByTagName("body")[0];
+
+ if ( !body ) {
+ // Return for frameset docs that don't have a body
+ return;
+ }
+
+ conMarginTop = 1;
+ paddingMarginBorder = "padding:0;margin:0;border:";
+ positionTopLeftWidthHeight = "position:absolute;top:0;left:0;width:1px;height:1px;";
+ paddingMarginBorderVisibility = paddingMarginBorder + "0;visibility:hidden;";
+ style = "style='" + positionTopLeftWidthHeight + paddingMarginBorder + "5px solid #000;";
+ html = "<div " + style + "display:block;'><div style='" + paddingMarginBorder + "0;display:block;overflow:hidden;'></div></div>" +
+ "<table " + style + "' cellpadding='0' cellspacing='0'>" +
+ "<tr><td></td></tr></table>";
+
+ container = document.createElement("div");
+ container.style.cssText = paddingMarginBorderVisibility + "width:0;height:0;position:static;top:0;margin-top:" + conMarginTop + "px";
+ body.insertBefore( container, body.firstChild );
+
+ // Construct the test element
+ div = document.createElement("div");
+ container.appendChild( div );
+
+ // Check if table cells still have offsetWidth/Height when they are set
+ // to display:none and there are still other visible table cells in a
+ // table row; if so, offsetWidth/Height are not reliable for use when
+ // determining if an element has been hidden directly using
+ // display:none (it is still safe to use offsets if a parent element is
+ // hidden; don safety goggles and see bug #4512 for more information).
+ // (only IE 8 fails this test)
+ div.innerHTML = "<table><tr><td style='" + paddingMarginBorder + "0;display:none'></td><td>t</td></tr></table>";
+ tds = div.getElementsByTagName( "td" );
+ isSupported = ( tds[ 0 ].offsetHeight === 0 );
+
+ tds[ 0 ].style.display = "";
+ tds[ 1 ].style.display = "none";
+
+ // Check if empty table cells still have offsetWidth/Height
+ // (IE <= 8 fail this test)
+ support.reliableHiddenOffsets = isSupported && ( tds[ 0 ].offsetHeight === 0 );
+
+ // Check if div with explicit width and no margin-right incorrectly
+ // gets computed margin-right based on width of container. For more
+ // info see bug #3333
+ // Fails in WebKit before Feb 2011 nightlies
+ // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right
+ if ( window.getComputedStyle ) {
+ div.innerHTML = "";
+ marginDiv = document.createElement( "div" );
+ marginDiv.style.width = "0";
+ marginDiv.style.marginRight = "0";
+ div.style.width = "2px";
+ div.appendChild( marginDiv );
+ support.reliableMarginRight =
+ ( parseInt( ( window.getComputedStyle( marginDiv, null ) || { marginRight: 0 } ).marginRight, 10 ) || 0 ) === 0;
+ }
+
+ if ( typeof div.style.zoom !== "undefined" ) {
+ // Check if natively block-level elements act like inline-block
+ // elements when setting their display to 'inline' and giving
+ // them layout
+ // (IE < 8 does this)
+ div.innerHTML = "";
+ div.style.width = div.style.padding = "1px";
+ div.style.border = 0;
+ div.style.overflow = "hidden";
+ div.style.display = "inline";
+ div.style.zoom = 1;
+ support.inlineBlockNeedsLayout = ( div.offsetWidth === 3 );
+
+ // Check if elements with layout shrink-wrap their children
+ // (IE 6 does this)
+ div.style.display = "block";
+ div.style.overflow = "visible";
+ div.innerHTML = "<div style='width:5px;'></div>";
+ support.shrinkWrapBlocks = ( div.offsetWidth !== 3 );
+ }
+
+ div.style.cssText = positionTopLeftWidthHeight + paddingMarginBorderVisibility;
+ div.innerHTML = html;
+
+ outer = div.firstChild;
+ inner = outer.firstChild;
+ td = outer.nextSibling.firstChild.firstChild;
+
+ offsetSupport = {
+ doesNotAddBorder: ( inner.offsetTop !== 5 ),
+ doesAddBorderForTableAndCells: ( td.offsetTop === 5 )
+ };
+
+ inner.style.position = "fixed";
+ inner.style.top = "20px";
+
+ // safari subtracts parent border width here which is 5px
+ offsetSupport.fixedPosition = ( inner.offsetTop === 20 || inner.offsetTop === 15 );
+ inner.style.position = inner.style.top = "";
+
+ outer.style.overflow = "hidden";
+ outer.style.position = "relative";
+
+ offsetSupport.subtractsBorderForOverflowNotVisible = ( inner.offsetTop === -5 );
+ offsetSupport.doesNotIncludeMarginInBodyOffset = ( body.offsetTop !== conMarginTop );
+
+ if ( window.getComputedStyle ) {
+ div.style.marginTop = "1%";
+ support.pixelMargin = ( window.getComputedStyle( div, null ) || { marginTop: 0 } ).marginTop !== "1%";
+ }
+
+ if ( typeof container.style.zoom !== "undefined" ) {
+ container.style.zoom = 1;
+ }
+
+ body.removeChild( container );
+ marginDiv = div = container = null;
+
+ jQuery.extend( support, offsetSupport );
+ });
+
+ return support;
+})();
+
+
+
+
+var rbrace = /^(?:\{.*\}|\[.*\])$/,
+ rmultiDash = /([A-Z])/g;
+
+jQuery.extend({
+ cache: {},
+
+ // Please use with caution
+ uuid: 0,
+
+ // Unique for each copy of jQuery on the page
+ // Non-digits removed to match rinlinejQuery
+ expando: "jQuery" + ( jQuery.fn.jquery + Math.random() ).replace( /\D/g, "" ),
+
+ // The following elements throw uncatchable exceptions if you
+ // attempt to add expando properties to them.
+ noData: {
+ "embed": true,
+ // Ban all objects except for Flash (which handle expandos)
+ "object": "clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",
+ "applet": true
+ },
+
+ hasData: function( elem ) {
+ elem = elem.nodeType ? jQuery.cache[ elem[jQuery.expando] ] : elem[ jQuery.expando ];
+ return !!elem && !isEmptyDataObject( elem );
+ },
+
+ data: function( elem, name, data, pvt /* Internal Use Only */ ) {
+ if ( !jQuery.acceptData( elem ) ) {
+ return;
+ }
+
+ var privateCache, thisCache, ret,
+ internalKey = jQuery.expando,
+ getByName = typeof name === "string",
+
+ // We have to handle DOM nodes and JS objects differently because IE6-7
+ // can't GC object references properly across the DOM-JS boundary
+ isNode = elem.nodeType,
+
+ // Only DOM nodes need the global jQuery cache; JS object data is
+ // attached directly to the object so GC can occur automatically
+ cache = isNode ? jQuery.cache : elem,
+
+ // Only defining an ID for JS objects if its cache already exists allows
+ // the code to shortcut on the same path as a DOM node with no cache
+ id = isNode ? elem[ internalKey ] : elem[ internalKey ] && internalKey,
+ isEvents = name === "events";
+
+ // Avoid doing any more work than we need to when trying to get data on an
+ // object that has no data at all
+ if ( (!id || !cache[id] || (!isEvents && !pvt && !cache[id].data)) && getByName && data === undefined ) {
+ return;
+ }
+
+ if ( !id ) {
+ // Only DOM nodes need a new unique ID for each element since their data
+ // ends up in the global cache
+ if ( isNode ) {
+ elem[ internalKey ] = id = ++jQuery.uuid;
+ } else {
+ id = internalKey;
+ }
+ }
+
+ if ( !cache[ id ] ) {
+ cache[ id ] = {};
+
+ // Avoids exposing jQuery metadata on plain JS objects when the object
+ // is serialized using JSON.stringify
+ if ( !isNode ) {
+ cache[ id ].toJSON = jQuery.noop;
+ }
+ }
+
+ // An object can be passed to jQuery.data instead of a key/value pair; this gets
+ // shallow copied over onto the existing cache
+ if ( typeof name === "object" || typeof name === "function" ) {
+ if ( pvt ) {
+ cache[ id ] = jQuery.extend( cache[ id ], name );
+ } else {
+ cache[ id ].data = jQuery.extend( cache[ id ].data, name );
+ }
+ }
+
+ privateCache = thisCache = cache[ id ];
+
+ // jQuery data() is stored in a separate object inside the object's internal data
+ // cache in order to avoid key collisions between internal data and user-defined
+ // data.
+ if ( !pvt ) {
+ if ( !thisCache.data ) {
+ thisCache.data = {};
+ }
+
+ thisCache = thisCache.data;
+ }
+
+ if ( data !== undefined ) {
+ thisCache[ jQuery.camelCase( name ) ] = data;
+ }
+
+ // Users should not attempt to inspect the internal events object using jQuery.data,
+ // it is undocumented and subject to change. But does anyone listen? No.
+ if ( isEvents && !thisCache[ name ] ) {
+ return privateCache.events;
+ }
+
+ // Check for both converted-to-camel and non-converted data property names
+ // If a data property was specified
+ if ( getByName ) {
+
+ // First Try to find as-is property data
+ ret = thisCache[ name ];
+
+ // Test for null|undefined property data
+ if ( ret == null ) {
+
+ // Try to find the camelCased property
+ ret = thisCache[ jQuery.camelCase( name ) ];
+ }
+ } else {
+ ret = thisCache;
+ }
+
+ return ret;
+ },
+
+ removeData: function( elem, name, pvt /* Internal Use Only */ ) {
+ if ( !jQuery.acceptData( elem ) ) {
+ return;
+ }
+
+ var thisCache, i, l,
+
+ // Reference to internal data cache key
+ internalKey = jQuery.expando,
+
+ isNode = elem.nodeType,
+
+ // See jQuery.data for more information
+ cache = isNode ? jQuery.cache : elem,
+
+ // See jQuery.data for more information
+ id = isNode ? elem[ internalKey ] : internalKey;
+
+ // If there is already no cache entry for this object, there is no
+ // purpose in continuing
+ if ( !cache[ id ] ) {
+ return;
+ }
+
+ if ( name ) {
+
+ thisCache = pvt ? cache[ id ] : cache[ id ].data;
+
+ if ( thisCache ) {
+
+ // Support array or space separated string names for data keys
+ if ( !jQuery.isArray( name ) ) {
+
+ // try the string as a key before any manipulation
+ if ( name in thisCache ) {
+ name = [ name ];
+ } else {
+
+ // split the camel cased version by spaces unless a key with the spaces exists
+ name = jQuery.camelCase( name );
+ if ( name in thisCache ) {
+ name = [ name ];
+ } else {
+ name = name.split( " " );
+ }
+ }
+ }
+
+ for ( i = 0, l = name.length; i < l; i++ ) {
+ delete thisCache[ name[i] ];
+ }
+
+ // If there is no data left in the cache, we want to continue
+ // and let the cache object itself get destroyed
+ if ( !( pvt ? isEmptyDataObject : jQuery.isEmptyObject )( thisCache ) ) {
+ return;
+ }
+ }
+ }
+
+ // See jQuery.data for more information
+ if ( !pvt ) {
+ delete cache[ id ].data;
+
+ // Don't destroy the parent cache unless the internal data object
+ // had been the only thing left in it
+ if ( !isEmptyDataObject(cache[ id ]) ) {
+ return;
+ }
+ }
+
+ // Browsers that fail expando deletion also refuse to delete expandos on
+ // the window, but it will allow it on all other JS objects; other browsers
+ // don't care
+ // Ensure that `cache` is not a window object #10080
+ if ( jQuery.support.deleteExpando || !cache.setInterval ) {
+ delete cache[ id ];
+ } else {
+ cache[ id ] = null;
+ }
+
+ // We destroyed the cache and need to eliminate the expando on the node to avoid
+ // false lookups in the cache for entries that no longer exist
+ if ( isNode ) {
+ // IE does not allow us to delete expando properties from nodes,
+ // nor does it have a removeAttribute function on Document nodes;
+ // we must handle all of these cases
+ if ( jQuery.support.deleteExpando ) {
+ delete elem[ internalKey ];
+ } else if ( elem.removeAttribute ) {
+ elem.removeAttribute( internalKey );
+ } else {
+ elem[ internalKey ] = null;
+ }
+ }
+ },
+
+ // For internal use only.
+ _data: function( elem, name, data ) {
+ return jQuery.data( elem, name, data, true );
+ },
+
+ // A method for determining if a DOM node can handle the data expando
+ acceptData: function( elem ) {
+ if ( elem.nodeName ) {
+ var match = jQuery.noData[ elem.nodeName.toLowerCase() ];
+
+ if ( match ) {
+ return !(match === true || elem.getAttribute("classid") !== match);
+ }
+ }
+
+ return true;
+ }
+});
+
+jQuery.fn.extend({
+ data: function( key, value ) {
+ var parts, part, attr, name, l,
+ elem = this[0],
+ i = 0,
+ data = null;
+
+ // Gets all values
+ if ( key === undefined ) {
+ if ( this.length ) {
+ data = jQuery.data( elem );
+
+ if ( elem.nodeType === 1 && !jQuery._data( elem, "parsedAttrs" ) ) {
+ attr = elem.attributes;
+ for ( l = attr.length; i < l; i++ ) {
+ name = attr[i].name;
+
+ if ( name.indexOf( "data-" ) === 0 ) {
+ name = jQuery.camelCase( name.substring(5) );
+
+ dataAttr( elem, name, data[ name ] );
+ }
+ }
+ jQuery._data( elem, "parsedAttrs", true );
+ }
+ }
+
+ return data;
+ }
+
+ // Sets multiple values
+ if ( typeof key === "object" ) {
+ return this.each(function() {
+ jQuery.data( this, key );
+ });
+ }
+
+ parts = key.split( ".", 2 );
+ parts[1] = parts[1] ? "." + parts[1] : "";
+ part = parts[1] + "!";
+
+ return jQuery.access( this, function( value ) {
+
+ if ( value === undefined ) {
+ data = this.triggerHandler( "getData" + part, [ parts[0] ] );
+
+ // Try to fetch any internally stored data first
+ if ( data === undefined && elem ) {
+ data = jQuery.data( elem, key );
+ data = dataAttr( elem, key, data );
+ }
+
+ return data === undefined && parts[1] ?
+ this.data( parts[0] ) :
+ data;
+ }
+
+ parts[1] = value;
+ this.each(function() {
+ var self = jQuery( this );
+
+ self.triggerHandler( "setData" + part, parts );
+ jQuery.data( this, key, value );
+ self.triggerHandler( "changeData" + part, parts );
+ });
+ }, null, value, arguments.length > 1, null, false );
+ },
+
+ removeData: function( key ) {
+ return this.each(function() {
+ jQuery.removeData( this, key );
+ });
+ }
+});
+
+function dataAttr( elem, key, data ) {
+ // If nothing was found internally, try to fetch any
+ // data from the HTML5 data-* attribute
+ if ( data === undefined && elem.nodeType === 1 ) {
+
+ var name = "data-" + key.replace( rmultiDash, "-$1" ).toLowerCase();
+
+ data = elem.getAttribute( name );
+
+ if ( typeof data === "string" ) {
+ try {
+ data = data === "true" ? true :
+ data === "false" ? false :
+ data === "null" ? null :
+ jQuery.isNumeric( data ) ? +data :
+ rbrace.test( data ) ? jQuery.parseJSON( data ) :
+ data;
+ } catch( e ) {}
+
+ // Make sure we set the data so it isn't changed later
+ jQuery.data( elem, key, data );
+
+ } else {
+ data = undefined;
+ }
+ }
+
+ return data;
+}
+
+// checks a cache object for emptiness
+function isEmptyDataObject( obj ) {
+ for ( var name in obj ) {
+
+ // if the public data object is empty, the private is still empty
+ if ( name === "data" && jQuery.isEmptyObject( obj[name] ) ) {
+ continue;
+ }
+ if ( name !== "toJSON" ) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+
+function handleQueueMarkDefer( elem, type, src ) {
+ var deferDataKey = type + "defer",
+ queueDataKey = type + "queue",
+ markDataKey = type + "mark",
+ defer = jQuery._data( elem, deferDataKey );
+ if ( defer &&
+ ( src === "queue" || !jQuery._data(elem, queueDataKey) ) &&
+ ( src === "mark" || !jQuery._data(elem, markDataKey) ) ) {
+ // Give room for hard-coded callbacks to fire first
+ // and eventually mark/queue something else on the element
+ setTimeout( function() {
+ if ( !jQuery._data( elem, queueDataKey ) &&
+ !jQuery._data( elem, markDataKey ) ) {
+ jQuery.removeData( elem, deferDataKey, true );
+ defer.fire();
+ }
+ }, 0 );
+ }
+}
+
+jQuery.extend({
+
+ _mark: function( elem, type ) {
+ if ( elem ) {
+ type = ( type || "fx" ) + "mark";
+ jQuery._data( elem, type, (jQuery._data( elem, type ) || 0) + 1 );
+ }
+ },
+
+ _unmark: function( force, elem, type ) {
+ if ( force !== true ) {
+ type = elem;
+ elem = force;
+ force = false;
+ }
+ if ( elem ) {
+ type = type || "fx";
+ var key = type + "mark",
+ count = force ? 0 : ( (jQuery._data( elem, key ) || 1) - 1 );
+ if ( count ) {
+ jQuery._data( elem, key, count );
+ } else {
+ jQuery.removeData( elem, key, true );
+ handleQueueMarkDefer( elem, type, "mark" );
+ }
+ }
+ },
+
+ queue: function( elem, type, data ) {
+ var q;
+ if ( elem ) {
+ type = ( type || "fx" ) + "queue";
+ q = jQuery._data( elem, type );
+
+ // Speed up dequeue by getting out quickly if this is just a lookup
+ if ( data ) {
+ if ( !q || jQuery.isArray(data) ) {
+ q = jQuery._data( elem, type, jQuery.makeArray(data) );
+ } else {
+ q.push( data );
+ }
+ }
+ return q || [];
+ }
+ },
+
+ dequeue: function( elem, type ) {
+ type = type || "fx";
+
+ var queue = jQuery.queue( elem, type ),
+ fn = queue.shift(),
+ hooks = {};
+
+ // If the fx queue is dequeued, always remove the progress sentinel
+ if ( fn === "inprogress" ) {
+ fn = queue.shift();
+ }
+
+ if ( fn ) {
+ // Add a progress sentinel to prevent the fx queue from being
+ // automatically dequeued
+ if ( type === "fx" ) {
+ queue.unshift( "inprogress" );
+ }
+
+ jQuery._data( elem, type + ".run", hooks );
+ fn.call( elem, function() {
+ jQuery.dequeue( elem, type );
+ }, hooks );
+ }
+
+ if ( !queue.length ) {
+ jQuery.removeData( elem, type + "queue " + type + ".run", true );
+ handleQueueMarkDefer( elem, type, "queue" );
+ }
+ }
+});
+
+jQuery.fn.extend({
+ queue: function( type, data ) {
+ var setter = 2;
+
+ if ( typeof type !== "string" ) {
+ data = type;
+ type = "fx";
+ setter--;
+ }
+
+ if ( arguments.length < setter ) {
+ return jQuery.queue( this[0], type );
+ }
+
+ return data === undefined ?
+ this :
+ this.each(function() {
+ var queue = jQuery.queue( this, type, data );
+
+ if ( type === "fx" && queue[0] !== "inprogress" ) {
+ jQuery.dequeue( this, type );
+ }
+ });
+ },
+ dequeue: function( type ) {
+ return this.each(function() {
+ jQuery.dequeue( this, type );
+ });
+ },
+ // Based off of the plugin by Clint Helfers, with permission.
+ // http://blindsignals.com/index.php/2009/07/jquery-delay/
+ delay: function( time, type ) {
+ time = jQuery.fx ? jQuery.fx.speeds[ time ] || time : time;
+ type = type || "fx";
+
+ return this.queue( type, function( next, hooks ) {
+ var timeout = setTimeout( next, time );
+ hooks.stop = function() {
+ clearTimeout( timeout );
+ };
+ });
+ },
+ clearQueue: function( type ) {
+ return this.queue( type || "fx", [] );
+ },
+ // Get a promise resolved when queues of a certain type
+ // are emptied (fx is the type by default)
+ promise: function( type, object ) {
+ if ( typeof type !== "string" ) {
+ object = type;
+ type = undefined;
+ }
+ type = type || "fx";
+ var defer = jQuery.Deferred(),
+ elements = this,
+ i = elements.length,
+ count = 1,
+ deferDataKey = type + "defer",
+ queueDataKey = type + "queue",
+ markDataKey = type + "mark",
+ tmp;
+ function resolve() {
+ if ( !( --count ) ) {
+ defer.resolveWith( elements, [ elements ] );
+ }
+ }
+ while( i-- ) {
+ if (( tmp = jQuery.data( elements[ i ], deferDataKey, undefined, true ) ||
+ ( jQuery.data( elements[ i ], queueDataKey, undefined, true ) ||
+ jQuery.data( elements[ i ], markDataKey, undefined, true ) ) &&
+ jQuery.data( elements[ i ], deferDataKey, jQuery.Callbacks( "once memory" ), true ) )) {
+ count++;
+ tmp.add( resolve );
+ }
+ }
+ resolve();
+ return defer.promise( object );
+ }
+});
+
+
+
+
+var rclass = /[\n\t\r]/g,
+ rspace = /\s+/,
+ rreturn = /\r/g,
+ rtype = /^(?:button|input)$/i,
+ rfocusable = /^(?:button|input|object|select|textarea)$/i,
+ rclickable = /^a(?:rea)?$/i,
+ rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,
+ getSetAttribute = jQuery.support.getSetAttribute,
+ nodeHook, boolHook, fixSpecified;
+
+jQuery.fn.extend({
+ attr: function( name, value ) {
+ return jQuery.access( this, jQuery.attr, name, value, arguments.length > 1 );
+ },
+
+ removeAttr: function( name ) {
+ return this.each(function() {
+ jQuery.removeAttr( this, name );
+ });
+ },
+
+ prop: function( name, value ) {
+ return jQuery.access( this, jQuery.prop, name, value, arguments.length > 1 );
+ },
+
+ removeProp: function( name ) {
+ name = jQuery.propFix[ name ] || name;
+ return this.each(function() {
+ // try/catch handles cases where IE balks (such as removing a property on window)
+ try {
+ this[ name ] = undefined;
+ delete this[ name ];
+ } catch( e ) {}
+ });
+ },
+
+ addClass: function( value ) {
+ var classNames, i, l, elem,
+ setClass, c, cl;
+
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function( j ) {
+ jQuery( this ).addClass( value.call(this, j, this.className) );
+ });
+ }
+
+ if ( value && typeof value === "string" ) {
+ classNames = value.split( rspace );
+
+ for ( i = 0, l = this.length; i < l; i++ ) {
+ elem = this[ i ];
+
+ if ( elem.nodeType === 1 ) {
+ if ( !elem.className && classNames.length === 1 ) {
+ elem.className = value;
+
+ } else {
+ setClass = " " + elem.className + " ";
+
+ for ( c = 0, cl = classNames.length; c < cl; c++ ) {
+ if ( !~setClass.indexOf( " " + classNames[ c ] + " " ) ) {
+ setClass += classNames[ c ] + " ";
+ }
+ }
+ elem.className = jQuery.trim( setClass );
+ }
+ }
+ }
+ }
+
+ return this;
+ },
+
+ removeClass: function( value ) {
+ var classNames, i, l, elem, className, c, cl;
+
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function( j ) {
+ jQuery( this ).removeClass( value.call(this, j, this.className) );
+ });
+ }
+
+ if ( (value && typeof value === "string") || value === undefined ) {
+ classNames = ( value || "" ).split( rspace );
+
+ for ( i = 0, l = this.length; i < l; i++ ) {
+ elem = this[ i ];
+
+ if ( elem.nodeType === 1 && elem.className ) {
+ if ( value ) {
+ className = (" " + elem.className + " ").replace( rclass, " " );
+ for ( c = 0, cl = classNames.length; c < cl; c++ ) {
+ className = className.replace(" " + classNames[ c ] + " ", " ");
+ }
+ elem.className = jQuery.trim( className );
+
+ } else {
+ elem.className = "";
+ }
+ }
+ }
+ }
+
+ return this;
+ },
+
+ toggleClass: function( value, stateVal ) {
+ var type = typeof value,
+ isBool = typeof stateVal === "boolean";
+
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function( i ) {
+ jQuery( this ).toggleClass( value.call(this, i, this.className, stateVal), stateVal );
+ });
+ }
+
+ return this.each(function() {
+ if ( type === "string" ) {
+ // toggle individual class names
+ var className,
+ i = 0,
+ self = jQuery( this ),
+ state = stateVal,
+ classNames = value.split( rspace );
+
+ while ( (className = classNames[ i++ ]) ) {
+ // check each className given, space seperated list
+ state = isBool ? state : !self.hasClass( className );
+ self[ state ? "addClass" : "removeClass" ]( className );
+ }
+
+ } else if ( type === "undefined" || type === "boolean" ) {
+ if ( this.className ) {
+ // store className if set
+ jQuery._data( this, "__className__", this.className );
+ }
+
+ // toggle whole className
+ this.className = this.className || value === false ? "" : jQuery._data( this, "__className__" ) || "";
+ }
+ });
+ },
+
+ hasClass: function( selector ) {
+ var className = " " + selector + " ",
+ i = 0,
+ l = this.length;
+ for ( ; i < l; i++ ) {
+ if ( this[i].nodeType === 1 && (" " + this[i].className + " ").replace(rclass, " ").indexOf( className ) > -1 ) {
+ return true;
+ }
+ }
+
+ return false;
+ },
+
+ val: function( value ) {
+ var hooks, ret, isFunction,
+ elem = this[0];
+
+ if ( !arguments.length ) {
+ if ( elem ) {
+ hooks = jQuery.valHooks[ elem.type ] || jQuery.valHooks[ elem.nodeName.toLowerCase() ];
+
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, "value" )) !== undefined ) {
+ return ret;
+ }
+
+ ret = elem.value;
+
+ return typeof ret === "string" ?
+ // handle most common string cases
+ ret.replace(rreturn, "") :
+ // handle cases where value is null/undef or number
+ ret == null ? "" : ret;
+ }
+
+ return;
+ }
+
+ isFunction = jQuery.isFunction( value );
+
+ return this.each(function( i ) {
+ var self = jQuery(this), val;
+
+ if ( this.nodeType !== 1 ) {
+ return;
+ }
+
+ if ( isFunction ) {
+ val = value.call( this, i, self.val() );
+ } else {
+ val = value;
+ }
+
+ // Treat null/undefined as ""; convert numbers to string
+ if ( val == null ) {
+ val = "";
+ } else if ( typeof val === "number" ) {
+ val += "";
+ } else if ( jQuery.isArray( val ) ) {
+ val = jQuery.map(val, function ( value ) {
+ return value == null ? "" : value + "";
+ });
+ }
+
+ hooks = jQuery.valHooks[ this.type ] || jQuery.valHooks[ this.nodeName.toLowerCase() ];
+
+ // If set returns undefined, fall back to normal setting
+ if ( !hooks || !("set" in hooks) || hooks.set( this, val, "value" ) === undefined ) {
+ this.value = val;
+ }
+ });
+ }
+});
+
+jQuery.extend({
+ valHooks: {
+ option: {
+ get: function( elem ) {
+ // attributes.value is undefined in Blackberry 4.7 but
+ // uses .value. See #6932
+ var val = elem.attributes.value;
+ return !val || val.specified ? elem.value : elem.text;
+ }
+ },
+ select: {
+ get: function( elem ) {
+ var value, i, max, option,
+ index = elem.selectedIndex,
+ values = [],
+ options = elem.options,
+ one = elem.type === "select-one";
+
+ // Nothing was selected
+ if ( index < 0 ) {
+ return null;
+ }
+
+ // Loop through all the selected options
+ i = one ? index : 0;
+ max = one ? index + 1 : options.length;
+ for ( ; i < max; i++ ) {
+ option = options[ i ];
+
+ // Don't return options that are disabled or in a disabled optgroup
+ if ( option.selected && (jQuery.support.optDisabled ? !option.disabled : option.getAttribute("disabled") === null) &&
+ (!option.parentNode.disabled || !jQuery.nodeName( option.parentNode, "optgroup" )) ) {
+
+ // Get the specific value for the option
+ value = jQuery( option ).val();
+
+ // We don't need an array for one selects
+ if ( one ) {
+ return value;
+ }
+
+ // Multi-Selects return an array
+ values.push( value );
+ }
+ }
+
+ // Fixes Bug #2551 -- select.val() broken in IE after form.reset()
+ if ( one && !values.length && options.length ) {
+ return jQuery( options[ index ] ).val();
+ }
+
+ return values;
+ },
+
+ set: function( elem, value ) {
+ var values = jQuery.makeArray( value );
+
+ jQuery(elem).find("option").each(function() {
+ this.selected = jQuery.inArray( jQuery(this).val(), values ) >= 0;
+ });
+
+ if ( !values.length ) {
+ elem.selectedIndex = -1;
+ }
+ return values;
+ }
+ }
+ },
+
+ attrFn: {
+ val: true,
+ css: true,
+ html: true,
+ text: true,
+ data: true,
+ width: true,
+ height: true,
+ offset: true
+ },
+
+ attr: function( elem, name, value, pass ) {
+ var ret, hooks, notxml,
+ nType = elem.nodeType;
+
+ // don't get/set attributes on text, comment and attribute nodes
+ if ( !elem || nType === 3 || nType === 8 || nType === 2 ) {
+ return;
+ }
+
+ if ( pass && name in jQuery.attrFn ) {
+ return jQuery( elem )[ name ]( value );
+ }
+
+ // Fallback to prop when attributes are not supported
+ if ( typeof elem.getAttribute === "undefined" ) {
+ return jQuery.prop( elem, name, value );
+ }
+
+ notxml = nType !== 1 || !jQuery.isXMLDoc( elem );
+
+ // All attributes are lowercase
+ // Grab necessary hook if one is defined
+ if ( notxml ) {
+ name = name.toLowerCase();
+ hooks = jQuery.attrHooks[ name ] || ( rboolean.test( name ) ? boolHook : nodeHook );
+ }
+
+ if ( value !== undefined ) {
+
+ if ( value === null ) {
+ jQuery.removeAttr( elem, name );
+ return;
+
+ } else if ( hooks && "set" in hooks && notxml && (ret = hooks.set( elem, value, name )) !== undefined ) {
+ return ret;
+
+ } else {
+ elem.setAttribute( name, "" + value );
+ return value;
+ }
+
+ } else if ( hooks && "get" in hooks && notxml && (ret = hooks.get( elem, name )) !== null ) {
+ return ret;
+
+ } else {
+
+ ret = elem.getAttribute( name );
+
+ // Non-existent attributes return null, we normalize to undefined
+ return ret === null ?
+ undefined :
+ ret;
+ }
+ },
+
+ removeAttr: function( elem, value ) {
+ var propName, attrNames, name, l, isBool,
+ i = 0;
+
+ if ( value && elem.nodeType === 1 ) {
+ attrNames = value.toLowerCase().split( rspace );
+ l = attrNames.length;
+
+ for ( ; i < l; i++ ) {
+ name = attrNames[ i ];
+
+ if ( name ) {
+ propName = jQuery.propFix[ name ] || name;
+ isBool = rboolean.test( name );
+
+ // See #9699 for explanation of this approach (setting first, then removal)
+ // Do not do this for boolean attributes (see #10870)
+ if ( !isBool ) {
+ jQuery.attr( elem, name, "" );
+ }
+ elem.removeAttribute( getSetAttribute ? name : propName );
+
+ // Set corresponding property to false for boolean attributes
+ if ( isBool && propName in elem ) {
+ elem[ propName ] = false;
+ }
+ }
+ }
+ }
+ },
+
+ attrHooks: {
+ type: {
+ set: function( elem, value ) {
+ // We can't allow the type property to be changed (since it causes problems in IE)
+ if ( rtype.test( elem.nodeName ) && elem.parentNode ) {
+ jQuery.error( "type property can't be changed" );
+ } else if ( !jQuery.support.radioValue && value === "radio" && jQuery.nodeName(elem, "input") ) {
+ // Setting the type on a radio button after the value resets the value in IE6-9
+ // Reset value to it's default in case type is set after value
+ // This is for element creation
+ var val = elem.value;
+ elem.setAttribute( "type", value );
+ if ( val ) {
+ elem.value = val;
+ }
+ return value;
+ }
+ }
+ },
+ // Use the value property for back compat
+ // Use the nodeHook for button elements in IE6/7 (#1954)
+ value: {
+ get: function( elem, name ) {
+ if ( nodeHook && jQuery.nodeName( elem, "button" ) ) {
+ return nodeHook.get( elem, name );
+ }
+ return name in elem ?
+ elem.value :
+ null;
+ },
+ set: function( elem, value, name ) {
+ if ( nodeHook && jQuery.nodeName( elem, "button" ) ) {
+ return nodeHook.set( elem, value, name );
+ }
+ // Does not return so that setAttribute is also used
+ elem.value = value;
+ }
+ }
+ },
+
+ propFix: {
+ tabindex: "tabIndex",
+ readonly: "readOnly",
+ "for": "htmlFor",
+ "class": "className",
+ maxlength: "maxLength",
+ cellspacing: "cellSpacing",
+ cellpadding: "cellPadding",
+ rowspan: "rowSpan",
+ colspan: "colSpan",
+ usemap: "useMap",
+ frameborder: "frameBorder",
+ contenteditable: "contentEditable"
+ },
+
+ prop: function( elem, name, value ) {
+ var ret, hooks, notxml,
+ nType = elem.nodeType;
+
+ // don't get/set properties on text, comment and attribute nodes
+ if ( !elem || nType === 3 || nType === 8 || nType === 2 ) {
+ return;
+ }
+
+ notxml = nType !== 1 || !jQuery.isXMLDoc( elem );
+
+ if ( notxml ) {
+ // Fix name and attach hooks
+ name = jQuery.propFix[ name ] || name;
+ hooks = jQuery.propHooks[ name ];
+ }
+
+ if ( value !== undefined ) {
+ if ( hooks && "set" in hooks && (ret = hooks.set( elem, value, name )) !== undefined ) {
+ return ret;
+
+ } else {
+ return ( elem[ name ] = value );
+ }
+
+ } else {
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, name )) !== null ) {
+ return ret;
+
+ } else {
+ return elem[ name ];
+ }
+ }
+ },
+
+ propHooks: {
+ tabIndex: {
+ get: function( elem ) {
+ // elem.tabIndex doesn't always return the correct value when it hasn't been explicitly set
+ // http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/
+ var attributeNode = elem.getAttributeNode("tabindex");
+
+ return attributeNode && attributeNode.specified ?
+ parseInt( attributeNode.value, 10 ) :
+ rfocusable.test( elem.nodeName ) || rclickable.test( elem.nodeName ) && elem.href ?
+ 0 :
+ undefined;
+ }
+ }
+ }
+});
+
+// Add the tabIndex propHook to attrHooks for back-compat (different case is intentional)
+jQuery.attrHooks.tabindex = jQuery.propHooks.tabIndex;
+
+// Hook for boolean attributes
+boolHook = {
+ get: function( elem, name ) {
+ // Align boolean attributes with corresponding properties
+ // Fall back to attribute presence where some booleans are not supported
+ var attrNode,
+ property = jQuery.prop( elem, name );
+ return property === true || typeof property !== "boolean" && ( attrNode = elem.getAttributeNode(name) ) && attrNode.nodeValue !== false ?
+ name.toLowerCase() :
+ undefined;
+ },
+ set: function( elem, value, name ) {
+ var propName;
+ if ( value === false ) {
+ // Remove boolean attributes when set to false
+ jQuery.removeAttr( elem, name );
+ } else {
+ // value is true since we know at this point it's type boolean and not false
+ // Set boolean attributes to the same name and set the DOM property
+ propName = jQuery.propFix[ name ] || name;
+ if ( propName in elem ) {
+ // Only set the IDL specifically if it already exists on the element
+ elem[ propName ] = true;
+ }
+
+ elem.setAttribute( name, name.toLowerCase() );
+ }
+ return name;
+ }
+};
+
+// IE6/7 do not support getting/setting some attributes with get/setAttribute
+if ( !getSetAttribute ) {
+
+ fixSpecified = {
+ name: true,
+ id: true,
+ coords: true
+ };
+
+ // Use this for any attribute in IE6/7
+ // This fixes almost every IE6/7 issue
+ nodeHook = jQuery.valHooks.button = {
+ get: function( elem, name ) {
+ var ret;
+ ret = elem.getAttributeNode( name );
+ return ret && ( fixSpecified[ name ] ? ret.nodeValue !== "" : ret.specified ) ?
+ ret.nodeValue :
+ undefined;
+ },
+ set: function( elem, value, name ) {
+ // Set the existing or create a new attribute node
+ var ret = elem.getAttributeNode( name );
+ if ( !ret ) {
+ ret = document.createAttribute( name );
+ elem.setAttributeNode( ret );
+ }
+ return ( ret.nodeValue = value + "" );
+ }
+ };
+
+ // Apply the nodeHook to tabindex
+ jQuery.attrHooks.tabindex.set = nodeHook.set;
+
+ // Set width and height to auto instead of 0 on empty string( Bug #8150 )
+ // This is for removals
+ jQuery.each([ "width", "height" ], function( i, name ) {
+ jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], {
+ set: function( elem, value ) {
+ if ( value === "" ) {
+ elem.setAttribute( name, "auto" );
+ return value;
+ }
+ }
+ });
+ });
+
+ // Set contenteditable to false on removals(#10429)
+ // Setting to empty string throws an error as an invalid value
+ jQuery.attrHooks.contenteditable = {
+ get: nodeHook.get,
+ set: function( elem, value, name ) {
+ if ( value === "" ) {
+ value = "false";
+ }
+ nodeHook.set( elem, value, name );
+ }
+ };
+}
+
+
+// Some attributes require a special call on IE
+if ( !jQuery.support.hrefNormalized ) {
+ jQuery.each([ "href", "src", "width", "height" ], function( i, name ) {
+ jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], {
+ get: function( elem ) {
+ var ret = elem.getAttribute( name, 2 );
+ return ret === null ? undefined : ret;
+ }
+ });
+ });
+}
+
+if ( !jQuery.support.style ) {
+ jQuery.attrHooks.style = {
+ get: function( elem ) {
+ // Return undefined in the case of empty string
+ // Normalize to lowercase since IE uppercases css property names
+ return elem.style.cssText.toLowerCase() || undefined;
+ },
+ set: function( elem, value ) {
+ return ( elem.style.cssText = "" + value );
+ }
+ };
+}
+
+// Safari mis-reports the default selected property of an option
+// Accessing the parent's selectedIndex property fixes it
+if ( !jQuery.support.optSelected ) {
+ jQuery.propHooks.selected = jQuery.extend( jQuery.propHooks.selected, {
+ get: function( elem ) {
+ var parent = elem.parentNode;
+
+ if ( parent ) {
+ parent.selectedIndex;
+
+ // Make sure that it also works with optgroups, see #5701
+ if ( parent.parentNode ) {
+ parent.parentNode.selectedIndex;
+ }
+ }
+ return null;
+ }
+ });
+}
+
+// IE6/7 call enctype encoding
+if ( !jQuery.support.enctype ) {
+ jQuery.propFix.enctype = "encoding";
+}
+
+// Radios and checkboxes getter/setter
+if ( !jQuery.support.checkOn ) {
+ jQuery.each([ "radio", "checkbox" ], function() {
+ jQuery.valHooks[ this ] = {
+ get: function( elem ) {
+ // Handle the case where in Webkit "" is returned instead of "on" if a value isn't specified
+ return elem.getAttribute("value") === null ? "on" : elem.value;
+ }
+ };
+ });
+}
+jQuery.each([ "radio", "checkbox" ], function() {
+ jQuery.valHooks[ this ] = jQuery.extend( jQuery.valHooks[ this ], {
+ set: function( elem, value ) {
+ if ( jQuery.isArray( value ) ) {
+ return ( elem.checked = jQuery.inArray( jQuery(elem).val(), value ) >= 0 );
+ }
+ }
+ });
+});
+
+
+
+
+var rformElems = /^(?:textarea|input|select)$/i,
+ rtypenamespace = /^([^\.]*)?(?:\.(.+))?$/,
+ rhoverHack = /(?:^|\s)hover(\.\S+)?\b/,
+ rkeyEvent = /^key/,
+ rmouseEvent = /^(?:mouse|contextmenu)|click/,
+ rfocusMorph = /^(?:focusinfocus|focusoutblur)$/,
+ rquickIs = /^(\w*)(?:#([\w\-]+))?(?:\.([\w\-]+))?$/,
+ quickParse = function( selector ) {
+ var quick = rquickIs.exec( selector );
+ if ( quick ) {
+ // 0 1 2 3
+ // [ _, tag, id, class ]
+ quick[1] = ( quick[1] || "" ).toLowerCase();
+ quick[3] = quick[3] && new RegExp( "(?:^|\\s)" + quick[3] + "(?:\\s|$)" );
+ }
+ return quick;
+ },
+ quickIs = function( elem, m ) {
+ var attrs = elem.attributes || {};
+ return (
+ (!m[1] || elem.nodeName.toLowerCase() === m[1]) &&
+ (!m[2] || (attrs.id || {}).value === m[2]) &&
+ (!m[3] || m[3].test( (attrs[ "class" ] || {}).value ))
+ );
+ },
+ hoverHack = function( events ) {
+ return jQuery.event.special.hover ? events : events.replace( rhoverHack, "mouseenter$1 mouseleave$1" );
+ };
+
+/*
+ * Helper functions for managing events -- not part of the public interface.
+ * Props to Dean Edwards' addEvent library for many of the ideas.
+ */
+jQuery.event = {
+
+ add: function( elem, types, handler, data, selector ) {
+
+ var elemData, eventHandle, events,
+ t, tns, type, namespaces, handleObj,
+ handleObjIn, quick, handlers, special;
+
+ // Don't attach events to noData or text/comment nodes (allow plain objects tho)
+ if ( elem.nodeType === 3 || elem.nodeType === 8 || !types || !handler || !(elemData = jQuery._data( elem )) ) {
+ return;
+ }
+
+ // Caller can pass in an object of custom data in lieu of the handler
+ if ( handler.handler ) {
+ handleObjIn = handler;
+ handler = handleObjIn.handler;
+ selector = handleObjIn.selector;
+ }
+
+ // Make sure that the handler has a unique ID, used to find/remove it later
+ if ( !handler.guid ) {
+ handler.guid = jQuery.guid++;
+ }
+
+ // Init the element's event structure and main handler, if this is the first
+ events = elemData.events;
+ if ( !events ) {
+ elemData.events = events = {};
+ }
+ eventHandle = elemData.handle;
+ if ( !eventHandle ) {
+ elemData.handle = eventHandle = function( e ) {
+ // Discard the second event of a jQuery.event.trigger() and
+ // when an event is called after a page has unloaded
+ return typeof jQuery !== "undefined" && (!e || jQuery.event.triggered !== e.type) ?
+ jQuery.event.dispatch.apply( eventHandle.elem, arguments ) :
+ undefined;
+ };
+ // Add elem as a property of the handle fn to prevent a memory leak with IE non-native events
+ eventHandle.elem = elem;
+ }
+
+ // Handle multiple events separated by a space
+ // jQuery(...).bind("mouseover mouseout", fn);
+ types = jQuery.trim( hoverHack(types) ).split( " " );
+ for ( t = 0; t < types.length; t++ ) {
+
+ tns = rtypenamespace.exec( types[t] ) || [];
+ type = tns[1];
+ namespaces = ( tns[2] || "" ).split( "." ).sort();
+
+ // If event changes its type, use the special event handlers for the changed type
+ special = jQuery.event.special[ type ] || {};
+
+ // If selector defined, determine special event api type, otherwise given type
+ type = ( selector ? special.delegateType : special.bindType ) || type;
+
+ // Update special based on newly reset type
+ special = jQuery.event.special[ type ] || {};
+
+ // handleObj is passed to all event handlers
+ handleObj = jQuery.extend({
+ type: type,
+ origType: tns[1],
+ data: data,
+ handler: handler,
+ guid: handler.guid,
+ selector: selector,
+ quick: selector && quickParse( selector ),
+ namespace: namespaces.join(".")
+ }, handleObjIn );
+
+ // Init the event handler queue if we're the first
+ handlers = events[ type ];
+ if ( !handlers ) {
+ handlers = events[ type ] = [];
+ handlers.delegateCount = 0;
+
+ // Only use addEventListener/attachEvent if the special events handler returns false
+ if ( !special.setup || special.setup.call( elem, data, namespaces, eventHandle ) === false ) {
+ // Bind the global event handler to the element
+ if ( elem.addEventListener ) {
+ elem.addEventListener( type, eventHandle, false );
+
+ } else if ( elem.attachEvent ) {
+ elem.attachEvent( "on" + type, eventHandle );
+ }
+ }
+ }
+
+ if ( special.add ) {
+ special.add.call( elem, handleObj );
+
+ if ( !handleObj.handler.guid ) {
+ handleObj.handler.guid = handler.guid;
+ }
+ }
+
+ // Add to the element's handler list, delegates in front
+ if ( selector ) {
+ handlers.splice( handlers.delegateCount++, 0, handleObj );
+ } else {
+ handlers.push( handleObj );
+ }
+
+ // Keep track of which events have ever been used, for event optimization
+ jQuery.event.global[ type ] = true;
+ }
+
+ // Nullify elem to prevent memory leaks in IE
+ elem = null;
+ },
+
+ global: {},
+
+ // Detach an event or set of events from an element
+ remove: function( elem, types, handler, selector, mappedTypes ) {
+
+ var elemData = jQuery.hasData( elem ) && jQuery._data( elem ),
+ t, tns, type, origType, namespaces, origCount,
+ j, events, special, handle, eventType, handleObj;
+
+ if ( !elemData || !(events = elemData.events) ) {
+ return;
+ }
+
+ // Once for each type.namespace in types; type may be omitted
+ types = jQuery.trim( hoverHack( types || "" ) ).split(" ");
+ for ( t = 0; t < types.length; t++ ) {
+ tns = rtypenamespace.exec( types[t] ) || [];
+ type = origType = tns[1];
+ namespaces = tns[2];
+
+ // Unbind all events (on this namespace, if provided) for the element
+ if ( !type ) {
+ for ( type in events ) {
+ jQuery.event.remove( elem, type + types[ t ], handler, selector, true );
+ }
+ continue;
+ }
+
+ special = jQuery.event.special[ type ] || {};
+ type = ( selector? special.delegateType : special.bindType ) || type;
+ eventType = events[ type ] || [];
+ origCount = eventType.length;
+ namespaces = namespaces ? new RegExp("(^|\\.)" + namespaces.split(".").sort().join("\\.(?:.*\\.)?") + "(\\.|$)") : null;
+
+ // Remove matching events
+ for ( j = 0; j < eventType.length; j++ ) {
+ handleObj = eventType[ j ];
+
+ if ( ( mappedTypes || origType === handleObj.origType ) &&
+ ( !handler || handler.guid === handleObj.guid ) &&
+ ( !namespaces || namespaces.test( handleObj.namespace ) ) &&
+ ( !selector || selector === handleObj.selector || selector === "**" && handleObj.selector ) ) {
+ eventType.splice( j--, 1 );
+
+ if ( handleObj.selector ) {
+ eventType.delegateCount--;
+ }
+ if ( special.remove ) {
+ special.remove.call( elem, handleObj );
+ }
+ }
+ }
+
+ // Remove generic event handler if we removed something and no more handlers exist
+ // (avoids potential for endless recursion during removal of special event handlers)
+ if ( eventType.length === 0 && origCount !== eventType.length ) {
+ if ( !special.teardown || special.teardown.call( elem, namespaces ) === false ) {
+ jQuery.removeEvent( elem, type, elemData.handle );
+ }
+
+ delete events[ type ];
+ }
+ }
+
+ // Remove the expando if it's no longer used
+ if ( jQuery.isEmptyObject( events ) ) {
+ handle = elemData.handle;
+ if ( handle ) {
+ handle.elem = null;
+ }
+
+ // removeData also checks for emptiness and clears the expando if empty
+ // so use it instead of delete
+ jQuery.removeData( elem, [ "events", "handle" ], true );
+ }
+ },
+
+ // Events that are safe to short-circuit if no handlers are attached.
+ // Native DOM events should not be added, they may have inline handlers.
+ customEvent: {
+ "getData": true,
+ "setData": true,
+ "changeData": true
+ },
+
+ trigger: function( event, data, elem, onlyHandlers ) {
+ // Don't do events on text and comment nodes
+ if ( elem && (elem.nodeType === 3 || elem.nodeType === 8) ) {
+ return;
+ }
+
+ // Event object or event type
+ var type = event.type || event,
+ namespaces = [],
+ cache, exclusive, i, cur, old, ontype, special, handle, eventPath, bubbleType;
+
+ // focus/blur morphs to focusin/out; ensure we're not firing them right now
+ if ( rfocusMorph.test( type + jQuery.event.triggered ) ) {
+ return;
+ }
+
+ if ( type.indexOf( "!" ) >= 0 ) {
+ // Exclusive events trigger only for the exact event (no namespaces)
+ type = type.slice(0, -1);
+ exclusive = true;
+ }
+
+ if ( type.indexOf( "." ) >= 0 ) {
+ // Namespaced trigger; create a regexp to match event type in handle()
+ namespaces = type.split(".");
+ type = namespaces.shift();
+ namespaces.sort();
+ }
+
+ if ( (!elem || jQuery.event.customEvent[ type ]) && !jQuery.event.global[ type ] ) {
+ // No jQuery handlers for this event type, and it can't have inline handlers
+ return;
+ }
+
+ // Caller can pass in an Event, Object, or just an event type string
+ event = typeof event === "object" ?
+ // jQuery.Event object
+ event[ jQuery.expando ] ? event :
+ // Object literal
+ new jQuery.Event( type, event ) :
+ // Just the event type (string)
+ new jQuery.Event( type );
+
+ event.type = type;
+ event.isTrigger = true;
+ event.exclusive = exclusive;
+ event.namespace = namespaces.join( "." );
+ event.namespace_re = event.namespace? new RegExp("(^|\\.)" + namespaces.join("\\.(?:.*\\.)?") + "(\\.|$)") : null;
+ ontype = type.indexOf( ":" ) < 0 ? "on" + type : "";
+
+ // Handle a global trigger
+ if ( !elem ) {
+
+ // TODO: Stop taunting the data cache; remove global events and always attach to document
+ cache = jQuery.cache;
+ for ( i in cache ) {
+ if ( cache[ i ].events && cache[ i ].events[ type ] ) {
+ jQuery.event.trigger( event, data, cache[ i ].handle.elem, true );
+ }
+ }
+ return;
+ }
+
+ // Clean up the event in case it is being reused
+ event.result = undefined;
+ if ( !event.target ) {
+ event.target = elem;
+ }
+
+ // Clone any incoming data and prepend the event, creating the handler arg list
+ data = data != null ? jQuery.makeArray( data ) : [];
+ data.unshift( event );
+
+ // Allow special events to draw outside the lines
+ special = jQuery.event.special[ type ] || {};
+ if ( special.trigger && special.trigger.apply( elem, data ) === false ) {
+ return;
+ }
+
+ // Determine event propagation path in advance, per W3C events spec (#9951)
+ // Bubble up to document, then to window; watch for a global ownerDocument var (#9724)
+ eventPath = [[ elem, special.bindType || type ]];
+ if ( !onlyHandlers && !special.noBubble && !jQuery.isWindow( elem ) ) {
+
+ bubbleType = special.delegateType || type;
+ cur = rfocusMorph.test( bubbleType + type ) ? elem : elem.parentNode;
+ old = null;
+ for ( ; cur; cur = cur.parentNode ) {
+ eventPath.push([ cur, bubbleType ]);
+ old = cur;
+ }
+
+ // Only add window if we got to document (e.g., not plain obj or detached DOM)
+ if ( old && old === elem.ownerDocument ) {
+ eventPath.push([ old.defaultView || old.parentWindow || window, bubbleType ]);
+ }
+ }
+
+ // Fire handlers on the event path
+ for ( i = 0; i < eventPath.length && !event.isPropagationStopped(); i++ ) {
+
+ cur = eventPath[i][0];
+ event.type = eventPath[i][1];
+
+ handle = ( jQuery._data( cur, "events" ) || {} )[ event.type ] && jQuery._data( cur, "handle" );
+ if ( handle ) {
+ handle.apply( cur, data );
+ }
+ // Note that this is a bare JS function and not a jQuery handler
+ handle = ontype && cur[ ontype ];
+ if ( handle && jQuery.acceptData( cur ) && handle.apply( cur, data ) === false ) {
+ event.preventDefault();
+ }
+ }
+ event.type = type;
+
+ // If nobody prevented the default action, do it now
+ if ( !onlyHandlers && !event.isDefaultPrevented() ) {
+
+ if ( (!special._default || special._default.apply( elem.ownerDocument, data ) === false) &&
+ !(type === "click" && jQuery.nodeName( elem, "a" )) && jQuery.acceptData( elem ) ) {
+
+ // Call a native DOM method on the target with the same name name as the event.
+ // Can't use an .isFunction() check here because IE6/7 fails that test.
+ // Don't do default actions on window, that's where global variables be (#6170)
+ // IE<9 dies on focus/blur to hidden element (#1486)
+ if ( ontype && elem[ type ] && ((type !== "focus" && type !== "blur") || event.target.offsetWidth !== 0) && !jQuery.isWindow( elem ) ) {
+
+ // Don't re-trigger an onFOO event when we call its FOO() method
+ old = elem[ ontype ];
+
+ if ( old ) {
+ elem[ ontype ] = null;
+ }
+
+ // Prevent re-triggering of the same event, since we already bubbled it above
+ jQuery.event.triggered = type;
+ elem[ type ]();
+ jQuery.event.triggered = undefined;
+
+ if ( old ) {
+ elem[ ontype ] = old;
+ }
+ }
+ }
+ }
+
+ return event.result;
+ },
+
+ dispatch: function( event ) {
+
+ // Make a writable jQuery.Event from the native event object
+ event = jQuery.event.fix( event || window.event );
+
+ var handlers = ( (jQuery._data( this, "events" ) || {} )[ event.type ] || []),
+ delegateCount = handlers.delegateCount,
+ args = [].slice.call( arguments, 0 ),
+ run_all = !event.exclusive && !event.namespace,
+ special = jQuery.event.special[ event.type ] || {},
+ handlerQueue = [],
+ i, j, cur, jqcur, ret, selMatch, matched, matches, handleObj, sel, related;
+
+ // Use the fix-ed jQuery.Event rather than the (read-only) native event
+ args[0] = event;
+ event.delegateTarget = this;
+
+ // Call the preDispatch hook for the mapped type, and let it bail if desired
+ if ( special.preDispatch && special.preDispatch.call( this, event ) === false ) {
+ return;
+ }
+
+ // Determine handlers that should run if there are delegated events
+ // Avoid non-left-click bubbling in Firefox (#3861)
+ if ( delegateCount && !(event.button && event.type === "click") ) {
+
+ // Pregenerate a single jQuery object for reuse with .is()
+ jqcur = jQuery(this);
+ jqcur.context = this.ownerDocument || this;
+
+ for ( cur = event.target; cur != this; cur = cur.parentNode || this ) {
+
+ // Don't process events on disabled elements (#6911, #8165)
+ if ( cur.disabled !== true ) {
+ selMatch = {};
+ matches = [];
+ jqcur[0] = cur;
+ for ( i = 0; i < delegateCount; i++ ) {
+ handleObj = handlers[ i ];
+ sel = handleObj.selector;
+
+ if ( selMatch[ sel ] === undefined ) {
+ selMatch[ sel ] = (
+ handleObj.quick ? quickIs( cur, handleObj.quick ) : jqcur.is( sel )
+ );
+ }
+ if ( selMatch[ sel ] ) {
+ matches.push( handleObj );
+ }
+ }
+ if ( matches.length ) {
+ handlerQueue.push({ elem: cur, matches: matches });
+ }
+ }
+ }
+ }
+
+ // Add the remaining (directly-bound) handlers
+ if ( handlers.length > delegateCount ) {
+ handlerQueue.push({ elem: this, matches: handlers.slice( delegateCount ) });
+ }
+
+ // Run delegates first; they may want to stop propagation beneath us
+ for ( i = 0; i < handlerQueue.length && !event.isPropagationStopped(); i++ ) {
+ matched = handlerQueue[ i ];
+ event.currentTarget = matched.elem;
+
+ for ( j = 0; j < matched.matches.length && !event.isImmediatePropagationStopped(); j++ ) {
+ handleObj = matched.matches[ j ];
+
+ // Triggered event must either 1) be non-exclusive and have no namespace, or
+ // 2) have namespace(s) a subset or equal to those in the bound event (both can have no namespace).
+ if ( run_all || (!event.namespace && !handleObj.namespace) || event.namespace_re && event.namespace_re.test( handleObj.namespace ) ) {
+
+ event.data = handleObj.data;
+ event.handleObj = handleObj;
+
+ ret = ( (jQuery.event.special[ handleObj.origType ] || {}).handle || handleObj.handler )
+ .apply( matched.elem, args );
+
+ if ( ret !== undefined ) {
+ event.result = ret;
+ if ( ret === false ) {
+ event.preventDefault();
+ event.stopPropagation();
+ }
+ }
+ }
+ }
+ }
+
+ // Call the postDispatch hook for the mapped type
+ if ( special.postDispatch ) {
+ special.postDispatch.call( this, event );
+ }
+
+ return event.result;
+ },
+
+ // Includes some event props shared by KeyEvent and MouseEvent
+ // *** attrChange attrName relatedNode srcElement are not normalized, non-W3C, deprecated, will be removed in 1.8 ***
+ props: "attrChange attrName relatedNode srcElement altKey bubbles cancelable ctrlKey currentTarget eventPhase metaKey relatedTarget shiftKey target timeStamp view which".split(" "),
+
+ // Per-event-type fix hooks consulted by fix(); key/mouse types are registered into it further down the file
+ fixHooks: {},
+
+ // Fix-hook for keyboard events: extra props to copy plus a filter that
+ // normalizes event.which from charCode/keyCode
+ keyHooks: {
+ props: "char charCode key keyCode".split(" "),
+ filter: function( event, original ) {
+
+ // Add which for key events
+ if ( event.which == null ) {
+ event.which = original.charCode != null ? original.charCode : original.keyCode;
+ }
+
+ return event;
+ }
+ },
+
+ // Fix-hook for mouse events: computes pageX/pageY, relatedTarget and
+ // event.which when the original event lacks them
+ mouseHooks: {
+ props: "button buttons clientX clientY fromElement offsetX offsetY pageX pageY screenX screenY toElement".split(" "),
+ filter: function( event, original ) {
+ var eventDoc, doc, body,
+ button = original.button,
+ fromElement = original.fromElement;
+
+ // Calculate pageX/Y if missing and clientX/Y available
+ // (client coords + document scroll offset - any root/body border offset)
+ if ( event.pageX == null && original.clientX != null ) {
+ eventDoc = event.target.ownerDocument || document;
+ doc = eventDoc.documentElement;
+ body = eventDoc.body;
+
+ event.pageX = original.clientX + ( doc && doc.scrollLeft || body && body.scrollLeft || 0 ) - ( doc && doc.clientLeft || body && body.clientLeft || 0 );
+ event.pageY = original.clientY + ( doc && doc.scrollTop || body && body.scrollTop || 0 ) - ( doc && doc.clientTop || body && body.clientTop || 0 );
+ }
+
+ // Add relatedTarget, if necessary
+ if ( !event.relatedTarget && fromElement ) {
+ event.relatedTarget = fromElement === event.target ? original.toElement : fromElement;
+ }
+
+ // Add which for click: 1 === left; 2 === middle; 3 === right
+ // Note: button is not normalized, so don't use it
+ if ( !event.which && button !== undefined ) {
+ event.which = ( button & 1 ? 1 : ( button & 2 ? 3 : ( button & 4 ? 2 : 0 ) ) );
+ }
+
+ return event;
+ }
+ },
+
+ // Normalize a (possibly native) event into a writable jQuery.Event copy,
+ // copying the shared props plus any per-type hook props, then running the
+ // hook's filter. Returns the event unchanged if it was already fixed.
+ fix: function( event ) {
+ // Already wrapped/normalized — the expando marks a fixed event
+ if ( event[ jQuery.expando ] ) {
+ return event;
+ }
+
+ // Create a writable copy of the event object and normalize some properties
+ var i, prop,
+ originalEvent = event,
+ fixHook = jQuery.event.fixHooks[ event.type ] || {},
+ copy = fixHook.props ? this.props.concat( fixHook.props ) : this.props;
+
+ event = jQuery.Event( originalEvent );
+
+ // Copy the selected properties from the original onto the writable copy
+ for ( i = copy.length; i; ) {
+ prop = copy[ --i ];
+ event[ prop ] = originalEvent[ prop ];
+ }
+
+ // Fix target property, if necessary (#1925, IE 6/7/8 & Safari2)
+ if ( !event.target ) {
+ event.target = originalEvent.srcElement || document;
+ }
+
+ // Target should not be a text node (#504, Safari)
+ if ( event.target.nodeType === 3 ) {
+ event.target = event.target.parentNode;
+ }
+
+ // For mouse/key events; add metaKey if it's not there (#3368, IE6/7/8)
+ if ( event.metaKey === undefined ) {
+ event.metaKey = event.ctrlKey;
+ }
+
+ // Give the per-type hook a final chance to adjust the event
+ return fixHook.filter? fixHook.filter( event, originalEvent ) : event;
+ },
+
+ // Built-in special-event configuration, keyed by event type
+ special: {
+ ready: {
+ // Make sure the ready event is setup
+ setup: jQuery.bindReady
+ },
+
+ load: {
+ // Prevent triggered image.load events from bubbling to window.load
+ noBubble: true
+ },
+
+ // focus/blur don't bubble; delegate via their focusin/focusout counterparts
+ focus: {
+ delegateType: "focusin"
+ },
+ blur: {
+ delegateType: "focusout"
+ },
+
+ beforeunload: {
+ setup: function( data, namespaces, eventHandle ) {
+ // We only want to do this special case on windows
+ if ( jQuery.isWindow( this ) ) {
+ this.onbeforeunload = eventHandle;
+ }
+ },
+
+ teardown: function( namespaces, eventHandle ) {
+ // Only clear the property if it's still our handler
+ if ( this.onbeforeunload === eventHandle ) {
+ this.onbeforeunload = null;
+ }
+ }
+ }
+ },
+
+ // Dispatch a synthetic event of `type` on `elem`, reusing the donor
+ // `event`'s properties. bubble=true routes through trigger (bubbles up
+ // the tree); otherwise the event is dispatched directly on elem.
+ simulate: function( type, elem, event, bubble ) {
+ // Piggyback on a donor event to simulate a different one.
+ // Fake originalEvent to avoid donor's stopPropagation, but if the
+ // simulated event prevents default then we do the same on the donor.
+ var e = jQuery.extend(
+ new jQuery.Event(),
+ event,
+ { type: type,
+ isSimulated: true,
+ originalEvent: {}
+ }
+ );
+ if ( bubble ) {
+ jQuery.event.trigger( e, null, elem );
+ } else {
+ jQuery.event.dispatch.call( elem, e );
+ }
+ // Mirror a preventDefault() on the simulated event back onto the donor
+ if ( e.isDefaultPrevented() ) {
+ event.preventDefault();
+ }
+ }
+};
+};
+
+// Some plugins are using, but it's undocumented/deprecated and will be removed.
+// The 1.7 special event interface should provide all the hooks needed now.
+// Kept only as a backward-compatible alias for jQuery.event.dispatch.
+jQuery.event.handle = jQuery.event.dispatch;
+
+// Detach a previously attached handler: use the W3C removeEventListener
+// model when the document supports it, otherwise IE's detachEvent.
+jQuery.removeEvent = document.removeEventListener ?
+ function( elem, type, handle ) {
+ if ( elem.removeEventListener ) {
+ elem.removeEventListener( type, handle, false );
+ }
+ } :
+ function( elem, type, handle ) {
+ if ( elem.detachEvent ) {
+ elem.detachEvent( "on" + type, handle );
+ }
+ };
+
+// Constructor for jQuery's event wrapper.
+// src: a native/jQuery event object to wrap, or an event-type string.
+// props: optional map of extra properties copied onto the new event.
+jQuery.Event = function( src, props ) {
+ // Allow instantiation without the 'new' keyword
+ if ( !(this instanceof jQuery.Event) ) {
+ return new jQuery.Event( src, props );
+ }
+
+ // Event object
+ if ( src && src.type ) {
+ this.originalEvent = src;
+ this.type = src.type;
+
+ // Events bubbling up the document may have been marked as prevented
+ // by a handler lower down the tree; reflect the correct value.
+ this.isDefaultPrevented = ( src.defaultPrevented || src.returnValue === false ||
+ src.getPreventDefault && src.getPreventDefault() ) ? returnTrue : returnFalse;
+
+ // Event type
+ } else {
+ this.type = src;
+ }
+
+ // Put explicitly provided properties onto the event object
+ if ( props ) {
+ jQuery.extend( this, props );
+ }
+
+ // Create a timestamp if incoming event doesn't have one
+ this.timeStamp = src && src.timeStamp || jQuery.now();
+
+ // Mark it as fixed
+ this[ jQuery.expando ] = true;
+};
+
+// Shared constant-returning functions used as event-state flags
+// (the is*Prevented/is*Stopped methods are swapped between these two).
+function returnFalse() {
+ return false;
+}
+function returnTrue() {
+ return true;
+}
+
+// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding
+// http://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html
+// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding
+// http://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html
+jQuery.Event.prototype = {
+ // Flag the event as default-prevented, then mirror onto the original event
+ preventDefault: function() {
+ this.isDefaultPrevented = returnTrue;
+
+ var e = this.originalEvent;
+ if ( !e ) {
+ return;
+ }
+
+ // if preventDefault exists run it on the original event
+ if ( e.preventDefault ) {
+ e.preventDefault();
+
+ // otherwise set the returnValue property of the original event to false (IE)
+ } else {
+ e.returnValue = false;
+ }
+ },
+ // Flag the event as propagation-stopped, then mirror onto the original event
+ stopPropagation: function() {
+ this.isPropagationStopped = returnTrue;
+
+ var e = this.originalEvent;
+ if ( !e ) {
+ return;
+ }
+ // if stopPropagation exists run it on the original event
+ if ( e.stopPropagation ) {
+ e.stopPropagation();
+ }
+ // otherwise set the cancelBubble property of the original event to true (IE)
+ e.cancelBubble = true;
+ },
+ stopImmediatePropagation: function() {
+ this.isImmediatePropagationStopped = returnTrue;
+ this.stopPropagation();
+ },
+ // State flags default to false; the methods above swap them to returnTrue
+ isDefaultPrevented: returnFalse,
+ isPropagationStopped: returnFalse,
+ isImmediatePropagationStopped: returnFalse
+};
+
+// Create mouseenter/leave events using mouseover/out and event-time checks
+// (orig is the synthetic type, fix is the native bubbling type it rides on)
+jQuery.each({
+ mouseenter: "mouseover",
+ mouseleave: "mouseout"
+}, function( orig, fix ) {
+ jQuery.event.special[ orig ] = {
+ delegateType: fix,
+ bindType: fix,
+
+ handle: function( event ) {
+ var target = this,
+ related = event.relatedTarget,
+ handleObj = event.handleObj,
+ selector = handleObj.selector,
+ ret;
+
+ // For mousenter/leave call the handler if related is outside the target.
+ // NB: No relatedTarget if the mouse left/entered the browser window
+ if ( !related || (related !== target && !jQuery.contains( target, related )) ) {
+ // Temporarily present the event under its original (synthetic) type
+ event.type = handleObj.origType;
+ ret = handleObj.handler.apply( this, arguments );
+ event.type = fix;
+ }
+ return ret;
+ }
+ };
+});
+
+// IE submit delegation: emulate bubbling submit events on browsers where
+// native submit does not bubble, by lazily attaching a submit handler to
+// forms reached via delegated click/keypress and re-simulating upward.
+if ( !jQuery.support.submitBubbles ) {
+
+ jQuery.event.special.submit = {
+ setup: function() {
+ // Only need this for delegated form submit events
+ if ( jQuery.nodeName( this, "form" ) ) {
+ return false;
+ }
+
+ // Lazy-add a submit handler when a descendant form may potentially be submitted
+ jQuery.event.add( this, "click._submit keypress._submit", function( e ) {
+ // Node name check avoids a VML-related crash in IE (#9807)
+ var elem = e.target,
+ form = jQuery.nodeName( elem, "input" ) || jQuery.nodeName( elem, "button" ) ? elem.form : undefined;
+ if ( form && !form._submit_attached ) {
+ jQuery.event.add( form, "submit._submit", function( event ) {
+ event._submit_bubble = true;
+ });
+ form._submit_attached = true;
+ }
+ });
+ // return undefined since we don't need an event listener
+ },
+
+ postDispatch: function( event ) {
+ // If form was submitted by the user, bubble the event up the tree
+ if ( event._submit_bubble ) {
+ delete event._submit_bubble;
+ if ( this.parentNode && !event.isTrigger ) {
+ jQuery.event.simulate( "submit", this.parentNode, event, true );
+ }
+ }
+ },
+
+ teardown: function() {
+ // Only need this for delegated form submit events
+ if ( jQuery.nodeName( this, "form" ) ) {
+ return false;
+ }
+
+ // Remove delegated handlers; cleanData eventually reaps submit handlers attached above
+ jQuery.event.remove( this, "._submit" );
+ }
+ };
+}
+
+// IE change delegation and checkbox/radio fix: emulate bubbling change
+// events where they do not bubble natively, and fire change on click for
+// check/radio instead of waiting for blur.
+if ( !jQuery.support.changeBubbles ) {
+
+ jQuery.event.special.change = {
+
+ setup: function() {
+
+ if ( rformElems.test( this.nodeName ) ) {
+ // IE doesn't fire change on a check/radio until blur; trigger it on click
+ // after a propertychange. Eat the blur-change in special.change.handle.
+ // This still fires onchange a second time for check/radio after blur.
+ if ( this.type === "checkbox" || this.type === "radio" ) {
+ jQuery.event.add( this, "propertychange._change", function( event ) {
+ if ( event.originalEvent.propertyName === "checked" ) {
+ this._just_changed = true;
+ }
+ });
+ jQuery.event.add( this, "click._change", function( event ) {
+ if ( this._just_changed && !event.isTrigger ) {
+ this._just_changed = false;
+ jQuery.event.simulate( "change", this, event, true );
+ }
+ });
+ }
+ // false: use the handlers above rather than a native change listener
+ return false;
+ }
+ // Delegated event; lazy-add a change handler on descendant inputs
+ jQuery.event.add( this, "beforeactivate._change", function( e ) {
+ var elem = e.target;
+
+ if ( rformElems.test( elem.nodeName ) && !elem._change_attached ) {
+ jQuery.event.add( elem, "change._change", function( event ) {
+ if ( this.parentNode && !event.isSimulated && !event.isTrigger ) {
+ jQuery.event.simulate( "change", this.parentNode, event, true );
+ }
+ });
+ // Guard flag so we attach at most one handler per element
+ elem._change_attached = true;
+ }
+ });
+ },
+
+ handle: function( event ) {
+ var elem = event.target;
+
+ // Swallow native change events from checkbox/radio, we already triggered them above
+ if ( this !== elem || event.isSimulated || event.isTrigger || (elem.type !== "radio" && elem.type !== "checkbox") ) {
+ return event.handleObj.handler.apply( this, arguments );
+ }
+ },
+
+ teardown: function() {
+ jQuery.event.remove( this, "._change" );
+
+ return rformElems.test( this.nodeName );
+ }
+ };
+}
+
+// Create "bubbling" focus and blur events
+if ( !jQuery.support.focusinBubbles ) {
+ jQuery.each({ focus: "focusin", blur: "focusout" }, function( orig, fix ) {
+
+ // Attach a single capturing handler while someone wants focusin/focusout
+ var attaches = 0,
+ handler = function( event ) {
+ jQuery.event.simulate( fix, event.target, jQuery.event.fix( event ), true );
+ };
+
+ jQuery.event.special[ fix ] = {
+ setup: function() {
+ if ( attaches++ === 0 ) {
+ document.addEventListener( orig, handler, true );
+ }
+ },
+ teardown: function() {
+ if ( --attaches === 0 ) {
+ document.removeEventListener( orig, handler, true );
+ }
+ }
+ };
+ });
+}
+
+// Public event API on jQuery collections; everything funnels into
+// jQuery.event.add/remove/trigger via .on()/.off()/.trigger().
+jQuery.fn.extend({
+
+ // Attach handler(s). Overloaded: ( types [, selector] [, data ], fn )
+ // or ( types-Object [, selector] [, data ] ). `one` is internal (set by .one()).
+ on: function( types, selector, data, fn, /*INTERNAL*/ one ) {
+ var origFn, type;
+
+ // Types can be a map of types/handlers
+ if ( typeof types === "object" ) {
+ // ( types-Object, selector, data )
+ if ( typeof selector !== "string" ) { // && selector != null
+ // ( types-Object, data )
+ data = data || selector;
+ selector = undefined;
+ }
+ for ( type in types ) {
+ this.on( type, selector, data, types[ type ], one );
+ }
+ return this;
+ }
+
+ // Shuffle the optional arguments into place
+ if ( data == null && fn == null ) {
+ // ( types, fn )
+ fn = selector;
+ data = selector = undefined;
+ } else if ( fn == null ) {
+ if ( typeof selector === "string" ) {
+ // ( types, selector, fn )
+ fn = data;
+ data = undefined;
+ } else {
+ // ( types, data, fn )
+ fn = data;
+ data = selector;
+ selector = undefined;
+ }
+ }
+ // fn === false is shorthand for a handler that returns false
+ if ( fn === false ) {
+ fn = returnFalse;
+ } else if ( !fn ) {
+ return this;
+ }
+
+ if ( one === 1 ) {
+ origFn = fn;
+ fn = function( event ) {
+ // Can use an empty set, since event contains the info
+ jQuery().off( event );
+ return origFn.apply( this, arguments );
+ };
+ // Use same guid so caller can remove using origFn
+ fn.guid = origFn.guid || ( origFn.guid = jQuery.guid++ );
+ }
+ return this.each( function() {
+ jQuery.event.add( this, types, fn, data, selector );
+ });
+ },
+ // Like .on(), but the handler removes itself after its first invocation
+ one: function( types, selector, data, fn ) {
+ return this.on( types, selector, data, fn, 1 );
+ },
+ // Remove handler(s); also accepts a dispatched jQuery.Event to remove
+ // exactly the handler that produced it
+ off: function( types, selector, fn ) {
+ if ( types && types.preventDefault && types.handleObj ) {
+ // ( event ) dispatched jQuery.Event
+ var handleObj = types.handleObj;
+ jQuery( types.delegateTarget ).off(
+ handleObj.namespace ? handleObj.origType + "." + handleObj.namespace : handleObj.origType,
+ handleObj.selector,
+ handleObj.handler
+ );
+ return this;
+ }
+ if ( typeof types === "object" ) {
+ // ( types-object [, selector] )
+ for ( var type in types ) {
+ this.off( type, selector, types[ type ] );
+ }
+ return this;
+ }
+ if ( selector === false || typeof selector === "function" ) {
+ // ( types [, fn] )
+ fn = selector;
+ selector = undefined;
+ }
+ if ( fn === false ) {
+ fn = returnFalse;
+ }
+ return this.each(function() {
+ jQuery.event.remove( this, types, fn, selector );
+ });
+ },
+
+ // Legacy non-delegated aliases built on .on()/.off()
+ bind: function( types, data, fn ) {
+ return this.on( types, null, data, fn );
+ },
+ unbind: function( types, fn ) {
+ return this.off( types, null, fn );
+ },
+
+ // Legacy document-rooted delegation using the collection's selector/context
+ live: function( types, data, fn ) {
+ jQuery( this.context ).on( types, this.selector, data, fn );
+ return this;
+ },
+ die: function( types, fn ) {
+ jQuery( this.context ).off( types, this.selector || "**", fn );
+ return this;
+ },
+
+ // Delegation rooted at each element of the collection
+ delegate: function( selector, types, data, fn ) {
+ return this.on( types, selector, data, fn );
+ },
+ undelegate: function( selector, types, fn ) {
+ // ( namespace ) or ( selector, types [, fn] )
+ return arguments.length == 1? this.off( selector, "**" ) : this.off( types, selector, fn );
+ },
+
+ // Fire an event (with bubbling/default behavior) on every element
+ trigger: function( type, data ) {
+ return this.each(function() {
+ jQuery.event.trigger( type, data, this );
+ });
+ },
+ // Fire handlers on the first element only, without bubbling; returns the
+ // handler result
+ triggerHandler: function( type, data ) {
+ if ( this[0] ) {
+ return jQuery.event.trigger( type, data, this[0], true );
+ }
+ },
+
+ // Rotate through the given click handlers on successive clicks
+ toggle: function( fn ) {
+ // Save reference to arguments for access in closure
+ var args = arguments,
+ guid = fn.guid || jQuery.guid++,
+ i = 0,
+ toggler = function( event ) {
+ // Figure out which function to execute
+ var lastToggle = ( jQuery._data( this, "lastToggle" + fn.guid ) || 0 ) % i;
+ jQuery._data( this, "lastToggle" + fn.guid, lastToggle + 1 );
+
+ // Make sure that clicks stop
+ event.preventDefault();
+
+ // and execute the function
+ return args[ lastToggle ].apply( this, arguments ) || false;
+ };
+
+ // link all the functions, so any of them can unbind this click handler
+ toggler.guid = guid;
+ while ( i < args.length ) {
+ args[ i++ ].guid = guid;
+ }
+
+ return this.click( toggler );
+ },
+
+ // Bind handlers for mouseenter/mouseleave; fnOut defaults to fnOver
+ hover: function( fnOver, fnOut ) {
+ return this.mouseenter( fnOver ).mouseleave( fnOut || fnOver );
+ }
+});
+
+jQuery.each( ("blur focus focusin focusout load resize scroll unload click dblclick " +
+ "mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave " +
+ "change select submit keydown keypress keyup error contextmenu").split(" "), function( i, name ) {
+
+ // Handle event binding
+ jQuery.fn[ name ] = function( data, fn ) {
+ if ( fn == null ) {
+ fn = data;
+ data = null;
+ }
+
+ return arguments.length > 0 ?
+ this.on( name, null, data, fn ) :
+ this.trigger( name );
+ };
+
+ if ( jQuery.attrFn ) {
+ jQuery.attrFn[ name ] = true;
+ }
+
+ if ( rkeyEvent.test( name ) ) {
+ jQuery.event.fixHooks[ name ] = jQuery.event.keyHooks;
+ }
+
+ if ( rmouseEvent.test( name ) ) {
+ jQuery.event.fixHooks[ name ] = jQuery.event.mouseHooks;
+ }
+});
+
+
+
+/*!
+ * Sizzle CSS Selector Engine
+ * Copyright 2011, The Dojo Foundation
+ * Released under the MIT, BSD, and GPL Licenses.
+ * More information: http://sizzlejs.com/
+ */
+(function(){
+
+// chunker splits a selector into: [1] the next chunk, [2] an optional
+// trailing comma (marks a union), [3] the remainder of the selector
+var chunker = /((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,
+ expando = "sizcache" + (Math.random() + '').replace('.', ''),
+ done = 0,
+ toString = Object.prototype.toString,
+ hasDuplicate = false,
+ baseHasDuplicate = true,
+ rBackslash = /\\/g,
+ rReturn = /\r\n/g,
+ rNonWord = /\W/;
+
+// Here we check if the JavaScript engine is using some sort of
+// optimization where it does not always call our comparision
+// function. If that is the case, discard the hasDuplicate value.
+// Thus far that includes Google Chrome.
+[0, 0].sort(function() {
+ baseHasDuplicate = false;
+ return 0;
+});
+
+// Core selector entry point: resolve `selector` against `context`,
+// appending matched elements to `results` (created if omitted). `seed`,
+// when given, restricts matching to that candidate set.
+var Sizzle = function( selector, context, results, seed ) {
+ results = results || [];
+ context = context || document;
+
+ var origContext = context;
+
+ // Only element and document contexts are supported
+ if ( context.nodeType !== 1 && context.nodeType !== 9 ) {
+ return [];
+ }
+
+ if ( !selector || typeof selector !== "string" ) {
+ return results;
+ }
+
+ var m, set, checkSet, extra, ret, cur, pop, i,
+ prune = true,
+ contextXML = Sizzle.isXML( context ),
+ parts = [],
+ soFar = selector;
+
+ // Reset the position of the chunker regexp (start from head)
+ // and split the selector into parts; `extra` holds anything after a comma
+ do {
+ chunker.exec( "" );
+ m = chunker.exec( soFar );
+
+ if ( m ) {
+ soFar = m[3];
+
+ parts.push( m[1] );
+
+ if ( m[2] ) {
+ extra = m[3];
+ break;
+ }
+ }
+ } while ( m );
+
+ // Positional selectors (:first, :eq, ...) must be evaluated left-to-right
+ if ( parts.length > 1 && origPOS.exec( selector ) ) {
+
+ if ( parts.length === 2 && Expr.relative[ parts[0] ] ) {
+ set = posProcess( parts[0] + parts[1], context, seed );
+
+ } else {
+ set = Expr.relative[ parts[0] ] ?
+ [ context ] :
+ Sizzle( parts.shift(), context );
+
+ while ( parts.length ) {
+ selector = parts.shift();
+
+ if ( Expr.relative[ selector ] ) {
+ selector += parts.shift();
+ }
+
+ set = posProcess( selector, set, seed );
+ }
+ }
+
+ } else {
+ // Take a shortcut and set the context if the root selector is an ID
+ // (but not if it'll be faster if the inner selector is an ID)
+ if ( !seed && parts.length > 1 && context.nodeType === 9 && !contextXML &&
+ Expr.match.ID.test(parts[0]) && !Expr.match.ID.test(parts[parts.length - 1]) ) {
+
+ ret = Sizzle.find( parts.shift(), context, contextXML );
+ context = ret.expr ?
+ Sizzle.filter( ret.expr, ret.set )[0] :
+ ret.set[0];
+ }
+
+ if ( context ) {
+ // Find candidates from the rightmost part, then walk the remaining
+ // parts right-to-left, filtering checkSet via the relative operators
+ ret = seed ?
+ { expr: parts.pop(), set: makeArray(seed) } :
+ Sizzle.find( parts.pop(), parts.length === 1 && (parts[0] === "~" || parts[0] === "+") && context.parentNode ? context.parentNode : context, contextXML );
+
+ set = ret.expr ?
+ Sizzle.filter( ret.expr, ret.set ) :
+ ret.set;
+
+ if ( parts.length > 0 ) {
+ checkSet = makeArray( set );
+
+ } else {
+ prune = false;
+ }
+
+ while ( parts.length ) {
+ cur = parts.pop();
+ pop = cur;
+
+ if ( !Expr.relative[ cur ] ) {
+ cur = "";
+ } else {
+ pop = parts.pop();
+ }
+
+ if ( pop == null ) {
+ pop = context;
+ }
+
+ Expr.relative[ cur ]( checkSet, pop, contextXML );
+ }
+
+ } else {
+ checkSet = parts = [];
+ }
+ }
+
+ if ( !checkSet ) {
+ checkSet = set;
+ }
+
+ if ( !checkSet ) {
+ Sizzle.error( cur || selector );
+ }
+
+ // Collect surviving entries of checkSet into results
+ if ( toString.call(checkSet) === "[object Array]" ) {
+ if ( !prune ) {
+ results.push.apply( results, checkSet );
+
+ } else if ( context && context.nodeType === 1 ) {
+ for ( i = 0; checkSet[i] != null; i++ ) {
+ if ( checkSet[i] && (checkSet[i] === true || checkSet[i].nodeType === 1 && Sizzle.contains(context, checkSet[i])) ) {
+ results.push( set[i] );
+ }
+ }
+
+ } else {
+ for ( i = 0; checkSet[i] != null; i++ ) {
+ if ( checkSet[i] && checkSet[i].nodeType === 1 ) {
+ results.push( set[i] );
+ }
+ }
+ }
+
+ } else {
+ makeArray( checkSet, results );
+ }
+
+ // Comma union: recurse on the rest of the selector and de-duplicate
+ if ( extra ) {
+ Sizzle( extra, origContext, results, seed );
+ Sizzle.uniqueSort( results );
+ }
+
+ return results;
+};
+
+// Sort results into document order (when sortOrder is available) and
+// remove any duplicate adjacent entries detected during the sort.
+Sizzle.uniqueSort = function( results ) {
+ if ( sortOrder ) {
+ hasDuplicate = baseHasDuplicate;
+ results.sort( sortOrder );
+
+ if ( hasDuplicate ) {
+ for ( var i = 1; i < results.length; i++ ) {
+ if ( results[i] === results[ i - 1 ] ) {
+ results.splice( i--, 1 );
+ }
+ }
+ }
+ }
+
+ return results;
+};
+
+// Filter an existing element set by a selector expression
+Sizzle.matches = function( expr, set ) {
+ return Sizzle( expr, null, null, set );
+};
+
+// True if a single node matches the selector expression
+Sizzle.matchesSelector = function( node, expr ) {
+ return Sizzle( expr, null, null, [node] ).length > 0;
+};
+
+// Find an initial candidate set for the leading token of `expr` using the
+// fastest available strategy in Expr.order (ID, NAME, TAG). Returns
+// { set, expr } with the consumed token stripped from expr; falls back to
+// all elements when no strategy applies.
+Sizzle.find = function( expr, context, isXML ) {
+ var set, i, len, match, type, left;
+
+ if ( !expr ) {
+ return [];
+ }
+
+ for ( i = 0, len = Expr.order.length; i < len; i++ ) {
+ type = Expr.order[i];
+
+ if ( (match = Expr.leftMatch[ type ].exec( expr )) ) {
+ left = match[1];
+ match.splice( 1, 1 );
+
+ // A trailing backslash means the match was escaped; skip it
+ if ( left.substr( left.length - 1 ) !== "\\" ) {
+ match[1] = (match[1] || "").replace( rBackslash, "" );
+ set = Expr.find[ type ]( match, context, isXML );
+
+ if ( set != null ) {
+ expr = expr.replace( Expr.match[ type ], "" );
+ break;
+ }
+ }
+ }
+ }
+
+ if ( !set ) {
+ set = typeof context.getElementsByTagName !== "undefined" ?
+ context.getElementsByTagName( "*" ) :
+ [];
+ }
+
+ return { set: set, expr: expr };
+};
+
+// Filter `set` by the remaining (non-positional) tokens of `expr`,
+// applying Expr.preFilter/Expr.filter per token type. When `inplace`,
+// failing entries are replaced with false rather than dropped; `not`
+// inverts each filter's result.
+Sizzle.filter = function( expr, set, inplace, not ) {
+ var match, anyFound,
+ type, found, item, filter, left,
+ i, pass,
+ old = expr,
+ result = [],
+ curLoop = set,
+ isXMLFilter = set && set[0] && Sizzle.isXML( set[0] );
+
+ while ( expr && set.length ) {
+ for ( type in Expr.filter ) {
+ if ( (match = Expr.leftMatch[ type ].exec( expr )) != null && match[2] ) {
+ filter = Expr.filter[ type ];
+ left = match[1];
+
+ anyFound = false;
+
+ match.splice(1,1);
+
+ // Escaped token — not really this type; try the next one
+ if ( left.substr( left.length - 1 ) === "\\" ) {
+ continue;
+ }
+
+ if ( curLoop === result ) {
+ result = [];
+ }
+
+ // preFilter may rewrite the match, fully consume it (false),
+ // or ask to skip this token type (true)
+ if ( Expr.preFilter[ type ] ) {
+ match = Expr.preFilter[ type ]( match, curLoop, inplace, result, not, isXMLFilter );
+
+ if ( !match ) {
+ anyFound = found = true;
+
+ } else if ( match === true ) {
+ continue;
+ }
+ }
+
+ if ( match ) {
+ for ( i = 0; (item = curLoop[i]) != null; i++ ) {
+ if ( item ) {
+ found = filter( item, match, i, curLoop );
+ pass = not ^ found;
+
+ if ( inplace && found != null ) {
+ if ( pass ) {
+ anyFound = true;
+
+ } else {
+ curLoop[i] = false;
+ }
+
+ } else if ( pass ) {
+ result.push( item );
+ anyFound = true;
+ }
+ }
+ }
+ }
+
+ if ( found !== undefined ) {
+ if ( !inplace ) {
+ curLoop = result;
+ }
+
+ // Strip the consumed token and move on to the next one
+ expr = expr.replace( Expr.match[ type ], "" );
+
+ if ( !anyFound ) {
+ return [];
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Improper expression
+ // (no token type consumed anything this pass)
+ if ( expr === old ) {
+ if ( anyFound == null ) {
+ Sizzle.error( expr );
+
+ } else {
+ break;
+ }
+ }
+
+ old = expr;
+ }
+
+ return curLoop;
+};
+
+// Report an unparsable selector expression
+Sizzle.error = function( msg ) {
+ throw new Error( "Syntax error, unrecognized expression: " + msg );
+};
+
+/**
+ * Utility function for retrieving the combined text value of a DOM node
+ * (element, document, fragment, text or CDATA node) or an array of nodes.
+ * Comment nodes are skipped; IE carriage returns are stripped.
+ * @param {Array|Element} elem
+ */
+var getText = Sizzle.getText = function( elem ) {
+ var i, node,
+ nodeType = elem.nodeType,
+ ret = "";
+
+ if ( nodeType ) {
+ if ( nodeType === 1 || nodeType === 9 || nodeType === 11 ) {
+ // Use textContent || innerText for elements
+ if ( typeof elem.textContent === 'string' ) {
+ return elem.textContent;
+ } else if ( typeof elem.innerText === 'string' ) {
+ // Replace IE's carriage returns
+ return elem.innerText.replace( rReturn, '' );
+ } else {
+ // Traverse its children
+ for ( elem = elem.firstChild; elem; elem = elem.nextSibling) {
+ ret += getText( elem );
+ }
+ }
+ } else if ( nodeType === 3 || nodeType === 4 ) {
+ // Text and CDATA nodes carry their value directly
+ return elem.nodeValue;
+ }
+ } else {
+
+ // If no nodeType, this is expected to be an array
+ for ( i = 0; (node = elem[i]); i++ ) {
+ // Do not traverse comment nodes
+ if ( node.nodeType !== 8 ) {
+ ret += getText( node );
+ }
+ }
+ }
+ return ret;
+};
+
+var Expr = Sizzle.selectors = {
+ order: [ "ID", "NAME", "TAG" ],
+
+ match: {
+ ID: /#((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,
+ CLASS: /\.((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,
+ NAME: /\[name=['"]*((?:[\w\u00c0-\uFFFF\-]|\\.)+)['"]*\]/,
+ ATTR: /\[\s*((?:[\w\u00c0-\uFFFF\-]|\\.)+)\s*(?:(\S?=)\s*(?:(['"])(.*?)\3|(#?(?:[\w\u00c0-\uFFFF\-]|\\.)*)|)|)\s*\]/,
+ TAG: /^((?:[\w\u00c0-\uFFFF\*\-]|\\.)+)/,
+ CHILD: /:(only|nth|last|first)-child(?:\(\s*(even|odd|(?:[+\-]?\d+|(?:[+\-]?\d*)?n\s*(?:[+\-]\s*\d+)?))\s*\))?/,
+ POS: /:(nth|eq|gt|lt|first|last|even|odd)(?:\((\d*)\))?(?=[^\-]|$)/,
+ PSEUDO: /:((?:[\w\u00c0-\uFFFF\-]|\\.)+)(?:\((['"]?)((?:\([^\)]+\)|[^\(\)]*)+)\2\))?/
+ },
+
+ leftMatch: {},
+
+ attrMap: {
+ "class": "className",
+ "for": "htmlFor"
+ },
+
+ attrHandle: {
+ href: function( elem ) {
+ return elem.getAttribute( "href" );
+ },
+ type: function( elem ) {
+ return elem.getAttribute( "type" );
+ }
+ },
+
+ relative: {
+ "+": function(checkSet, part){
+ var isPartStr = typeof part === "string",
+ isTag = isPartStr && !rNonWord.test( part ),
+ isPartStrNotTag = isPartStr && !isTag;
+
+ if ( isTag ) {
+ part = part.toLowerCase();
+ }
+
+ for ( var i = 0, l = checkSet.length, elem; i < l; i++ ) {
+ if ( (elem = checkSet[i]) ) {
+ while ( (elem = elem.previousSibling) && elem.nodeType !== 1 ) {}
+
+ checkSet[i] = isPartStrNotTag || elem && elem.nodeName.toLowerCase() === part ?
+ elem || false :
+ elem === part;
+ }
+ }
+
+ if ( isPartStrNotTag ) {
+ Sizzle.filter( part, checkSet, true );
+ }
+ },
+
+ ">": function( checkSet, part ) {
+ var elem,
+ isPartStr = typeof part === "string",
+ i = 0,
+ l = checkSet.length;
+
+ if ( isPartStr && !rNonWord.test( part ) ) {
+ part = part.toLowerCase();
+
+ for ( ; i < l; i++ ) {
+ elem = checkSet[i];
+
+ if ( elem ) {
+ var parent = elem.parentNode;
+ checkSet[i] = parent.nodeName.toLowerCase() === part ? parent : false;
+ }
+ }
+
+ } else {
+ for ( ; i < l; i++ ) {
+ elem = checkSet[i];
+
+ if ( elem ) {
+ checkSet[i] = isPartStr ?
+ elem.parentNode :
+ elem.parentNode === part;
+ }
+ }
+
+ if ( isPartStr ) {
+ Sizzle.filter( part, checkSet, true );
+ }
+ }
+ },
+
+ "": function(checkSet, part, isXML){
+ var nodeCheck,
+ doneName = done++,
+ checkFn = dirCheck;
+
+ if ( typeof part === "string" && !rNonWord.test( part ) ) {
+ part = part.toLowerCase();
+ nodeCheck = part;
+ checkFn = dirNodeCheck;
+ }
+
+ checkFn( "parentNode", part, doneName, checkSet, nodeCheck, isXML );
+ },
+
+ "~": function( checkSet, part, isXML ) {
+ var nodeCheck,
+ doneName = done++,
+ checkFn = dirCheck;
+
+ if ( typeof part === "string" && !rNonWord.test( part ) ) {
+ part = part.toLowerCase();
+ nodeCheck = part;
+ checkFn = dirNodeCheck;
+ }
+
+ checkFn( "previousSibling", part, doneName, checkSet, nodeCheck, isXML );
+ }
+ },
+
+ find: {
+ ID: function( match, context, isXML ) {
+ if ( typeof context.getElementById !== "undefined" && !isXML ) {
+ var m = context.getElementById(match[1]);
+ // Check parentNode to catch when Blackberry 4.6 returns
+ // nodes that are no longer in the document #6963
+ return m && m.parentNode ? [m] : [];
+ }
+ },
+
+ NAME: function( match, context ) {
+ if ( typeof context.getElementsByName !== "undefined" ) {
+ var ret = [],
+ results = context.getElementsByName( match[1] );
+
+ for ( var i = 0, l = results.length; i < l; i++ ) {
+ if ( results[i].getAttribute("name") === match[1] ) {
+ ret.push( results[i] );
+ }
+ }
+
+ return ret.length === 0 ? null : ret;
+ }
+ },
+
+ TAG: function( match, context ) {
+ if ( typeof context.getElementsByTagName !== "undefined" ) {
+ return context.getElementsByTagName( match[1] );
+ }
+ }
+ },
+ preFilter: {
+ CLASS: function( match, curLoop, inplace, result, not, isXML ) {
+ match = " " + match[1].replace( rBackslash, "" ) + " ";
+
+ if ( isXML ) {
+ return match;
+ }
+
+ for ( var i = 0, elem; (elem = curLoop[i]) != null; i++ ) {
+ if ( elem ) {
+ if ( not ^ (elem.className && (" " + elem.className + " ").replace(/[\t\n\r]/g, " ").indexOf(match) >= 0) ) {
+ if ( !inplace ) {
+ result.push( elem );
+ }
+
+ } else if ( inplace ) {
+ curLoop[i] = false;
+ }
+ }
+ }
+
+ return false;
+ },
+
+ ID: function( match ) {
+ return match[1].replace( rBackslash, "" );
+ },
+
+ TAG: function( match, curLoop ) {
+ return match[1].replace( rBackslash, "" ).toLowerCase();
+ },
+
+ CHILD: function( match ) {
+ if ( match[1] === "nth" ) {
+ if ( !match[2] ) {
+ Sizzle.error( match[0] );
+ }
+
+ match[2] = match[2].replace(/^\+|\s*/g, '');
+
+ // parse equations like 'even', 'odd', '5', '2n', '3n+2', '4n-1', '-n+6'
+ var test = /(-?)(\d*)(?:n([+\-]?\d*))?/.exec(
+ match[2] === "even" && "2n" || match[2] === "odd" && "2n+1" ||
+ !/\D/.test( match[2] ) && "0n+" + match[2] || match[2]);
+
+ // calculate the numbers (first)n+(last) including if they are negative
+ match[2] = (test[1] + (test[2] || 1)) - 0;
+ match[3] = test[3] - 0;
+ }
+ else if ( match[2] ) {
+ Sizzle.error( match[0] );
+ }
+
+ // TODO: Move to normal caching system
+ match[0] = done++;
+
+ return match;
+ },
+
+ ATTR: function( match, curLoop, inplace, result, not, isXML ) {
+ var name = match[1] = match[1].replace( rBackslash, "" );
+
+ if ( !isXML && Expr.attrMap[name] ) {
+ match[1] = Expr.attrMap[name];
+ }
+
+ // Handle if an un-quoted value was used
+ match[4] = ( match[4] || match[5] || "" ).replace( rBackslash, "" );
+
+ if ( match[2] === "~=" ) {
+ match[4] = " " + match[4] + " ";
+ }
+
+ return match;
+ },
+
+ PSEUDO: function( match, curLoop, inplace, result, not ) {
+ if ( match[1] === "not" ) {
+ // If we're dealing with a complex expression, or a simple one
+ if ( ( chunker.exec(match[3]) || "" ).length > 1 || /^\w/.test(match[3]) ) {
+ match[3] = Sizzle(match[3], null, null, curLoop);
+
+ } else {
+ var ret = Sizzle.filter(match[3], curLoop, inplace, true ^ not);
+
+ if ( !inplace ) {
+ result.push.apply( result, ret );
+ }
+
+ return false;
+ }
+
+ } else if ( Expr.match.POS.test( match[0] ) || Expr.match.CHILD.test( match[0] ) ) {
+ return true;
+ }
+
+ return match;
+ },
+
+ POS: function( match ) {
+ match.unshift( true );
+
+ return match;
+ }
+ },
+
+ filters: {
+ enabled: function( elem ) {
+ return elem.disabled === false && elem.type !== "hidden";
+ },
+
+ disabled: function( elem ) {
+ return elem.disabled === true;
+ },
+
+ checked: function( elem ) {
+ return elem.checked === true;
+ },
+
+ selected: function( elem ) {
+ // Accessing this property makes selected-by-default
+ // options in Safari work properly
+ if ( elem.parentNode ) {
+ elem.parentNode.selectedIndex;
+ }
+
+ return elem.selected === true;
+ },
+
+ parent: function( elem ) {
+ return !!elem.firstChild;
+ },
+
+ empty: function( elem ) {
+ return !elem.firstChild;
+ },
+
+ has: function( elem, i, match ) {
+ return !!Sizzle( match[3], elem ).length;
+ },
+
+ header: function( elem ) {
+ return (/h\d/i).test( elem.nodeName );
+ },
+
+ text: function( elem ) {
+ var attr = elem.getAttribute( "type" ), type = elem.type;
+ // IE6 and 7 will map elem.type to 'text' for new HTML5 types (search, etc)
+ // use getAttribute instead to test this case
+ return elem.nodeName.toLowerCase() === "input" && "text" === type && ( attr === type || attr === null );
+ },
+
+ radio: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "radio" === elem.type;
+ },
+
+ checkbox: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "checkbox" === elem.type;
+ },
+
+ file: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "file" === elem.type;
+ },
+
+ password: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "password" === elem.type;
+ },
+
+ submit: function( elem ) {
+ var name = elem.nodeName.toLowerCase();
+ return (name === "input" || name === "button") && "submit" === elem.type;
+ },
+
+ image: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "image" === elem.type;
+ },
+
+ reset: function( elem ) {
+ var name = elem.nodeName.toLowerCase();
+ return (name === "input" || name === "button") && "reset" === elem.type;
+ },
+
+ button: function( elem ) {
+ var name = elem.nodeName.toLowerCase();
+ return name === "input" && "button" === elem.type || name === "button";
+ },
+
+ input: function( elem ) {
+ return (/input|select|textarea|button/i).test( elem.nodeName );
+ },
+
+ focus: function( elem ) {
+ return elem === elem.ownerDocument.activeElement;
+ }
+ },
+ setFilters: {
+ first: function( elem, i ) {
+ return i === 0;
+ },
+
+ last: function( elem, i, match, array ) {
+ return i === array.length - 1;
+ },
+
+ even: function( elem, i ) {
+ return i % 2 === 0;
+ },
+
+ odd: function( elem, i ) {
+ return i % 2 === 1;
+ },
+
+ lt: function( elem, i, match ) {
+ return i < match[3] - 0;
+ },
+
+ gt: function( elem, i, match ) {
+ return i > match[3] - 0;
+ },
+
+ nth: function( elem, i, match ) {
+ return match[3] - 0 === i;
+ },
+
+ eq: function( elem, i, match ) {
+ return match[3] - 0 === i;
+ }
+ },
+ filter: {
+ PSEUDO: function( elem, match, i, array ) {
+ var name = match[1],
+ filter = Expr.filters[ name ];
+
+ if ( filter ) {
+ return filter( elem, i, match, array );
+
+ } else if ( name === "contains" ) {
+ return (elem.textContent || elem.innerText || getText([ elem ]) || "").indexOf(match[3]) >= 0;
+
+ } else if ( name === "not" ) {
+ var not = match[3];
+
+ for ( var j = 0, l = not.length; j < l; j++ ) {
+ if ( not[j] === elem ) {
+ return false;
+ }
+ }
+
+ return true;
+
+ } else {
+ Sizzle.error( name );
+ }
+ },
+
+ CHILD: function( elem, match ) {
+ var first, last,
+ doneName, parent, cache,
+ count, diff,
+ type = match[1],
+ node = elem;
+
+ switch ( type ) {
+ case "only":
+ case "first":
+ while ( (node = node.previousSibling) ) {
+ if ( node.nodeType === 1 ) {
+ return false;
+ }
+ }
+
+ if ( type === "first" ) {
+ return true;
+ }
+
+ node = elem;
+
+ /* falls through */
+ case "last":
+ while ( (node = node.nextSibling) ) {
+ if ( node.nodeType === 1 ) {
+ return false;
+ }
+ }
+
+ return true;
+
+ case "nth":
+ first = match[2];
+ last = match[3];
+
+ if ( first === 1 && last === 0 ) {
+ return true;
+ }
+
+ doneName = match[0];
+ parent = elem.parentNode;
+
+ if ( parent && (parent[ expando ] !== doneName || !elem.nodeIndex) ) {
+ count = 0;
+
+ for ( node = parent.firstChild; node; node = node.nextSibling ) {
+ if ( node.nodeType === 1 ) {
+ node.nodeIndex = ++count;
+ }
+ }
+
+ parent[ expando ] = doneName;
+ }
+
+ diff = elem.nodeIndex - last;
+
+ if ( first === 0 ) {
+ return diff === 0;
+
+ } else {
+ return ( diff % first === 0 && diff / first >= 0 );
+ }
+ }
+ },
+
+ ID: function( elem, match ) {
+ return elem.nodeType === 1 && elem.getAttribute("id") === match;
+ },
+
+ TAG: function( elem, match ) {
+ return (match === "*" && elem.nodeType === 1) || !!elem.nodeName && elem.nodeName.toLowerCase() === match;
+ },
+
+ CLASS: function( elem, match ) {
+ return (" " + (elem.className || elem.getAttribute("class")) + " ")
+ .indexOf( match ) > -1;
+ },
+
+ ATTR: function( elem, match ) {
+ var name = match[1],
+ result = Sizzle.attr ?
+ Sizzle.attr( elem, name ) :
+ Expr.attrHandle[ name ] ?
+ Expr.attrHandle[ name ]( elem ) :
+ elem[ name ] != null ?
+ elem[ name ] :
+ elem.getAttribute( name ),
+ value = result + "",
+ type = match[2],
+ check = match[4];
+
+ return result == null ?
+ type === "!=" :
+ !type && Sizzle.attr ?
+ result != null :
+ type === "=" ?
+ value === check :
+ type === "*=" ?
+ value.indexOf(check) >= 0 :
+ type === "~=" ?
+ (" " + value + " ").indexOf(check) >= 0 :
+ !check ?
+ value && result !== false :
+ type === "!=" ?
+ value !== check :
+ type === "^=" ?
+ value.indexOf(check) === 0 :
+ type === "$=" ?
+ value.substr(value.length - check.length) === check :
+ type === "|=" ?
+ value === check || value.substr(0, check.length + 1) === check + "-" :
+ false;
+ },
+
+ POS: function( elem, match, i, array ) {
+ var name = match[2],
+ filter = Expr.setFilters[ name ];
+
+ if ( filter ) {
+ return filter( elem, i, match, array );
+ }
+ }
+ }
+};
+
+var origPOS = Expr.match.POS,
+ fescape = function(all, num){
+ return "\\" + (num - 0 + 1);
+ };
+
+for ( var type in Expr.match ) {
+ Expr.match[ type ] = new RegExp( Expr.match[ type ].source + (/(?![^\[]*\])(?![^\(]*\))/.source) );
+ Expr.leftMatch[ type ] = new RegExp( /(^(?:.|\r|\n)*?)/.source + Expr.match[ type ].source.replace(/\\(\d+)/g, fescape) );
+}
+// Expose origPOS
+// "global" as in regardless of relation to brackets/parens
+Expr.match.globalPOS = origPOS;
+
+var makeArray = function( array, results ) {
+ array = Array.prototype.slice.call( array, 0 );
+
+ if ( results ) {
+ results.push.apply( results, array );
+ return results;
+ }
+
+ return array;
+};
+
+// Perform a simple check to determine if the browser is capable of
+// converting a NodeList to an array using builtin methods.
+// Also verifies that the returned array holds DOM nodes
+// (which is not the case in the Blackberry browser)
+try {
+ Array.prototype.slice.call( document.documentElement.childNodes, 0 )[0].nodeType;
+
+// Provide a fallback method if it does not work
+} catch( e ) {
+ makeArray = function( array, results ) {
+ var i = 0,
+ ret = results || [];
+
+ if ( toString.call(array) === "[object Array]" ) {
+ Array.prototype.push.apply( ret, array );
+
+ } else {
+ if ( typeof array.length === "number" ) {
+ for ( var l = array.length; i < l; i++ ) {
+ ret.push( array[i] );
+ }
+
+ } else {
+ for ( ; array[i]; i++ ) {
+ ret.push( array[i] );
+ }
+ }
+ }
+
+ return ret;
+ };
+}
+
+var sortOrder, siblingCheck;
+
+if ( document.documentElement.compareDocumentPosition ) {
+ sortOrder = function( a, b ) {
+ if ( a === b ) {
+ hasDuplicate = true;
+ return 0;
+ }
+
+ if ( !a.compareDocumentPosition || !b.compareDocumentPosition ) {
+ return a.compareDocumentPosition ? -1 : 1;
+ }
+
+ return a.compareDocumentPosition(b) & 4 ? -1 : 1;
+ };
+
+} else {
+ sortOrder = function( a, b ) {
+ // The nodes are identical, we can exit early
+ if ( a === b ) {
+ hasDuplicate = true;
+ return 0;
+
+ // Fallback to using sourceIndex (in IE) if it's available on both nodes
+ } else if ( a.sourceIndex && b.sourceIndex ) {
+ return a.sourceIndex - b.sourceIndex;
+ }
+
+ var al, bl,
+ ap = [],
+ bp = [],
+ aup = a.parentNode,
+ bup = b.parentNode,
+ cur = aup;
+
+ // If the nodes are siblings (or identical) we can do a quick check
+ if ( aup === bup ) {
+ return siblingCheck( a, b );
+
+ // If no parents were found then the nodes are disconnected
+ } else if ( !aup ) {
+ return -1;
+
+ } else if ( !bup ) {
+ return 1;
+ }
+
+ // Otherwise they're somewhere else in the tree so we need
+ // to build up a full list of the parentNodes for comparison
+ while ( cur ) {
+ ap.unshift( cur );
+ cur = cur.parentNode;
+ }
+
+ cur = bup;
+
+ while ( cur ) {
+ bp.unshift( cur );
+ cur = cur.parentNode;
+ }
+
+ al = ap.length;
+ bl = bp.length;
+
+ // Start walking down the tree looking for a discrepancy
+ for ( var i = 0; i < al && i < bl; i++ ) {
+ if ( ap[i] !== bp[i] ) {
+ return siblingCheck( ap[i], bp[i] );
+ }
+ }
+
+ // We ended someplace up the tree so do a sibling check
+ return i === al ?
+ siblingCheck( a, bp[i], -1 ) :
+ siblingCheck( ap[i], b, 1 );
+ };
+
+ siblingCheck = function( a, b, ret ) {
+ if ( a === b ) {
+ return ret;
+ }
+
+ var cur = a.nextSibling;
+
+ while ( cur ) {
+ if ( cur === b ) {
+ return -1;
+ }
+
+ cur = cur.nextSibling;
+ }
+
+ return 1;
+ };
+}
+
+// Check to see if the browser returns elements by name when
+// querying by getElementById (and provide a workaround)
+(function(){
+ // We're going to inject a fake input element with a specified name
+ var form = document.createElement("div"),
+ id = "script" + (new Date()).getTime(),
+ root = document.documentElement;
+
+ form.innerHTML = "<a name='" + id + "'/>";
+
+ // Inject it into the root element, check its status, and remove it quickly
+ root.insertBefore( form, root.firstChild );
+
+ // The workaround has to do additional checks after a getElementById
+ // Which slows things down for other browsers (hence the branching)
+ if ( document.getElementById( id ) ) {
+ Expr.find.ID = function( match, context, isXML ) {
+ if ( typeof context.getElementById !== "undefined" && !isXML ) {
+ var m = context.getElementById(match[1]);
+
+ return m ?
+ m.id === match[1] || typeof m.getAttributeNode !== "undefined" && m.getAttributeNode("id").nodeValue === match[1] ?
+ [m] :
+ undefined :
+ [];
+ }
+ };
+
+ Expr.filter.ID = function( elem, match ) {
+ var node = typeof elem.getAttributeNode !== "undefined" && elem.getAttributeNode("id");
+
+ return elem.nodeType === 1 && node && node.nodeValue === match;
+ };
+ }
+
+ root.removeChild( form );
+
+ // release memory in IE
+ root = form = null;
+})();
+
+(function(){
+ // Check to see if the browser returns only elements
+ // when doing getElementsByTagName("*")
+
+ // Create a fake element
+ var div = document.createElement("div");
+ div.appendChild( document.createComment("") );
+
+ // Make sure no comments are found
+ if ( div.getElementsByTagName("*").length > 0 ) {
+ Expr.find.TAG = function( match, context ) {
+ var results = context.getElementsByTagName( match[1] );
+
+ // Filter out possible comments
+ if ( match[1] === "*" ) {
+ var tmp = [];
+
+ for ( var i = 0; results[i]; i++ ) {
+ if ( results[i].nodeType === 1 ) {
+ tmp.push( results[i] );
+ }
+ }
+
+ results = tmp;
+ }
+
+ return results;
+ };
+ }
+
+ // Check to see if an attribute returns normalized href attributes
+ div.innerHTML = "<a href='#'></a>";
+
+ if ( div.firstChild && typeof div.firstChild.getAttribute !== "undefined" &&
+ div.firstChild.getAttribute("href") !== "#" ) {
+
+ Expr.attrHandle.href = function( elem ) {
+ return elem.getAttribute( "href", 2 );
+ };
+ }
+
+ // release memory in IE
+ div = null;
+})();
+
+if ( document.querySelectorAll ) {
+ (function(){
+ var oldSizzle = Sizzle,
+ div = document.createElement("div"),
+ id = "__sizzle__";
+
+ div.innerHTML = "<p class='TEST'></p>";
+
+ // Safari can't handle uppercase or unicode characters when
+ // in quirks mode.
+ if ( div.querySelectorAll && div.querySelectorAll(".TEST").length === 0 ) {
+ return;
+ }
+
+ Sizzle = function( query, context, extra, seed ) {
+ context = context || document;
+
+ // Only use querySelectorAll on non-XML documents
+ // (ID selectors don't work in non-HTML documents)
+ if ( !seed && !Sizzle.isXML(context) ) {
+ // See if we find a selector to speed up
+ var match = /^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec( query );
+
+ if ( match && (context.nodeType === 1 || context.nodeType === 9) ) {
+ // Speed-up: Sizzle("TAG")
+ if ( match[1] ) {
+ return makeArray( context.getElementsByTagName( query ), extra );
+
+ // Speed-up: Sizzle(".CLASS")
+ } else if ( match[2] && Expr.find.CLASS && context.getElementsByClassName ) {
+ return makeArray( context.getElementsByClassName( match[2] ), extra );
+ }
+ }
+
+ if ( context.nodeType === 9 ) {
+ // Speed-up: Sizzle("body")
+ // The body element only exists once, optimize finding it
+ if ( query === "body" && context.body ) {
+ return makeArray( [ context.body ], extra );
+
+ // Speed-up: Sizzle("#ID")
+ } else if ( match && match[3] ) {
+ var elem = context.getElementById( match[3] );
+
+ // Check parentNode to catch when Blackberry 4.6 returns
+ // nodes that are no longer in the document #6963
+ if ( elem && elem.parentNode ) {
+ // Handle the case where IE and Opera return items
+ // by name instead of ID
+ if ( elem.id === match[3] ) {
+ return makeArray( [ elem ], extra );
+ }
+
+ } else {
+ return makeArray( [], extra );
+ }
+ }
+
+ try {
+ return makeArray( context.querySelectorAll(query), extra );
+ } catch(qsaError) {}
+
+ // qSA works strangely on Element-rooted queries
+ // We can work around this by specifying an extra ID on the root
+ // and working up from there (Thanks to Andrew Dupont for the technique)
+ // IE 8 doesn't work on object elements
+ } else if ( context.nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) {
+ var oldContext = context,
+ old = context.getAttribute( "id" ),
+ nid = old || id,
+ hasParent = context.parentNode,
+ relativeHierarchySelector = /^\s*[+~]/.test( query );
+
+ if ( !old ) {
+ context.setAttribute( "id", nid );
+ } else {
+ nid = nid.replace( /'/g, "\\$&" );
+ }
+ if ( relativeHierarchySelector && hasParent ) {
+ context = context.parentNode;
+ }
+
+ try {
+ if ( !relativeHierarchySelector || hasParent ) {
+ return makeArray( context.querySelectorAll( "[id='" + nid + "'] " + query ), extra );
+ }
+
+ } catch(pseudoError) {
+ } finally {
+ if ( !old ) {
+ oldContext.removeAttribute( "id" );
+ }
+ }
+ }
+ }
+
+ return oldSizzle(query, context, extra, seed);
+ };
+
+ for ( var prop in oldSizzle ) {
+ Sizzle[ prop ] = oldSizzle[ prop ];
+ }
+
+ // release memory in IE
+ div = null;
+ })();
+}
+
+(function(){
+ var html = document.documentElement,
+ matches = html.matchesSelector || html.mozMatchesSelector || html.webkitMatchesSelector || html.msMatchesSelector;
+
+ if ( matches ) {
+ // Check to see if it's possible to do matchesSelector
+ // on a disconnected node (IE 9 fails this)
+ var disconnectedMatch = !matches.call( document.createElement( "div" ), "div" ),
+ pseudoWorks = false;
+
+ try {
+ // This should fail with an exception
+ // Gecko does not error, returns false instead
+ matches.call( document.documentElement, "[test!='']:sizzle" );
+
+ } catch( pseudoError ) {
+ pseudoWorks = true;
+ }
+
+ Sizzle.matchesSelector = function( node, expr ) {
+ // Make sure that attribute selectors are quoted
+ expr = expr.replace(/\=\s*([^'"\]]*)\s*\]/g, "='$1']");
+
+ if ( !Sizzle.isXML( node ) ) {
+ try {
+ if ( pseudoWorks || !Expr.match.PSEUDO.test( expr ) && !/!=/.test( expr ) ) {
+ var ret = matches.call( node, expr );
+
+ // IE 9's matchesSelector returns false on disconnected nodes
+ if ( ret || !disconnectedMatch ||
+ // As well, disconnected nodes are said to be in a document
+ // fragment in IE 9, so check for that
+ node.document && node.document.nodeType !== 11 ) {
+ return ret;
+ }
+ }
+ } catch(e) {}
+ }
+
+ return Sizzle(expr, null, null, [node]).length > 0;
+ };
+ }
+})();
+
+(function(){
+ var div = document.createElement("div");
+
+ div.innerHTML = "<div class='test e'></div><div class='test'></div>";
+
+ // Opera can't find a second classname (in 9.6)
+ // Also, make sure that getElementsByClassName actually exists
+ if ( !div.getElementsByClassName || div.getElementsByClassName("e").length === 0 ) {
+ return;
+ }
+
+ // Safari caches class attributes, doesn't catch changes (in 3.2)
+ div.lastChild.className = "e";
+
+ if ( div.getElementsByClassName("e").length === 1 ) {
+ return;
+ }
+
+ Expr.order.splice(1, 0, "CLASS");
+ Expr.find.CLASS = function( match, context, isXML ) {
+ if ( typeof context.getElementsByClassName !== "undefined" && !isXML ) {
+ return context.getElementsByClassName(match[1]);
+ }
+ };
+
+ // release memory in IE
+ div = null;
+})();
+
+function dirNodeCheck( dir, cur, doneName, checkSet, nodeCheck, isXML ) {
+ for ( var i = 0, l = checkSet.length; i < l; i++ ) {
+ var elem = checkSet[i];
+
+ if ( elem ) {
+ var match = false;
+
+ elem = elem[dir];
+
+ while ( elem ) {
+ if ( elem[ expando ] === doneName ) {
+ match = checkSet[elem.sizset];
+ break;
+ }
+
+ if ( elem.nodeType === 1 && !isXML ){
+ elem[ expando ] = doneName;
+ elem.sizset = i;
+ }
+
+ if ( elem.nodeName.toLowerCase() === cur ) {
+ match = elem;
+ break;
+ }
+
+ elem = elem[dir];
+ }
+
+ checkSet[i] = match;
+ }
+ }
+}
+
+function dirCheck( dir, cur, doneName, checkSet, nodeCheck, isXML ) {
+ for ( var i = 0, l = checkSet.length; i < l; i++ ) {
+ var elem = checkSet[i];
+
+ if ( elem ) {
+ var match = false;
+
+ elem = elem[dir];
+
+ while ( elem ) {
+ if ( elem[ expando ] === doneName ) {
+ match = checkSet[elem.sizset];
+ break;
+ }
+
+ if ( elem.nodeType === 1 ) {
+ if ( !isXML ) {
+ elem[ expando ] = doneName;
+ elem.sizset = i;
+ }
+
+ if ( typeof cur !== "string" ) {
+ if ( elem === cur ) {
+ match = true;
+ break;
+ }
+
+ } else if ( Sizzle.filter( cur, [elem] ).length > 0 ) {
+ match = elem;
+ break;
+ }
+ }
+
+ elem = elem[dir];
+ }
+
+ checkSet[i] = match;
+ }
+ }
+}
+
+if ( document.documentElement.contains ) {
+ Sizzle.contains = function( a, b ) {
+ return a !== b && (a.contains ? a.contains(b) : true);
+ };
+
+} else if ( document.documentElement.compareDocumentPosition ) {
+ Sizzle.contains = function( a, b ) {
+ return !!(a.compareDocumentPosition(b) & 16);
+ };
+
+} else {
+ Sizzle.contains = function() {
+ return false;
+ };
+}
+
+Sizzle.isXML = function( elem ) {
+ // documentElement is verified for cases where it doesn't yet exist
+ // (such as loading iframes in IE - #4833)
+ var documentElement = (elem ? elem.ownerDocument || elem : 0).documentElement;
+
+ return documentElement ? documentElement.nodeName !== "HTML" : false;
+};
+
+var posProcess = function( selector, context, seed ) {
+ var match,
+ tmpSet = [],
+ later = "",
+ root = context.nodeType ? [context] : context;
+
+ // Position selectors must be done after the filter
+ // And so must :not(positional) so we move all PSEUDOs to the end
+ while ( (match = Expr.match.PSEUDO.exec( selector )) ) {
+ later += match[0];
+ selector = selector.replace( Expr.match.PSEUDO, "" );
+ }
+
+ selector = Expr.relative[selector] ? selector + "*" : selector;
+
+ for ( var i = 0, l = root.length; i < l; i++ ) {
+ Sizzle( selector, root[i], tmpSet, seed );
+ }
+
+ return Sizzle.filter( later, tmpSet );
+};
+
+// EXPOSE
+// Override sizzle attribute retrieval
+Sizzle.attr = jQuery.attr;
+Sizzle.selectors.attrMap = {};
+jQuery.find = Sizzle;
+jQuery.expr = Sizzle.selectors;
+jQuery.expr[":"] = jQuery.expr.filters;
+jQuery.unique = Sizzle.uniqueSort;
+jQuery.text = Sizzle.getText;
+jQuery.isXMLDoc = Sizzle.isXML;
+jQuery.contains = Sizzle.contains;
+
+
+})();
+
+
+var runtil = /Until$/,
+ rparentsprev = /^(?:parents|prevUntil|prevAll)/,
+ // Note: This RegExp should be improved, or likely pulled from Sizzle
+ rmultiselector = /,/,
+ isSimple = /^.[^:#\[\.,]*$/,
+ slice = Array.prototype.slice,
+ POS = jQuery.expr.match.globalPOS,
+ // methods guaranteed to produce a unique set when starting from a unique set
+ guaranteedUnique = {
+ children: true,
+ contents: true,
+ next: true,
+ prev: true
+ };
+
+jQuery.fn.extend({
+ find: function( selector ) {
+ var self = this,
+ i, l;
+
+ if ( typeof selector !== "string" ) {
+ return jQuery( selector ).filter(function() {
+ for ( i = 0, l = self.length; i < l; i++ ) {
+ if ( jQuery.contains( self[ i ], this ) ) {
+ return true;
+ }
+ }
+ });
+ }
+
+ var ret = this.pushStack( "", "find", selector ),
+ length, n, r;
+
+ for ( i = 0, l = this.length; i < l; i++ ) {
+ length = ret.length;
+ jQuery.find( selector, this[i], ret );
+
+ if ( i > 0 ) {
+ // Make sure that the results are unique
+ for ( n = length; n < ret.length; n++ ) {
+ for ( r = 0; r < length; r++ ) {
+ if ( ret[r] === ret[n] ) {
+ ret.splice(n--, 1);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ return ret;
+ },
+
+ has: function( target ) {
+ var targets = jQuery( target );
+ return this.filter(function() {
+ for ( var i = 0, l = targets.length; i < l; i++ ) {
+ if ( jQuery.contains( this, targets[i] ) ) {
+ return true;
+ }
+ }
+ });
+ },
+
+ not: function( selector ) {
+ return this.pushStack( winnow(this, selector, false), "not", selector);
+ },
+
+ filter: function( selector ) {
+ return this.pushStack( winnow(this, selector, true), "filter", selector );
+ },
+
+ is: function( selector ) {
+ return !!selector && (
+ typeof selector === "string" ?
+ // If this is a positional selector, check membership in the returned set
+ // so $("p:first").is("p:last") won't return true for a doc with two "p".
+ POS.test( selector ) ?
+ jQuery( selector, this.context ).index( this[0] ) >= 0 :
+ jQuery.filter( selector, this ).length > 0 :
+ this.filter( selector ).length > 0 );
+ },
+
+ closest: function( selectors, context ) {
+ var ret = [], i, l, cur = this[0];
+
+ // Array (deprecated as of jQuery 1.7)
+ if ( jQuery.isArray( selectors ) ) {
+ var level = 1;
+
+ while ( cur && cur.ownerDocument && cur !== context ) {
+ for ( i = 0; i < selectors.length; i++ ) {
+
+ if ( jQuery( cur ).is( selectors[ i ] ) ) {
+ ret.push({ selector: selectors[ i ], elem: cur, level: level });
+ }
+ }
+
+ cur = cur.parentNode;
+ level++;
+ }
+
+ return ret;
+ }
+
+ // String
+ var pos = POS.test( selectors ) || typeof selectors !== "string" ?
+ jQuery( selectors, context || this.context ) :
+ 0;
+
+ for ( i = 0, l = this.length; i < l; i++ ) {
+ cur = this[i];
+
+ while ( cur ) {
+ if ( pos ? pos.index(cur) > -1 : jQuery.find.matchesSelector(cur, selectors) ) {
+ ret.push( cur );
+ break;
+
+ } else {
+ cur = cur.parentNode;
+ if ( !cur || !cur.ownerDocument || cur === context || cur.nodeType === 11 ) {
+ break;
+ }
+ }
+ }
+ }
+
+ ret = ret.length > 1 ? jQuery.unique( ret ) : ret;
+
+ return this.pushStack( ret, "closest", selectors );
+ },
+
+ // Determine the position of an element within
+ // the matched set of elements
+ index: function( elem ) {
+
+ // No argument, return index in parent
+ if ( !elem ) {
+ return ( this[0] && this[0].parentNode ) ? this.prevAll().length : -1;
+ }
+
+ // index in selector
+ if ( typeof elem === "string" ) {
+ return jQuery.inArray( this[0], jQuery( elem ) );
+ }
+
+ // Locate the position of the desired element
+ return jQuery.inArray(
+ // If it receives a jQuery object, the first element is used
+ elem.jquery ? elem[0] : elem, this );
+ },
+
+ add: function( selector, context ) {
+ var set = typeof selector === "string" ?
+ jQuery( selector, context ) :
+ jQuery.makeArray( selector && selector.nodeType ? [ selector ] : selector ),
+ all = jQuery.merge( this.get(), set );
+
+ return this.pushStack( isDisconnected( set[0] ) || isDisconnected( all[0] ) ?
+ all :
+ jQuery.unique( all ) );
+ },
+
+ andSelf: function() {
+ return this.add( this.prevObject );
+ }
+});
+
+// A painfully simple check to see if an element is disconnected
+// from a document (should be improved, where feasible).
+function isDisconnected( node ) {
+ return !node || !node.parentNode || node.parentNode.nodeType === 11;
+}
+
+jQuery.each({
+ parent: function( elem ) {
+ var parent = elem.parentNode;
+ return parent && parent.nodeType !== 11 ? parent : null;
+ },
+ parents: function( elem ) {
+ return jQuery.dir( elem, "parentNode" );
+ },
+ parentsUntil: function( elem, i, until ) {
+ return jQuery.dir( elem, "parentNode", until );
+ },
+ next: function( elem ) {
+ return jQuery.nth( elem, 2, "nextSibling" );
+ },
+ prev: function( elem ) {
+ return jQuery.nth( elem, 2, "previousSibling" );
+ },
+ nextAll: function( elem ) {
+ return jQuery.dir( elem, "nextSibling" );
+ },
+ prevAll: function( elem ) {
+ return jQuery.dir( elem, "previousSibling" );
+ },
+ nextUntil: function( elem, i, until ) {
+ return jQuery.dir( elem, "nextSibling", until );
+ },
+ prevUntil: function( elem, i, until ) {
+ return jQuery.dir( elem, "previousSibling", until );
+ },
+ siblings: function( elem ) {
+ return jQuery.sibling( ( elem.parentNode || {} ).firstChild, elem );
+ },
+ children: function( elem ) {
+ return jQuery.sibling( elem.firstChild );
+ },
+ contents: function( elem ) {
+ return jQuery.nodeName( elem, "iframe" ) ?
+ elem.contentDocument || elem.contentWindow.document :
+ jQuery.makeArray( elem.childNodes );
+ }
+}, function( name, fn ) {
+ jQuery.fn[ name ] = function( until, selector ) {
+ var ret = jQuery.map( this, fn, until );
+
+ if ( !runtil.test( name ) ) {
+ selector = until;
+ }
+
+ if ( selector && typeof selector === "string" ) {
+ ret = jQuery.filter( selector, ret );
+ }
+
+ ret = this.length > 1 && !guaranteedUnique[ name ] ? jQuery.unique( ret ) : ret;
+
+ if ( (this.length > 1 || rmultiselector.test( selector )) && rparentsprev.test( name ) ) {
+ ret = ret.reverse();
+ }
+
+ return this.pushStack( ret, name, slice.call( arguments ).join(",") );
+ };
+});
+
+jQuery.extend({
+ filter: function( expr, elems, not ) {
+ if ( not ) {
+ expr = ":not(" + expr + ")";
+ }
+
+ return elems.length === 1 ?
+ jQuery.find.matchesSelector(elems[0], expr) ? [ elems[0] ] : [] :
+ jQuery.find.matches(expr, elems);
+ },
+
+ dir: function( elem, dir, until ) {
+ var matched = [],
+ cur = elem[ dir ];
+
+ while ( cur && cur.nodeType !== 9 && (until === undefined || cur.nodeType !== 1 || !jQuery( cur ).is( until )) ) {
+ if ( cur.nodeType === 1 ) {
+ matched.push( cur );
+ }
+ cur = cur[dir];
+ }
+ return matched;
+ },
+
+ nth: function( cur, result, dir, elem ) {
+ result = result || 1;
+ var num = 0;
+
+ for ( ; cur; cur = cur[dir] ) {
+ if ( cur.nodeType === 1 && ++num === result ) {
+ break;
+ }
+ }
+
+ return cur;
+ },
+
+ sibling: function( n, elem ) {
+ var r = [];
+
+ for ( ; n; n = n.nextSibling ) {
+ if ( n.nodeType === 1 && n !== elem ) {
+ r.push( n );
+ }
+ }
+
+ return r;
+ }
+});
+
+// Implement the identical functionality for filter and not
+function winnow( elements, qualifier, keep ) {
+
+ // Can't pass null or undefined to indexOf in Firefox 4
+ // Set to 0 to skip string check
+ qualifier = qualifier || 0;
+
+ if ( jQuery.isFunction( qualifier ) ) {
+ return jQuery.grep(elements, function( elem, i ) {
+ var retVal = !!qualifier.call( elem, i, elem );
+ return retVal === keep;
+ });
+
+ } else if ( qualifier.nodeType ) {
+ return jQuery.grep(elements, function( elem, i ) {
+ return ( elem === qualifier ) === keep;
+ });
+
+ } else if ( typeof qualifier === "string" ) {
+ var filtered = jQuery.grep(elements, function( elem ) {
+ return elem.nodeType === 1;
+ });
+
+ if ( isSimple.test( qualifier ) ) {
+ return jQuery.filter(qualifier, filtered, !keep);
+ } else {
+ qualifier = jQuery.filter( qualifier, filtered );
+ }
+ }
+
+ return jQuery.grep(elements, function( elem, i ) {
+ return ( jQuery.inArray( elem, qualifier ) >= 0 ) === keep;
+ });
+}
+
+
+
+
+function createSafeFragment( document ) {
+ var list = nodeNames.split( "|" ),
+ safeFrag = document.createDocumentFragment();
+
+ if ( safeFrag.createElement ) {
+ while ( list.length ) {
+ safeFrag.createElement(
+ list.pop()
+ );
+ }
+ }
+ return safeFrag;
+}
+
+var nodeNames = "abbr|article|aside|audio|bdi|canvas|data|datalist|details|figcaption|figure|footer|" +
+ "header|hgroup|mark|meter|nav|output|progress|section|summary|time|video",
+ rinlinejQuery = / jQuery\d+="(?:\d+|null)"/g,
+ rleadingWhitespace = /^\s+/,
+ rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,
+ rtagName = /<([\w:]+)/,
+ rtbody = /<tbody/i,
+ rhtml = /<|&#?\w+;/,
+ rnoInnerhtml = /<(?:script|style)/i,
+ rnocache = /<(?:script|object|embed|option|style)/i,
+ rnoshimcache = new RegExp("<(?:" + nodeNames + ")[\\s/>]", "i"),
+ // checked="checked" or checked
+ rchecked = /checked\s*(?:[^=]|=\s*.checked.)/i,
+ rscriptType = /\/(java|ecma)script/i,
+ rcleanScript = /^\s*<!(?:\[CDATA\[|\-\-)/,
+ wrapMap = {
+ option: [ 1, "<select multiple='multiple'>", "</select>" ],
+ legend: [ 1, "<fieldset>", "</fieldset>" ],
+ thead: [ 1, "<table>", "</table>" ],
+ tr: [ 2, "<table><tbody>", "</tbody></table>" ],
+ td: [ 3, "<table><tbody><tr>", "</tr></tbody></table>" ],
+ col: [ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ],
+ area: [ 1, "<map>", "</map>" ],
+ _default: [ 0, "", "" ]
+ },
+ safeFragment = createSafeFragment( document );
+
+wrapMap.optgroup = wrapMap.option;
+wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead;
+wrapMap.th = wrapMap.td;
+
+// IE can't serialize <link> and <script> tags normally
+if ( !jQuery.support.htmlSerialize ) {
+ wrapMap._default = [ 1, "div<div>", "</div>" ];
+}
+
+jQuery.fn.extend({
+ text: function( value ) {
+ return jQuery.access( this, function( value ) {
+ return value === undefined ?
+ jQuery.text( this ) :
+ this.empty().append( ( this[0] && this[0].ownerDocument || document ).createTextNode( value ) );
+ }, null, value, arguments.length );
+ },
+
+ wrapAll: function( html ) {
+ if ( jQuery.isFunction( html ) ) {
+ return this.each(function(i) {
+ jQuery(this).wrapAll( html.call(this, i) );
+ });
+ }
+
+ if ( this[0] ) {
+ // The elements to wrap the target around
+ var wrap = jQuery( html, this[0].ownerDocument ).eq(0).clone(true);
+
+ if ( this[0].parentNode ) {
+ wrap.insertBefore( this[0] );
+ }
+
+ wrap.map(function() {
+ var elem = this;
+
+ while ( elem.firstChild && elem.firstChild.nodeType === 1 ) {
+ elem = elem.firstChild;
+ }
+
+ return elem;
+ }).append( this );
+ }
+
+ return this;
+ },
+
+ wrapInner: function( html ) {
+ if ( jQuery.isFunction( html ) ) {
+ return this.each(function(i) {
+ jQuery(this).wrapInner( html.call(this, i) );
+ });
+ }
+
+ return this.each(function() {
+ var self = jQuery( this ),
+ contents = self.contents();
+
+ if ( contents.length ) {
+ contents.wrapAll( html );
+
+ } else {
+ self.append( html );
+ }
+ });
+ },
+
+ wrap: function( html ) {
+ var isFunction = jQuery.isFunction( html );
+
+ return this.each(function(i) {
+ jQuery( this ).wrapAll( isFunction ? html.call(this, i) : html );
+ });
+ },
+
+ unwrap: function() {
+ return this.parent().each(function() {
+ if ( !jQuery.nodeName( this, "body" ) ) {
+ jQuery( this ).replaceWith( this.childNodes );
+ }
+ }).end();
+ },
+
+ append: function() {
+ return this.domManip(arguments, true, function( elem ) {
+ if ( this.nodeType === 1 ) {
+ this.appendChild( elem );
+ }
+ });
+ },
+
+ prepend: function() {
+ return this.domManip(arguments, true, function( elem ) {
+ if ( this.nodeType === 1 ) {
+ this.insertBefore( elem, this.firstChild );
+ }
+ });
+ },
+
+ before: function() {
+ if ( this[0] && this[0].parentNode ) {
+ return this.domManip(arguments, false, function( elem ) {
+ this.parentNode.insertBefore( elem, this );
+ });
+ } else if ( arguments.length ) {
+ var set = jQuery.clean( arguments );
+ set.push.apply( set, this.toArray() );
+ return this.pushStack( set, "before", arguments );
+ }
+ },
+
+ after: function() {
+ if ( this[0] && this[0].parentNode ) {
+ return this.domManip(arguments, false, function( elem ) {
+ this.parentNode.insertBefore( elem, this.nextSibling );
+ });
+ } else if ( arguments.length ) {
+ var set = this.pushStack( this, "after", arguments );
+ set.push.apply( set, jQuery.clean(arguments) );
+ return set;
+ }
+ },
+
+ // keepData is for internal use only--do not document
+ remove: function( selector, keepData ) {
+ for ( var i = 0, elem; (elem = this[i]) != null; i++ ) {
+ if ( !selector || jQuery.filter( selector, [ elem ] ).length ) {
+ if ( !keepData && elem.nodeType === 1 ) {
+ jQuery.cleanData( elem.getElementsByTagName("*") );
+ jQuery.cleanData( [ elem ] );
+ }
+
+ if ( elem.parentNode ) {
+ elem.parentNode.removeChild( elem );
+ }
+ }
+ }
+
+ return this;
+ },
+
+ empty: function() {
+ for ( var i = 0, elem; (elem = this[i]) != null; i++ ) {
+ // Remove element nodes and prevent memory leaks
+ if ( elem.nodeType === 1 ) {
+ jQuery.cleanData( elem.getElementsByTagName("*") );
+ }
+
+ // Remove any remaining nodes
+ while ( elem.firstChild ) {
+ elem.removeChild( elem.firstChild );
+ }
+ }
+
+ return this;
+ },
+
+ clone: function( dataAndEvents, deepDataAndEvents ) {
+ dataAndEvents = dataAndEvents == null ? false : dataAndEvents;
+ deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents;
+
+ return this.map( function () {
+ return jQuery.clone( this, dataAndEvents, deepDataAndEvents );
+ });
+ },
+
+ html: function( value ) {
+ return jQuery.access( this, function( value ) {
+ var elem = this[0] || {},
+ i = 0,
+ l = this.length;
+
+ if ( value === undefined ) {
+ return elem.nodeType === 1 ?
+ elem.innerHTML.replace( rinlinejQuery, "" ) :
+ null;
+ }
+
+
+ if ( typeof value === "string" && !rnoInnerhtml.test( value ) &&
+ ( jQuery.support.leadingWhitespace || !rleadingWhitespace.test( value ) ) &&
+ !wrapMap[ ( rtagName.exec( value ) || ["", ""] )[1].toLowerCase() ] ) {
+
+ value = value.replace( rxhtmlTag, "<$1></$2>" );
+
+ try {
+ for (; i < l; i++ ) {
+ // Remove element nodes and prevent memory leaks
+ elem = this[i] || {};
+ if ( elem.nodeType === 1 ) {
+ jQuery.cleanData( elem.getElementsByTagName( "*" ) );
+ elem.innerHTML = value;
+ }
+ }
+
+ elem = 0;
+
+ // If using innerHTML throws an exception, use the fallback method
+ } catch(e) {}
+ }
+
+ if ( elem ) {
+ this.empty().append( value );
+ }
+ }, null, value, arguments.length );
+ },
+
+ replaceWith: function( value ) {
+ if ( this[0] && this[0].parentNode ) {
+ // Make sure that the elements are removed from the DOM before they are inserted
+ // this can help fix replacing a parent with child elements
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function(i) {
+ var self = jQuery(this), old = self.html();
+ self.replaceWith( value.call( this, i, old ) );
+ });
+ }
+
+ if ( typeof value !== "string" ) {
+ value = jQuery( value ).detach();
+ }
+
+ return this.each(function() {
+ var next = this.nextSibling,
+ parent = this.parentNode;
+
+ jQuery( this ).remove();
+
+ if ( next ) {
+ jQuery(next).before( value );
+ } else {
+ jQuery(parent).append( value );
+ }
+ });
+ } else {
+ return this.length ?
+ this.pushStack( jQuery(jQuery.isFunction(value) ? value() : value), "replaceWith", value ) :
+ this;
+ }
+ },
+
+ detach: function( selector ) {
+ return this.remove( selector, true );
+ },
+
+ domManip: function( args, table, callback ) {
+ var results, first, fragment, parent,
+ value = args[0],
+ scripts = [];
+
+ // We can't cloneNode fragments that contain checked, in WebKit
+ if ( !jQuery.support.checkClone && arguments.length === 3 && typeof value === "string" && rchecked.test( value ) ) {
+ return this.each(function() {
+ jQuery(this).domManip( args, table, callback, true );
+ });
+ }
+
+ if ( jQuery.isFunction(value) ) {
+ return this.each(function(i) {
+ var self = jQuery(this);
+ args[0] = value.call(this, i, table ? self.html() : undefined);
+ self.domManip( args, table, callback );
+ });
+ }
+
+ if ( this[0] ) {
+ parent = value && value.parentNode;
+
+ // If we're in a fragment, just use that instead of building a new one
+ if ( jQuery.support.parentNode && parent && parent.nodeType === 11 && parent.childNodes.length === this.length ) {
+ results = { fragment: parent };
+
+ } else {
+ results = jQuery.buildFragment( args, this, scripts );
+ }
+
+ fragment = results.fragment;
+
+ if ( fragment.childNodes.length === 1 ) {
+ first = fragment = fragment.firstChild;
+ } else {
+ first = fragment.firstChild;
+ }
+
+ if ( first ) {
+ table = table && jQuery.nodeName( first, "tr" );
+
+ for ( var i = 0, l = this.length, lastIndex = l - 1; i < l; i++ ) {
+ callback.call(
+ table ?
+ root(this[i], first) :
+ this[i],
+ // Make sure that we do not leak memory by inadvertently discarding
+ // the original fragment (which might have attached data) instead of
+ // using it; in addition, use the original fragment object for the last
+ // item instead of first because it can end up being emptied incorrectly
+ // in certain situations (Bug #8070).
+ // Fragments from the fragment cache must always be cloned and never used
+ // in place.
+ results.cacheable || ( l > 1 && i < lastIndex ) ?
+ jQuery.clone( fragment, true, true ) :
+ fragment
+ );
+ }
+ }
+
+ if ( scripts.length ) {
+ jQuery.each( scripts, function( i, elem ) {
+ if ( elem.src ) {
+ jQuery.ajax({
+ type: "GET",
+ global: false,
+ url: elem.src,
+ async: false,
+ dataType: "script"
+ });
+ } else {
+ jQuery.globalEval( ( elem.text || elem.textContent || elem.innerHTML || "" ).replace( rcleanScript, "/*$0*/" ) );
+ }
+
+ if ( elem.parentNode ) {
+ elem.parentNode.removeChild( elem );
+ }
+ });
+ }
+ }
+
+ return this;
+ }
+});
+
+function root( elem, cur ) {
+ return jQuery.nodeName(elem, "table") ?
+ (elem.getElementsByTagName("tbody")[0] ||
+ elem.appendChild(elem.ownerDocument.createElement("tbody"))) :
+ elem;
+}
+
+function cloneCopyEvent( src, dest ) {
+
+ if ( dest.nodeType !== 1 || !jQuery.hasData( src ) ) {
+ return;
+ }
+
+ var type, i, l,
+ oldData = jQuery._data( src ),
+ curData = jQuery._data( dest, oldData ),
+ events = oldData.events;
+
+ if ( events ) {
+ delete curData.handle;
+ curData.events = {};
+
+ for ( type in events ) {
+ for ( i = 0, l = events[ type ].length; i < l; i++ ) {
+ jQuery.event.add( dest, type, events[ type ][ i ] );
+ }
+ }
+ }
+
+ // make the cloned public data object a copy from the original
+ if ( curData.data ) {
+ curData.data = jQuery.extend( {}, curData.data );
+ }
+}
+
+function cloneFixAttributes( src, dest ) {
+ var nodeName;
+
+ // We do not need to do anything for non-Elements
+ if ( dest.nodeType !== 1 ) {
+ return;
+ }
+
+ // clearAttributes removes the attributes, which we don't want,
+ // but also removes the attachEvent events, which we *do* want
+ if ( dest.clearAttributes ) {
+ dest.clearAttributes();
+ }
+
+ // mergeAttributes, in contrast, only merges back on the
+ // original attributes, not the events
+ if ( dest.mergeAttributes ) {
+ dest.mergeAttributes( src );
+ }
+
+ nodeName = dest.nodeName.toLowerCase();
+
+ // IE6-8 fail to clone children inside object elements that use
+ // the proprietary classid attribute value (rather than the type
+ // attribute) to identify the type of content to display
+ if ( nodeName === "object" ) {
+ dest.outerHTML = src.outerHTML;
+
+ } else if ( nodeName === "input" && (src.type === "checkbox" || src.type === "radio") ) {
+ // IE6-8 fails to persist the checked state of a cloned checkbox
+ // or radio button. Worse, IE6-7 fail to give the cloned element
+ // a checked appearance if the defaultChecked value isn't also set
+ if ( src.checked ) {
+ dest.defaultChecked = dest.checked = src.checked;
+ }
+
+ // IE6-7 get confused and end up setting the value of a cloned
+ // checkbox/radio button to an empty string instead of "on"
+ if ( dest.value !== src.value ) {
+ dest.value = src.value;
+ }
+
+ // IE6-8 fails to return the selected option to the default selected
+ // state when cloning options
+ } else if ( nodeName === "option" ) {
+ dest.selected = src.defaultSelected;
+
+ // IE6-8 fails to set the defaultValue to the correct value when
+ // cloning other types of input fields
+ } else if ( nodeName === "input" || nodeName === "textarea" ) {
+ dest.defaultValue = src.defaultValue;
+
+ // IE blanks contents when cloning scripts
+ } else if ( nodeName === "script" && dest.text !== src.text ) {
+ dest.text = src.text;
+ }
+
+ // Event data gets referenced instead of copied if the expando
+ // gets copied too
+ dest.removeAttribute( jQuery.expando );
+
+ // Clear flags for bubbling special change/submit events, they must
+ // be reattached when the newly cloned events are first activated
+ dest.removeAttribute( "_submit_attached" );
+ dest.removeAttribute( "_change_attached" );
+}
+
+jQuery.buildFragment = function( args, nodes, scripts ) {
+ var fragment, cacheable, cacheresults, doc,
+ first = args[ 0 ];
+
+ // nodes may contain either an explicit document object,
+ // a jQuery collection or context object.
+ // If nodes[0] contains a valid object to assign to doc
+ if ( nodes && nodes[0] ) {
+ doc = nodes[0].ownerDocument || nodes[0];
+ }
+
+ // Ensure that an attr object doesn't incorrectly stand in as a document object
+ // Chrome and Firefox seem to allow this to occur and will throw exception
+ // Fixes #8950
+ if ( !doc.createDocumentFragment ) {
+ doc = document;
+ }
+
+ // Only cache "small" (1/2 KB) HTML strings that are associated with the main document
+ // Cloning options loses the selected state, so don't cache them
+ // IE 6 doesn't like it when you put <object> or <embed> elements in a fragment
+ // Also, WebKit does not clone 'checked' attributes on cloneNode, so don't cache
+ // Lastly, IE6,7,8 will not correctly reuse cached fragments that were created from unknown elems #10501
+ if ( args.length === 1 && typeof first === "string" && first.length < 512 && doc === document &&
+ first.charAt(0) === "<" && !rnocache.test( first ) &&
+ (jQuery.support.checkClone || !rchecked.test( first )) &&
+ (jQuery.support.html5Clone || !rnoshimcache.test( first )) ) {
+
+ cacheable = true;
+
+ cacheresults = jQuery.fragments[ first ];
+ if ( cacheresults && cacheresults !== 1 ) {
+ fragment = cacheresults;
+ }
+ }
+
+ if ( !fragment ) {
+ fragment = doc.createDocumentFragment();
+ jQuery.clean( args, doc, fragment, scripts );
+ }
+
+ if ( cacheable ) {
+ jQuery.fragments[ first ] = cacheresults ? fragment : 1;
+ }
+
+ return { fragment: fragment, cacheable: cacheable };
+};
+
+jQuery.fragments = {};
+
+jQuery.each({
+ appendTo: "append",
+ prependTo: "prepend",
+ insertBefore: "before",
+ insertAfter: "after",
+ replaceAll: "replaceWith"
+}, function( name, original ) {
+ jQuery.fn[ name ] = function( selector ) {
+ var ret = [],
+ insert = jQuery( selector ),
+ parent = this.length === 1 && this[0].parentNode;
+
+ if ( parent && parent.nodeType === 11 && parent.childNodes.length === 1 && insert.length === 1 ) {
+ insert[ original ]( this[0] );
+ return this;
+
+ } else {
+ for ( var i = 0, l = insert.length; i < l; i++ ) {
+ var elems = ( i > 0 ? this.clone(true) : this ).get();
+ jQuery( insert[i] )[ original ]( elems );
+ ret = ret.concat( elems );
+ }
+
+ return this.pushStack( ret, name, insert.selector );
+ }
+ };
+});
+
+function getAll( elem ) {
+ if ( typeof elem.getElementsByTagName !== "undefined" ) {
+ return elem.getElementsByTagName( "*" );
+
+ } else if ( typeof elem.querySelectorAll !== "undefined" ) {
+ return elem.querySelectorAll( "*" );
+
+ } else {
+ return [];
+ }
+}
+
+// Used in clean, fixes the defaultChecked property
+function fixDefaultChecked( elem ) {
+ if ( elem.type === "checkbox" || elem.type === "radio" ) {
+ elem.defaultChecked = elem.checked;
+ }
+}
+// Finds all inputs and passes them to fixDefaultChecked
+function findInputs( elem ) {
+ var nodeName = ( elem.nodeName || "" ).toLowerCase();
+ if ( nodeName === "input" ) {
+ fixDefaultChecked( elem );
+ // Skip scripts, get other children
+ } else if ( nodeName !== "script" && typeof elem.getElementsByTagName !== "undefined" ) {
+ jQuery.grep( elem.getElementsByTagName("input"), fixDefaultChecked );
+ }
+}
+
+// Derived From: http://www.iecss.com/shimprove/javascript/shimprove.1-0-1.js
+function shimCloneNode( elem ) {
+ var div = document.createElement( "div" );
+ safeFragment.appendChild( div );
+
+ div.innerHTML = elem.outerHTML;
+ return div.firstChild;
+}
+
+jQuery.extend({
+ clone: function( elem, dataAndEvents, deepDataAndEvents ) {
+ var srcElements,
+ destElements,
+ i,
+ // IE<=8 does not properly clone detached, unknown element nodes
+ clone = jQuery.support.html5Clone || jQuery.isXMLDoc(elem) || !rnoshimcache.test( "<" + elem.nodeName + ">" ) ?
+ elem.cloneNode( true ) :
+ shimCloneNode( elem );
+
+ if ( (!jQuery.support.noCloneEvent || !jQuery.support.noCloneChecked) &&
+ (elem.nodeType === 1 || elem.nodeType === 11) && !jQuery.isXMLDoc(elem) ) {
+ // IE copies events bound via attachEvent when using cloneNode.
+ // Calling detachEvent on the clone will also remove the events
+ // from the original. In order to get around this, we use some
+ // proprietary methods to clear the events. Thanks to MooTools
+ // guys for this hotness.
+
+ cloneFixAttributes( elem, clone );
+
+ // Using Sizzle here is crazy slow, so we use getElementsByTagName instead
+ srcElements = getAll( elem );
+ destElements = getAll( clone );
+
+ // Weird iteration because IE will replace the length property
+ // with an element if you are cloning the body and one of the
+ // elements on the page has a name or id of "length"
+ for ( i = 0; srcElements[i]; ++i ) {
+ // Ensure that the destination node is not null; Fixes #9587
+ if ( destElements[i] ) {
+ cloneFixAttributes( srcElements[i], destElements[i] );
+ }
+ }
+ }
+
+ // Copy the events from the original to the clone
+ if ( dataAndEvents ) {
+ cloneCopyEvent( elem, clone );
+
+ if ( deepDataAndEvents ) {
+ srcElements = getAll( elem );
+ destElements = getAll( clone );
+
+ for ( i = 0; srcElements[i]; ++i ) {
+ cloneCopyEvent( srcElements[i], destElements[i] );
+ }
+ }
+ }
+
+ srcElements = destElements = null;
+
+ // Return the cloned set
+ return clone;
+ },
+
+ clean: function( elems, context, fragment, scripts ) {
+ var checkScriptType, script, j,
+ ret = [];
+
+ context = context || document;
+
+ // !context.createElement fails in IE with an error but returns typeof 'object'
+ if ( typeof context.createElement === "undefined" ) {
+ context = context.ownerDocument || context[0] && context[0].ownerDocument || document;
+ }
+
+ for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) {
+ if ( typeof elem === "number" ) {
+ elem += "";
+ }
+
+ if ( !elem ) {
+ continue;
+ }
+
+ // Convert html string into DOM nodes
+ if ( typeof elem === "string" ) {
+ if ( !rhtml.test( elem ) ) {
+ elem = context.createTextNode( elem );
+ } else {
+ // Fix "XHTML"-style tags in all browsers
+ elem = elem.replace(rxhtmlTag, "<$1></$2>");
+
+ // Trim whitespace, otherwise indexOf won't work as expected
+ var tag = ( rtagName.exec( elem ) || ["", ""] )[1].toLowerCase(),
+ wrap = wrapMap[ tag ] || wrapMap._default,
+ depth = wrap[0],
+ div = context.createElement("div"),
+ safeChildNodes = safeFragment.childNodes,
+ remove;
+
+ // Append wrapper element to unknown element safe doc fragment
+ if ( context === document ) {
+ // Use the fragment we've already created for this document
+ safeFragment.appendChild( div );
+ } else {
+ // Use a fragment created with the owner document
+ createSafeFragment( context ).appendChild( div );
+ }
+
+ // Go to html and back, then peel off extra wrappers
+ div.innerHTML = wrap[1] + elem + wrap[2];
+
+ // Move to the right depth
+ while ( depth-- ) {
+ div = div.lastChild;
+ }
+
+ // Remove IE's autoinserted <tbody> from table fragments
+ if ( !jQuery.support.tbody ) {
+
+ // String was a <table>, *may* have spurious <tbody>
+ var hasBody = rtbody.test(elem),
+ tbody = tag === "table" && !hasBody ?
+ div.firstChild && div.firstChild.childNodes :
+
+ // String was a bare <thead> or <tfoot>
+ wrap[1] === "<table>" && !hasBody ?
+ div.childNodes :
+ [];
+
+ for ( j = tbody.length - 1; j >= 0 ; --j ) {
+ if ( jQuery.nodeName( tbody[ j ], "tbody" ) && !tbody[ j ].childNodes.length ) {
+ tbody[ j ].parentNode.removeChild( tbody[ j ] );
+ }
+ }
+ }
+
+ // IE completely kills leading whitespace when innerHTML is used
+ if ( !jQuery.support.leadingWhitespace && rleadingWhitespace.test( elem ) ) {
+ div.insertBefore( context.createTextNode( rleadingWhitespace.exec(elem)[0] ), div.firstChild );
+ }
+
+ elem = div.childNodes;
+
+ // Clear elements from DocumentFragment (safeFragment or otherwise)
+ // to avoid hoarding elements. Fixes #11356
+ if ( div ) {
+ div.parentNode.removeChild( div );
+
+ // Guard against -1 index exceptions in FF3.6
+ if ( safeChildNodes.length > 0 ) {
+ remove = safeChildNodes[ safeChildNodes.length - 1 ];
+
+ if ( remove && remove.parentNode ) {
+ remove.parentNode.removeChild( remove );
+ }
+ }
+ }
+ }
+ }
+
+ // Resets defaultChecked for any radios and checkboxes
+ // about to be appended to the DOM in IE 6/7 (#8060)
+ var len;
+ if ( !jQuery.support.appendChecked ) {
+ if ( elem[0] && typeof (len = elem.length) === "number" ) {
+ for ( j = 0; j < len; j++ ) {
+ findInputs( elem[j] );
+ }
+ } else {
+ findInputs( elem );
+ }
+ }
+
+ if ( elem.nodeType ) {
+ ret.push( elem );
+ } else {
+ ret = jQuery.merge( ret, elem );
+ }
+ }
+
+ if ( fragment ) {
+ checkScriptType = function( elem ) {
+ return !elem.type || rscriptType.test( elem.type );
+ };
+ for ( i = 0; ret[i]; i++ ) {
+ script = ret[i];
+ if ( scripts && jQuery.nodeName( script, "script" ) && (!script.type || rscriptType.test( script.type )) ) {
+ scripts.push( script.parentNode ? script.parentNode.removeChild( script ) : script );
+
+ } else {
+ if ( script.nodeType === 1 ) {
+ var jsTags = jQuery.grep( script.getElementsByTagName( "script" ), checkScriptType );
+
+ ret.splice.apply( ret, [i + 1, 0].concat( jsTags ) );
+ }
+ fragment.appendChild( script );
+ }
+ }
+ }
+
+ return ret;
+ },
+
+ cleanData: function( elems ) {
+ var data, id,
+ cache = jQuery.cache,
+ special = jQuery.event.special,
+ deleteExpando = jQuery.support.deleteExpando;
+
+ for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) {
+ if ( elem.nodeName && jQuery.noData[elem.nodeName.toLowerCase()] ) {
+ continue;
+ }
+
+ id = elem[ jQuery.expando ];
+
+ if ( id ) {
+ data = cache[ id ];
+
+ if ( data && data.events ) {
+ for ( var type in data.events ) {
+ if ( special[ type ] ) {
+ jQuery.event.remove( elem, type );
+
+ // This is a shortcut to avoid jQuery.event.remove's overhead
+ } else {
+ jQuery.removeEvent( elem, type, data.handle );
+ }
+ }
+
+ // Null the DOM reference to avoid IE6/7/8 leak (#7054)
+ if ( data.handle ) {
+ data.handle.elem = null;
+ }
+ }
+
+ if ( deleteExpando ) {
+ delete elem[ jQuery.expando ];
+
+ } else if ( elem.removeAttribute ) {
+ elem.removeAttribute( jQuery.expando );
+ }
+
+ delete cache[ id ];
+ }
+ }
+ }
+});
+
+
+
+
+var ralpha = /alpha\([^)]*\)/i,
+ ropacity = /opacity=([^)]*)/,
+ // fixed for IE9, see #8346
+ rupper = /([A-Z]|^ms)/g,
+ rnum = /^[\-+]?(?:\d*\.)?\d+$/i,
+ rnumnonpx = /^-?(?:\d*\.)?\d+(?!px)[^\d\s]+$/i,
+ rrelNum = /^([\-+])=([\-+.\de]+)/,
+ rmargin = /^margin/,
+
+ cssShow = { position: "absolute", visibility: "hidden", display: "block" },
+
+ // order is important!
+ cssExpand = [ "Top", "Right", "Bottom", "Left" ],
+
+ curCSS,
+
+ getComputedStyle,
+ currentStyle;
+
+jQuery.fn.css = function( name, value ) {
+ return jQuery.access( this, function( elem, name, value ) {
+ return value !== undefined ?
+ jQuery.style( elem, name, value ) :
+ jQuery.css( elem, name );
+ }, name, value, arguments.length > 1 );
+};
+
+jQuery.extend({
+ // Add in style property hooks for overriding the default
+ // behavior of getting and setting a style property
+ cssHooks: {
+ opacity: {
+ get: function( elem, computed ) {
+ if ( computed ) {
+ // We should always get a number back from opacity
+ var ret = curCSS( elem, "opacity" );
+ return ret === "" ? "1" : ret;
+
+ } else {
+ return elem.style.opacity;
+ }
+ }
+ }
+ },
+
+ // Exclude the following css properties to add px
+ cssNumber: {
+ "fillOpacity": true,
+ "fontWeight": true,
+ "lineHeight": true,
+ "opacity": true,
+ "orphans": true,
+ "widows": true,
+ "zIndex": true,
+ "zoom": true
+ },
+
+ // Add in properties whose names you wish to fix before
+ // setting or getting the value
+ cssProps: {
+ // normalize float css property
+ "float": jQuery.support.cssFloat ? "cssFloat" : "styleFloat"
+ },
+
+ // Get and set the style property on a DOM Node
+ style: function( elem, name, value, extra ) {
+ // Don't set styles on text and comment nodes
+ if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) {
+ return;
+ }
+
+ // Make sure that we're working with the right name
+ var ret, type, origName = jQuery.camelCase( name ),
+ style = elem.style, hooks = jQuery.cssHooks[ origName ];
+
+ name = jQuery.cssProps[ origName ] || origName;
+
+ // Check if we're setting a value
+ if ( value !== undefined ) {
+ type = typeof value;
+
+ // convert relative number strings (+= or -=) to relative numbers. #7345
+ if ( type === "string" && (ret = rrelNum.exec( value )) ) {
+ value = ( +( ret[1] + 1) * +ret[2] ) + parseFloat( jQuery.css( elem, name ) );
+ // Fixes bug #9237
+ type = "number";
+ }
+
+ // Make sure that NaN and null values aren't set. See: #7116
+ if ( value == null || type === "number" && isNaN( value ) ) {
+ return;
+ }
+
+ // If a number was passed in, add 'px' to the (except for certain CSS properties)
+ if ( type === "number" && !jQuery.cssNumber[ origName ] ) {
+ value += "px";
+ }
+
+ // If a hook was provided, use that value, otherwise just set the specified value
+ if ( !hooks || !("set" in hooks) || (value = hooks.set( elem, value )) !== undefined ) {
+ // Wrapped to prevent IE from throwing errors when 'invalid' values are provided
+ // Fixes bug #5509
+ try {
+ style[ name ] = value;
+ } catch(e) {}
+ }
+
+ } else {
+ // If a hook was provided get the non-computed value from there
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, false, extra )) !== undefined ) {
+ return ret;
+ }
+
+ // Otherwise just get the value from the style object
+ return style[ name ];
+ }
+ },
+
+ css: function( elem, name, extra ) {
+ var ret, hooks;
+
+ // Make sure that we're working with the right name
+ name = jQuery.camelCase( name );
+ hooks = jQuery.cssHooks[ name ];
+ name = jQuery.cssProps[ name ] || name;
+
+ // cssFloat needs a special treatment
+ if ( name === "cssFloat" ) {
+ name = "float";
+ }
+
+ // If a hook was provided get the computed value from there
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, true, extra )) !== undefined ) {
+ return ret;
+
+ // Otherwise, if a way to get the computed value exists, use that
+ } else if ( curCSS ) {
+ return curCSS( elem, name );
+ }
+ },
+
+ // A method for quickly swapping in/out CSS properties to get correct calculations
+ swap: function( elem, options, callback ) {
+ var old = {},
+ ret, name;
+
+ // Remember the old values, and insert the new ones
+ for ( name in options ) {
+ old[ name ] = elem.style[ name ];
+ elem.style[ name ] = options[ name ];
+ }
+
+ ret = callback.call( elem );
+
+ // Revert the old values
+ for ( name in options ) {
+ elem.style[ name ] = old[ name ];
+ }
+
+ return ret;
+ }
+});
+
+// DEPRECATED in 1.3, Use jQuery.css() instead
+jQuery.curCSS = jQuery.css;
+
+if ( document.defaultView && document.defaultView.getComputedStyle ) {
+ getComputedStyle = function( elem, name ) {
+ var ret, defaultView, computedStyle, width,
+ style = elem.style;
+
+ name = name.replace( rupper, "-$1" ).toLowerCase();
+
+ if ( (defaultView = elem.ownerDocument.defaultView) &&
+ (computedStyle = defaultView.getComputedStyle( elem, null )) ) {
+
+ ret = computedStyle.getPropertyValue( name );
+ if ( ret === "" && !jQuery.contains( elem.ownerDocument.documentElement, elem ) ) {
+ ret = jQuery.style( elem, name );
+ }
+ }
+
+ // A tribute to the "awesome hack by Dean Edwards"
+ // WebKit uses "computed value (percentage if specified)" instead of "used value" for margins
+ // which is against the CSSOM draft spec: http://dev.w3.org/csswg/cssom/#resolved-values
+ if ( !jQuery.support.pixelMargin && computedStyle && rmargin.test( name ) && rnumnonpx.test( ret ) ) {
+ width = style.width;
+ style.width = ret;
+ ret = computedStyle.width;
+ style.width = width;
+ }
+
+ return ret;
+ };
+}
+
+if ( document.documentElement.currentStyle ) {
+ currentStyle = function( elem, name ) {
+ var left, rsLeft, uncomputed,
+ ret = elem.currentStyle && elem.currentStyle[ name ],
+ style = elem.style;
+
+ // Avoid setting ret to empty string here
+ // so we don't default to auto
+ if ( ret == null && style && (uncomputed = style[ name ]) ) {
+ ret = uncomputed;
+ }
+
+ // From the awesome hack by Dean Edwards
+ // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291
+
+ // If we're not dealing with a regular pixel number
+ // but a number that has a weird ending, we need to convert it to pixels
+ if ( rnumnonpx.test( ret ) ) {
+
+ // Remember the original values
+ left = style.left;
+ rsLeft = elem.runtimeStyle && elem.runtimeStyle.left;
+
+ // Put in the new values to get a computed value out
+ if ( rsLeft ) {
+ elem.runtimeStyle.left = elem.currentStyle.left;
+ }
+ style.left = name === "fontSize" ? "1em" : ret;
+ ret = style.pixelLeft + "px";
+
+ // Revert the changed values
+ style.left = left;
+ if ( rsLeft ) {
+ elem.runtimeStyle.left = rsLeft;
+ }
+ }
+
+ return ret === "" ? "auto" : ret;
+ };
+}
+
+curCSS = getComputedStyle || currentStyle;
+
+function getWidthOrHeight( elem, name, extra ) {
+
+ // Start with offset property
+ var val = name === "width" ? elem.offsetWidth : elem.offsetHeight,
+ i = name === "width" ? 1 : 0,
+ len = 4;
+
+ if ( val > 0 ) {
+ if ( extra !== "border" ) {
+ for ( ; i < len; i += 2 ) {
+ if ( !extra ) {
+ val -= parseFloat( jQuery.css( elem, "padding" + cssExpand[ i ] ) ) || 0;
+ }
+ if ( extra === "margin" ) {
+ val += parseFloat( jQuery.css( elem, extra + cssExpand[ i ] ) ) || 0;
+ } else {
+ val -= parseFloat( jQuery.css( elem, "border" + cssExpand[ i ] + "Width" ) ) || 0;
+ }
+ }
+ }
+
+ return val + "px";
+ }
+
+ // Fall back to computed then uncomputed css if necessary
+ val = curCSS( elem, name );
+ if ( val < 0 || val == null ) {
+ val = elem.style[ name ];
+ }
+
+ // Computed unit is not pixels. Stop here and return.
+ if ( rnumnonpx.test(val) ) {
+ return val;
+ }
+
+ // Normalize "", auto, and prepare for extra
+ val = parseFloat( val ) || 0;
+
+ // Add padding, border, margin
+ if ( extra ) {
+ for ( ; i < len; i += 2 ) {
+ val += parseFloat( jQuery.css( elem, "padding" + cssExpand[ i ] ) ) || 0;
+ if ( extra !== "padding" ) {
+ val += parseFloat( jQuery.css( elem, "border" + cssExpand[ i ] + "Width" ) ) || 0;
+ }
+ if ( extra === "margin" ) {
+ val += parseFloat( jQuery.css( elem, extra + cssExpand[ i ]) ) || 0;
+ }
+ }
+ }
+
+ return val + "px";
+}
+
+jQuery.each([ "height", "width" ], function( i, name ) {
+ jQuery.cssHooks[ name ] = {
+ get: function( elem, computed, extra ) {
+ if ( computed ) {
+ if ( elem.offsetWidth !== 0 ) {
+ return getWidthOrHeight( elem, name, extra );
+ } else {
+ return jQuery.swap( elem, cssShow, function() {
+ return getWidthOrHeight( elem, name, extra );
+ });
+ }
+ }
+ },
+
+ set: function( elem, value ) {
+ return rnum.test( value ) ?
+ value + "px" :
+ value;
+ }
+ };
+});
+
+if ( !jQuery.support.opacity ) {
+ jQuery.cssHooks.opacity = {
+ get: function( elem, computed ) {
+ // IE uses filters for opacity
+ return ropacity.test( (computed && elem.currentStyle ? elem.currentStyle.filter : elem.style.filter) || "" ) ?
+ ( parseFloat( RegExp.$1 ) / 100 ) + "" :
+ computed ? "1" : "";
+ },
+
+ set: function( elem, value ) {
+ var style = elem.style,
+ currentStyle = elem.currentStyle,
+ opacity = jQuery.isNumeric( value ) ? "alpha(opacity=" + value * 100 + ")" : "",
+ filter = currentStyle && currentStyle.filter || style.filter || "";
+
+ // IE has trouble with opacity if it does not have layout
+ // Force it by setting the zoom level
+ style.zoom = 1;
+
+ // if setting opacity to 1, and no other filters exist - attempt to remove filter attribute #6652
+ if ( value >= 1 && jQuery.trim( filter.replace( ralpha, "" ) ) === "" ) {
+
+ // Setting style.filter to null, "" & " " still leave "filter:" in the cssText
+ // if "filter:" is present at all, clearType is disabled, we want to avoid this
+ // style.removeAttribute is IE Only, but so apparently is this code path...
+ style.removeAttribute( "filter" );
+
+ // if there there is no filter style applied in a css rule, we are done
+ if ( currentStyle && !currentStyle.filter ) {
+ return;
+ }
+ }
+
+ // otherwise, set new filter values
+ style.filter = ralpha.test( filter ) ?
+ filter.replace( ralpha, opacity ) :
+ filter + " " + opacity;
+ }
+ };
+}
+
+jQuery(function() {
+ // This hook cannot be added until DOM ready because the support test
+ // for it is not run until after DOM ready
+ if ( !jQuery.support.reliableMarginRight ) {
+ jQuery.cssHooks.marginRight = {
+ get: function( elem, computed ) {
+ // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right
+ // Work around by temporarily setting element display to inline-block
+ return jQuery.swap( elem, { "display": "inline-block" }, function() {
+ if ( computed ) {
+ return curCSS( elem, "margin-right" );
+ } else {
+ return elem.style.marginRight;
+ }
+ });
+ }
+ };
+ }
+});
+
+if ( jQuery.expr && jQuery.expr.filters ) {
+ jQuery.expr.filters.hidden = function( elem ) {
+ var width = elem.offsetWidth,
+ height = elem.offsetHeight;
+
+ return ( width === 0 && height === 0 ) || (!jQuery.support.reliableHiddenOffsets && ((elem.style && elem.style.display) || jQuery.css( elem, "display" )) === "none");
+ };
+
+ jQuery.expr.filters.visible = function( elem ) {
+ return !jQuery.expr.filters.hidden( elem );
+ };
+}
+
+// These hooks are used by animate to expand properties
+jQuery.each({
+ margin: "",
+ padding: "",
+ border: "Width"
+}, function( prefix, suffix ) {
+
+ jQuery.cssHooks[ prefix + suffix ] = {
+ expand: function( value ) {
+ var i,
+
+ // assumes a single number if not a string
+ parts = typeof value === "string" ? value.split(" ") : [ value ],
+ expanded = {};
+
+ for ( i = 0; i < 4; i++ ) {
+ expanded[ prefix + cssExpand[ i ] + suffix ] =
+ parts[ i ] || parts[ i - 2 ] || parts[ 0 ];
+ }
+
+ return expanded;
+ }
+ };
+});
+
+
+
+
+var r20 = /%20/g,
+ rbracket = /\[\]$/,
+ rCRLF = /\r?\n/g,
+ rhash = /#.*$/,
+ rheaders = /^(.*?):[ \t]*([^\r\n]*)\r?$/mg, // IE leaves an \r character at EOL
+ rinput = /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,
+ // #7653, #8125, #8152: local protocol detection
+ rlocalProtocol = /^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,
+ rnoContent = /^(?:GET|HEAD)$/,
+ rprotocol = /^\/\//,
+ rquery = /\?/,
+ rscript = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
+ rselectTextarea = /^(?:select|textarea)/i,
+ rspacesAjax = /\s+/,
+ rts = /([?&])_=[^&]*/,
+ rurl = /^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,
+
+ // Keep a copy of the old load method
+ _load = jQuery.fn.load,
+
+ /* Prefilters
+ * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example)
+ * 2) These are called:
+ * - BEFORE asking for a transport
+ * - AFTER param serialization (s.data is a string if s.processData is true)
+ * 3) key is the dataType
+ * 4) the catchall symbol "*" can be used
+ * 5) execution will start with transport dataType and THEN continue down to "*" if needed
+ */
+ prefilters = {},
+
+ /* Transports bindings
+ * 1) key is the dataType
+ * 2) the catchall symbol "*" can be used
+ * 3) selection will start with transport dataType and THEN go to "*" if needed
+ */
+ transports = {},
+
+ // Document location
+ ajaxLocation,
+
+ // Document location segments
+ ajaxLocParts,
+
+ // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression
+ allTypes = ["*/"] + ["*"];
+
+// #8138, IE may throw an exception when accessing
+// a field from window.location if document.domain has been set
+try {
+ ajaxLocation = location.href;
+} catch( e ) {
+ // Use the href attribute of an A element
+ // since IE will modify it given document.location
+ ajaxLocation = document.createElement( "a" );
+ ajaxLocation.href = "";
+ ajaxLocation = ajaxLocation.href;
+}
+
+// Segment location into parts
+ajaxLocParts = rurl.exec( ajaxLocation.toLowerCase() ) || [];
+
+// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport
+function addToPrefiltersOrTransports( structure ) {
+
+ // dataTypeExpression is optional and defaults to "*"
+ return function( dataTypeExpression, func ) {
+
+ if ( typeof dataTypeExpression !== "string" ) {
+ func = dataTypeExpression;
+ dataTypeExpression = "*";
+ }
+
+ if ( jQuery.isFunction( func ) ) {
+ var dataTypes = dataTypeExpression.toLowerCase().split( rspacesAjax ),
+ i = 0,
+ length = dataTypes.length,
+ dataType,
+ list,
+ placeBefore;
+
+ // For each dataType in the dataTypeExpression
+ for ( ; i < length; i++ ) {
+ dataType = dataTypes[ i ];
+ // We control if we're asked to add before
+ // any existing element
+ placeBefore = /^\+/.test( dataType );
+ if ( placeBefore ) {
+ dataType = dataType.substr( 1 ) || "*";
+ }
+ list = structure[ dataType ] = structure[ dataType ] || [];
+ // then we add to the structure accordingly
+ list[ placeBefore ? "unshift" : "push" ]( func );
+ }
+ }
+ };
+}
+
+// Base inspection function for prefilters and transports
+function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR,
+ dataType /* internal */, inspected /* internal */ ) {
+
+ dataType = dataType || options.dataTypes[ 0 ];
+ inspected = inspected || {};
+
+ inspected[ dataType ] = true;
+
+ var list = structure[ dataType ],
+ i = 0,
+ length = list ? list.length : 0,
+ executeOnly = ( structure === prefilters ),
+ selection;
+
+ for ( ; i < length && ( executeOnly || !selection ); i++ ) {
+ selection = list[ i ]( options, originalOptions, jqXHR );
+ // If we got redirected to another dataType
+ // we try there if executing only and not done already
+ if ( typeof selection === "string" ) {
+ if ( !executeOnly || inspected[ selection ] ) {
+ selection = undefined;
+ } else {
+ options.dataTypes.unshift( selection );
+ selection = inspectPrefiltersOrTransports(
+ structure, options, originalOptions, jqXHR, selection, inspected );
+ }
+ }
+ }
+ // If we're only executing or nothing was selected
+ // we try the catchall dataType if not done already
+ if ( ( executeOnly || !selection ) && !inspected[ "*" ] ) {
+ selection = inspectPrefiltersOrTransports(
+ structure, options, originalOptions, jqXHR, "*", inspected );
+ }
+ // unnecessary when only executing (prefilters)
+ // but it'll be ignored by the caller in that case
+ return selection;
+}
+
+// A special extend for ajax options
+// that takes "flat" options (not to be deep extended)
+// Fixes #9887
+function ajaxExtend( target, src ) {
+ var key, deep,
+ flatOptions = jQuery.ajaxSettings.flatOptions || {};
+ for ( key in src ) {
+ if ( src[ key ] !== undefined ) {
+ ( flatOptions[ key ] ? target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ];
+ }
+ }
+ if ( deep ) {
+ jQuery.extend( true, target, deep );
+ }
+}
+
+jQuery.fn.extend({
+ load: function( url, params, callback ) {
+ if ( typeof url !== "string" && _load ) {
+ return _load.apply( this, arguments );
+
+ // Don't do a request if no elements are being requested
+ } else if ( !this.length ) {
+ return this;
+ }
+
+ var off = url.indexOf( " " );
+ if ( off >= 0 ) {
+ var selector = url.slice( off, url.length );
+ url = url.slice( 0, off );
+ }
+
+ // Default to a GET request
+ var type = "GET";
+
+ // If the second parameter was provided
+ if ( params ) {
+ // If it's a function
+ if ( jQuery.isFunction( params ) ) {
+ // We assume that it's the callback
+ callback = params;
+ params = undefined;
+
+ // Otherwise, build a param string
+ } else if ( typeof params === "object" ) {
+ params = jQuery.param( params, jQuery.ajaxSettings.traditional );
+ type = "POST";
+ }
+ }
+
+ var self = this;
+
+ // Request the remote document
+ jQuery.ajax({
+ url: url,
+ type: type,
+ dataType: "html",
+ data: params,
+ // Complete callback (responseText is used internally)
+ complete: function( jqXHR, status, responseText ) {
+ // Store the response as specified by the jqXHR object
+ responseText = jqXHR.responseText;
+ // If successful, inject the HTML into all the matched elements
+ if ( jqXHR.isResolved() ) {
+ // #4825: Get the actual response in case
+ // a dataFilter is present in ajaxSettings
+ jqXHR.done(function( r ) {
+ responseText = r;
+ });
+ // See if a selector was specified
+ self.html( selector ?
+ // Create a dummy div to hold the results
+ jQuery("<div>")
+ // inject the contents of the document in, removing the scripts
+ // to avoid any 'Permission Denied' errors in IE
+ .append(responseText.replace(rscript, ""))
+
+ // Locate the specified elements
+ .find(selector) :
+
+ // If not, just inject the full result
+ responseText );
+ }
+
+ if ( callback ) {
+ self.each( callback, [ responseText, status, jqXHR ] );
+ }
+ }
+ });
+
+ return this;
+ },
+
+ serialize: function() {
+ return jQuery.param( this.serializeArray() );
+ },
+
+ serializeArray: function() {
+ return this.map(function(){
+ return this.elements ? jQuery.makeArray( this.elements ) : this;
+ })
+ .filter(function(){
+ return this.name && !this.disabled &&
+ ( this.checked || rselectTextarea.test( this.nodeName ) ||
+ rinput.test( this.type ) );
+ })
+ .map(function( i, elem ){
+ var val = jQuery( this ).val();
+
+ return val == null ?
+ null :
+ jQuery.isArray( val ) ?
+ jQuery.map( val, function( val, i ){
+ return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) };
+ }) :
+ { name: elem.name, value: val.replace( rCRLF, "\r\n" ) };
+ }).get();
+ }
+});
+
+// Attach a bunch of functions for handling common AJAX events
+jQuery.each( "ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split( " " ), function( i, o ){
+ jQuery.fn[ o ] = function( f ){
+ return this.on( o, f );
+ };
+});
+
+jQuery.each( [ "get", "post" ], function( i, method ) {
+ jQuery[ method ] = function( url, data, callback, type ) {
+ // shift arguments if data argument was omitted
+ if ( jQuery.isFunction( data ) ) {
+ type = type || callback;
+ callback = data;
+ data = undefined;
+ }
+
+ return jQuery.ajax({
+ type: method,
+ url: url,
+ data: data,
+ success: callback,
+ dataType: type
+ });
+ };
+});
+
+jQuery.extend({
+
+ getScript: function( url, callback ) {
+ return jQuery.get( url, undefined, callback, "script" );
+ },
+
+ getJSON: function( url, data, callback ) {
+ return jQuery.get( url, data, callback, "json" );
+ },
+
+ // Creates a full fledged settings object into target
+ // with both ajaxSettings and settings fields.
+ // If target is omitted, writes into ajaxSettings.
+ ajaxSetup: function( target, settings ) {
+ if ( settings ) {
+ // Building a settings object
+ ajaxExtend( target, jQuery.ajaxSettings );
+ } else {
+ // Extending ajaxSettings
+ settings = target;
+ target = jQuery.ajaxSettings;
+ }
+ ajaxExtend( target, settings );
+ return target;
+ },
+
+ ajaxSettings: {
+ url: ajaxLocation,
+ isLocal: rlocalProtocol.test( ajaxLocParts[ 1 ] ),
+ global: true,
+ type: "GET",
+ contentType: "application/x-www-form-urlencoded; charset=UTF-8",
+ processData: true,
+ async: true,
+ /*
+ timeout: 0,
+ data: null,
+ dataType: null,
+ username: null,
+ password: null,
+ cache: null,
+ traditional: false,
+ headers: {},
+ */
+
+ accepts: {
+ xml: "application/xml, text/xml",
+ html: "text/html",
+ text: "text/plain",
+ json: "application/json, text/javascript",
+ "*": allTypes
+ },
+
+ contents: {
+ xml: /xml/,
+ html: /html/,
+ json: /json/
+ },
+
+ responseFields: {
+ xml: "responseXML",
+ text: "responseText"
+ },
+
+ // List of data converters
+ // 1) key format is "source_type destination_type" (a single space in-between)
+ // 2) the catchall symbol "*" can be used for source_type
+ converters: {
+
+ // Convert anything to text
+ "* text": window.String,
+
+ // Text to html (true = no transformation)
+ "text html": true,
+
+ // Evaluate text as a json expression
+ "text json": jQuery.parseJSON,
+
+ // Parse text as xml
+ "text xml": jQuery.parseXML
+ },
+
+ // For options that shouldn't be deep extended:
+ // you can add your own custom options here if
+ // and when you create one that shouldn't be
+ // deep extended (see ajaxExtend)
+ flatOptions: {
+ context: true,
+ url: true
+ }
+ },
+
+ ajaxPrefilter: addToPrefiltersOrTransports( prefilters ),
+ ajaxTransport: addToPrefiltersOrTransports( transports ),
+
+ // Main method
+ ajax: function( url, options ) {
+
+ // If url is an object, simulate pre-1.5 signature
+ if ( typeof url === "object" ) {
+ options = url;
+ url = undefined;
+ }
+
+ // Force options to be an object
+ options = options || {};
+
+ var // Create the final options object
+ s = jQuery.ajaxSetup( {}, options ),
+ // Callbacks context
+ callbackContext = s.context || s,
+ // Context for global events
+ // It's the callbackContext if one was provided in the options
+ // and if it's a DOM node or a jQuery collection
+ globalEventContext = callbackContext !== s &&
+ ( callbackContext.nodeType || callbackContext instanceof jQuery ) ?
+ jQuery( callbackContext ) : jQuery.event,
+ // Deferreds
+ deferred = jQuery.Deferred(),
+ completeDeferred = jQuery.Callbacks( "once memory" ),
+ // Status-dependent callbacks
+ statusCode = s.statusCode || {},
+ // ifModified key
+ ifModifiedKey,
+ // Headers (they are sent all at once)
+ requestHeaders = {},
+ requestHeadersNames = {},
+ // Response headers
+ responseHeadersString,
+ responseHeaders,
+ // transport
+ transport,
+ // timeout handle
+ timeoutTimer,
+ // Cross-domain detection vars
+ parts,
+ // The jqXHR state
+ state = 0,
+ // To know if global events are to be dispatched
+ fireGlobals,
+ // Loop variable
+ i,
+ // Fake xhr
+ jqXHR = {
+
+ readyState: 0,
+
+ // Caches the header
+ setRequestHeader: function( name, value ) {
+ if ( !state ) {
+ var lname = name.toLowerCase();
+ name = requestHeadersNames[ lname ] = requestHeadersNames[ lname ] || name;
+ requestHeaders[ name ] = value;
+ }
+ return this;
+ },
+
+ // Raw string
+ getAllResponseHeaders: function() {
+ return state === 2 ? responseHeadersString : null;
+ },
+
+ // Builds headers hashtable if needed
+ getResponseHeader: function( key ) {
+ var match;
+ if ( state === 2 ) {
+ if ( !responseHeaders ) {
+ responseHeaders = {};
+ while( ( match = rheaders.exec( responseHeadersString ) ) ) {
+ responseHeaders[ match[1].toLowerCase() ] = match[ 2 ];
+ }
+ }
+ match = responseHeaders[ key.toLowerCase() ];
+ }
+ return match === undefined ? null : match;
+ },
+
+ // Overrides response content-type header
+ overrideMimeType: function( type ) {
+ if ( !state ) {
+ s.mimeType = type;
+ }
+ return this;
+ },
+
+ // Cancel the request
+ abort: function( statusText ) {
+ statusText = statusText || "abort";
+ if ( transport ) {
+ transport.abort( statusText );
+ }
+ done( 0, statusText );
+ return this;
+ }
+ };
+
+ // Callback for when everything is done
+ // It is defined here because jslint complains if it is declared
+ // at the end of the function (which would be more logical and readable)
+ function done( status, nativeStatusText, responses, headers ) {
+
+ // Called once
+ if ( state === 2 ) {
+ return;
+ }
+
+ // State is "done" now
+ state = 2;
+
+ // Clear timeout if it exists
+ if ( timeoutTimer ) {
+ clearTimeout( timeoutTimer );
+ }
+
+ // Dereference transport for early garbage collection
+ // (no matter how long the jqXHR object will be used)
+ transport = undefined;
+
+ // Cache response headers
+ responseHeadersString = headers || "";
+
+ // Set readyState
+ jqXHR.readyState = status > 0 ? 4 : 0;
+
+ var isSuccess,
+ success,
+ error,
+ statusText = nativeStatusText,
+ response = responses ? ajaxHandleResponses( s, jqXHR, responses ) : undefined,
+ lastModified,
+ etag;
+
+ // If successful, handle type chaining
+ if ( status >= 200 && status < 300 || status === 304 ) {
+
+ // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode.
+ if ( s.ifModified ) {
+
+ if ( ( lastModified = jqXHR.getResponseHeader( "Last-Modified" ) ) ) {
+ jQuery.lastModified[ ifModifiedKey ] = lastModified;
+ }
+ if ( ( etag = jqXHR.getResponseHeader( "Etag" ) ) ) {
+ jQuery.etag[ ifModifiedKey ] = etag;
+ }
+ }
+
+ // If not modified
+ if ( status === 304 ) {
+
+ statusText = "notmodified";
+ isSuccess = true;
+
+ // If we have data
+ } else {
+
+ try {
+ success = ajaxConvert( s, response );
+ statusText = "success";
+ isSuccess = true;
+ } catch(e) {
+ // We have a parsererror
+ statusText = "parsererror";
+ error = e;
+ }
+ }
+ } else {
+ // We extract error from statusText
+ // then normalize statusText and status for non-aborts
+ error = statusText;
+ if ( !statusText || status ) {
+ statusText = "error";
+ if ( status < 0 ) {
+ status = 0;
+ }
+ }
+ }
+
+ // Set data for the fake xhr object
+ jqXHR.status = status;
+ jqXHR.statusText = "" + ( nativeStatusText || statusText );
+
+ // Success/Error
+ if ( isSuccess ) {
+ deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] );
+ } else {
+ deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] );
+ }
+
+ // Status-dependent callbacks
+ jqXHR.statusCode( statusCode );
+ statusCode = undefined;
+
+ if ( fireGlobals ) {
+ globalEventContext.trigger( "ajax" + ( isSuccess ? "Success" : "Error" ),
+ [ jqXHR, s, isSuccess ? success : error ] );
+ }
+
+ // Complete
+ completeDeferred.fireWith( callbackContext, [ jqXHR, statusText ] );
+
+ if ( fireGlobals ) {
+ globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] );
+ // Handle the global AJAX counter
+ if ( !( --jQuery.active ) ) {
+ jQuery.event.trigger( "ajaxStop" );
+ }
+ }
+ }
+
+ // Attach deferreds
+ deferred.promise( jqXHR );
+ jqXHR.success = jqXHR.done;
+ jqXHR.error = jqXHR.fail;
+ jqXHR.complete = completeDeferred.add;
+
+ // Status-dependent callbacks
+ jqXHR.statusCode = function( map ) {
+ if ( map ) {
+ var tmp;
+ if ( state < 2 ) {
+ for ( tmp in map ) {
+ statusCode[ tmp ] = [ statusCode[tmp], map[tmp] ];
+ }
+ } else {
+ tmp = map[ jqXHR.status ];
+ jqXHR.then( tmp, tmp );
+ }
+ }
+ return this;
+ };
+
+ // Remove hash character (#7531: and string promotion)
+ // Add protocol if not provided (#5866: IE7 issue with protocol-less urls)
+ // We also use the url parameter if available
+ s.url = ( ( url || s.url ) + "" ).replace( rhash, "" ).replace( rprotocol, ajaxLocParts[ 1 ] + "//" );
+
+ // Extract dataTypes list
+ s.dataTypes = jQuery.trim( s.dataType || "*" ).toLowerCase().split( rspacesAjax );
+
+ // Determine if a cross-domain request is in order
+ if ( s.crossDomain == null ) {
+ parts = rurl.exec( s.url.toLowerCase() );
+ s.crossDomain = !!( parts &&
+ ( parts[ 1 ] != ajaxLocParts[ 1 ] || parts[ 2 ] != ajaxLocParts[ 2 ] ||
+ ( parts[ 3 ] || ( parts[ 1 ] === "http:" ? 80 : 443 ) ) !=
+ ( ajaxLocParts[ 3 ] || ( ajaxLocParts[ 1 ] === "http:" ? 80 : 443 ) ) )
+ );
+ }
+
+ // Convert data if not already a string
+ if ( s.data && s.processData && typeof s.data !== "string" ) {
+ s.data = jQuery.param( s.data, s.traditional );
+ }
+
+ // Apply prefilters
+ inspectPrefiltersOrTransports( prefilters, s, options, jqXHR );
+
+ // If request was aborted inside a prefilter, stop there
+ if ( state === 2 ) {
+ return false;
+ }
+
+ // We can fire global events as of now if asked to
+ fireGlobals = s.global;
+
+ // Uppercase the type
+ s.type = s.type.toUpperCase();
+
+ // Determine if request has content
+ s.hasContent = !rnoContent.test( s.type );
+
+ // Watch for a new set of requests
+ if ( fireGlobals && jQuery.active++ === 0 ) {
+ jQuery.event.trigger( "ajaxStart" );
+ }
+
+ // More options handling for requests with no content
+ if ( !s.hasContent ) {
+
+ // If data is available, append data to url
+ if ( s.data ) {
+ s.url += ( rquery.test( s.url ) ? "&" : "?" ) + s.data;
+ // #9682: remove data so that it's not used in an eventual retry
+ delete s.data;
+ }
+
+ // Get ifModifiedKey before adding the anti-cache parameter
+ ifModifiedKey = s.url;
+
+ // Add anti-cache in url if needed
+ if ( s.cache === false ) {
+
+ var ts = jQuery.now(),
+ // try replacing _= if it is there
+ ret = s.url.replace( rts, "$1_=" + ts );
+
+ // if nothing was replaced, add timestamp to the end
+ s.url = ret + ( ( ret === s.url ) ? ( rquery.test( s.url ) ? "&" : "?" ) + "_=" + ts : "" );
+ }
+ }
+
+ // Set the correct header, if data is being sent
+ if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) {
+ jqXHR.setRequestHeader( "Content-Type", s.contentType );
+ }
+
+ // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode.
+ if ( s.ifModified ) {
+ ifModifiedKey = ifModifiedKey || s.url;
+ if ( jQuery.lastModified[ ifModifiedKey ] ) {
+ jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ ifModifiedKey ] );
+ }
+ if ( jQuery.etag[ ifModifiedKey ] ) {
+ jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ ifModifiedKey ] );
+ }
+ }
+
+ // Set the Accepts header for the server, depending on the dataType
+ jqXHR.setRequestHeader(
+ "Accept",
+ s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[0] ] ?
+ s.accepts[ s.dataTypes[0] ] + ( s.dataTypes[ 0 ] !== "*" ? ", " + allTypes + "; q=0.01" : "" ) :
+ s.accepts[ "*" ]
+ );
+
+ // Check for headers option
+ for ( i in s.headers ) {
+ jqXHR.setRequestHeader( i, s.headers[ i ] );
+ }
+
+ // Allow custom headers/mimetypes and early abort
+ if ( s.beforeSend && ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || state === 2 ) ) {
+ // Abort if not done already
+ jqXHR.abort();
+ return false;
+
+ }
+
+ // Install callbacks on deferreds
+ for ( i in { success: 1, error: 1, complete: 1 } ) {
+ jqXHR[ i ]( s[ i ] );
+ }
+
+ // Get transport
+ transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR );
+
+ // If no transport, we auto-abort
+ if ( !transport ) {
+ done( -1, "No Transport" );
+ } else {
+ jqXHR.readyState = 1;
+ // Send global event
+ if ( fireGlobals ) {
+ globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] );
+ }
+ // Timeout
+ if ( s.async && s.timeout > 0 ) {
+ timeoutTimer = setTimeout( function(){
+ jqXHR.abort( "timeout" );
+ }, s.timeout );
+ }
+
+ try {
+ state = 1;
+ transport.send( requestHeaders, done );
+ } catch (e) {
+ // Propagate exception as error if not done
+ if ( state < 2 ) {
+ done( -1, e );
+ // Simply rethrow otherwise
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ return jqXHR;
+ },
+
+ // Serialize an array of form elements or a set of
+ // key/values into a query string
+ param: function( a, traditional ) {
+ var s = [],
+ add = function( key, value ) {
+ // If value is a function, invoke it and return its value
+ value = jQuery.isFunction( value ) ? value() : value;
+ s[ s.length ] = encodeURIComponent( key ) + "=" + encodeURIComponent( value );
+ };
+
+ // Set traditional to true for jQuery <= 1.3.2 behavior.
+ if ( traditional === undefined ) {
+ traditional = jQuery.ajaxSettings.traditional;
+ }
+
+ // If an array was passed in, assume that it is an array of form elements.
+ if ( jQuery.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) {
+ // Serialize the form elements
+ jQuery.each( a, function() {
+ add( this.name, this.value );
+ });
+
+ } else {
+ // If traditional, encode the "old" way (the way 1.3.2 or older
+ // did it), otherwise encode params recursively.
+ for ( var prefix in a ) {
+ buildParams( prefix, a[ prefix ], traditional, add );
+ }
+ }
+
+ // Return the resulting serialization
+ return s.join( "&" ).replace( r20, "+" );
+ }
+});
+
+function buildParams( prefix, obj, traditional, add ) {
+ if ( jQuery.isArray( obj ) ) {
+ // Serialize array item.
+ jQuery.each( obj, function( i, v ) {
+ if ( traditional || rbracket.test( prefix ) ) {
+ // Treat each array item as a scalar.
+ add( prefix, v );
+
+ } else {
+ // If array item is non-scalar (array or object), encode its
+ // numeric index to resolve deserialization ambiguity issues.
+ // Note that rack (as of 1.0.0) can't currently deserialize
+ // nested arrays properly, and attempting to do so may cause
+ // a server error. Possible fixes are to modify rack's
+ // deserialization algorithm or to provide an option or flag
+ // to force array serialization to be shallow.
+ buildParams( prefix + "[" + ( typeof v === "object" ? i : "" ) + "]", v, traditional, add );
+ }
+ });
+
+ } else if ( !traditional && jQuery.type( obj ) === "object" ) {
+ // Serialize object item.
+ for ( var name in obj ) {
+ buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add );
+ }
+
+ } else {
+ // Serialize scalar item.
+ add( prefix, obj );
+ }
+}
+
+// This is still on the jQuery object... for now
+// Want to move this to jQuery.ajax some day
+jQuery.extend({
+
+ // Counter for holding the number of active queries
+ active: 0,
+
+ // Last-Modified header cache for next request
+ lastModified: {},
+ etag: {}
+
+});
+
+/* Handles responses to an ajax request:
+ * - sets all responseXXX fields accordingly
+ * - finds the right dataType (mediates between content-type and expected dataType)
+ * - returns the corresponding response
+ */
+function ajaxHandleResponses( s, jqXHR, responses ) {
+
+ var contents = s.contents,
+ dataTypes = s.dataTypes,
+ responseFields = s.responseFields,
+ ct,
+ type,
+ finalDataType,
+ firstDataType;
+
+ // Fill responseXXX fields
+ for ( type in responseFields ) {
+ if ( type in responses ) {
+ jqXHR[ responseFields[type] ] = responses[ type ];
+ }
+ }
+
+ // Remove auto dataType and get content-type in the process
+ while( dataTypes[ 0 ] === "*" ) {
+ dataTypes.shift();
+ if ( ct === undefined ) {
+ ct = s.mimeType || jqXHR.getResponseHeader( "content-type" );
+ }
+ }
+
+ // Check if we're dealing with a known content-type
+ if ( ct ) {
+ for ( type in contents ) {
+ if ( contents[ type ] && contents[ type ].test( ct ) ) {
+ dataTypes.unshift( type );
+ break;
+ }
+ }
+ }
+
+ // Check to see if we have a response for the expected dataType
+ if ( dataTypes[ 0 ] in responses ) {
+ finalDataType = dataTypes[ 0 ];
+ } else {
+ // Try convertible dataTypes
+ for ( type in responses ) {
+ if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[0] ] ) {
+ finalDataType = type;
+ break;
+ }
+ if ( !firstDataType ) {
+ firstDataType = type;
+ }
+ }
+ // Or just use first one
+ finalDataType = finalDataType || firstDataType;
+ }
+
+ // If we found a dataType
+ // We add the dataType to the list if needed
+ // and return the corresponding response
+ if ( finalDataType ) {
+ if ( finalDataType !== dataTypes[ 0 ] ) {
+ dataTypes.unshift( finalDataType );
+ }
+ return responses[ finalDataType ];
+ }
+}
+
+// Chain conversions given the request and the original response
+function ajaxConvert( s, response ) {
+
+ // Apply the dataFilter if provided
+ if ( s.dataFilter ) {
+ response = s.dataFilter( response, s.dataType );
+ }
+
+ var dataTypes = s.dataTypes,
+ converters = {},
+ i,
+ key,
+ length = dataTypes.length,
+ tmp,
+ // Current and previous dataTypes
+ current = dataTypes[ 0 ],
+ prev,
+ // Conversion expression
+ conversion,
+ // Conversion function
+ conv,
+ // Conversion functions (transitive conversion)
+ conv1,
+ conv2;
+
+ // For each dataType in the chain
+ for ( i = 1; i < length; i++ ) {
+
+ // Create converters map
+ // with lowercased keys
+ if ( i === 1 ) {
+ for ( key in s.converters ) {
+ if ( typeof key === "string" ) {
+ converters[ key.toLowerCase() ] = s.converters[ key ];
+ }
+ }
+ }
+
+ // Get the dataTypes
+ prev = current;
+ current = dataTypes[ i ];
+
+ // If current is auto dataType, update it to prev
+ if ( current === "*" ) {
+ current = prev;
+ // If no auto and dataTypes are actually different
+ } else if ( prev !== "*" && prev !== current ) {
+
+ // Get the converter
+ conversion = prev + " " + current;
+ conv = converters[ conversion ] || converters[ "* " + current ];
+
+ // If there is no direct converter, search transitively
+ if ( !conv ) {
+ conv2 = undefined;
+ for ( conv1 in converters ) {
+ tmp = conv1.split( " " );
+ if ( tmp[ 0 ] === prev || tmp[ 0 ] === "*" ) {
+ conv2 = converters[ tmp[1] + " " + current ];
+ if ( conv2 ) {
+ conv1 = converters[ conv1 ];
+ if ( conv1 === true ) {
+ conv = conv2;
+ } else if ( conv2 === true ) {
+ conv = conv1;
+ }
+ break;
+ }
+ }
+ }
+ }
+ // If we found no converter, dispatch an error
+ if ( !( conv || conv2 ) ) {
+ jQuery.error( "No conversion from " + conversion.replace(" "," to ") );
+ }
+ // If found converter is not an equivalence
+ if ( conv !== true ) {
+ // Convert with 1 or 2 converters accordingly
+ response = conv ? conv( response ) : conv2( conv1(response) );
+ }
+ }
+ }
+ return response;
+}
+
+
+
+
+var jsc = jQuery.now(),
+ jsre = /(\=)\?(&|$)|\?\?/i;
+
+// Default jsonp settings
+jQuery.ajaxSetup({
+ jsonp: "callback",
+ jsonpCallback: function() {
+ return jQuery.expando + "_" + ( jsc++ );
+ }
+});
+
+// Detect, normalize options and install callbacks for jsonp requests
+jQuery.ajaxPrefilter( "json jsonp", function( s, originalSettings, jqXHR ) {
+
+ var inspectData = ( typeof s.data === "string" ) && /^application\/x\-www\-form\-urlencoded/.test( s.contentType );
+
+ if ( s.dataTypes[ 0 ] === "jsonp" ||
+ s.jsonp !== false && ( jsre.test( s.url ) ||
+ inspectData && jsre.test( s.data ) ) ) {
+
+ var responseContainer,
+ jsonpCallback = s.jsonpCallback =
+ jQuery.isFunction( s.jsonpCallback ) ? s.jsonpCallback() : s.jsonpCallback,
+ previous = window[ jsonpCallback ],
+ url = s.url,
+ data = s.data,
+ replace = "$1" + jsonpCallback + "$2";
+
+ if ( s.jsonp !== false ) {
+ url = url.replace( jsre, replace );
+ if ( s.url === url ) {
+ if ( inspectData ) {
+ data = data.replace( jsre, replace );
+ }
+ if ( s.data === data ) {
+ // Add callback manually
+ url += (/\?/.test( url ) ? "&" : "?") + s.jsonp + "=" + jsonpCallback;
+ }
+ }
+ }
+
+ s.url = url;
+ s.data = data;
+
+ // Install callback
+ window[ jsonpCallback ] = function( response ) {
+ responseContainer = [ response ];
+ };
+
+ // Clean-up function
+ jqXHR.always(function() {
+ // Set callback back to previous value
+ window[ jsonpCallback ] = previous;
+ // Call if it was a function and we have a response
+ if ( responseContainer && jQuery.isFunction( previous ) ) {
+ window[ jsonpCallback ]( responseContainer[ 0 ] );
+ }
+ });
+
+ // Use data converter to retrieve json after script execution
+ s.converters["script json"] = function() {
+ if ( !responseContainer ) {
+ jQuery.error( jsonpCallback + " was not called" );
+ }
+ return responseContainer[ 0 ];
+ };
+
+ // force json dataType
+ s.dataTypes[ 0 ] = "json";
+
+ // Delegate to script
+ return "script";
+ }
+});
+
+
+
+
+// Install script dataType
+jQuery.ajaxSetup({
+ accepts: {
+ script: "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"
+ },
+ contents: {
+ script: /javascript|ecmascript/
+ },
+ converters: {
+ "text script": function( text ) {
+ jQuery.globalEval( text );
+ return text;
+ }
+ }
+});
+
+// Handle cache's special case and global
+jQuery.ajaxPrefilter( "script", function( s ) {
+ if ( s.cache === undefined ) {
+ s.cache = false;
+ }
+ if ( s.crossDomain ) {
+ s.type = "GET";
+ s.global = false;
+ }
+});
+
+// Bind script tag hack transport
+jQuery.ajaxTransport( "script", function(s) {
+
+ // This transport only deals with cross domain requests
+ if ( s.crossDomain ) {
+
+ var script,
+ head = document.head || document.getElementsByTagName( "head" )[0] || document.documentElement;
+
+ return {
+
+ send: function( _, callback ) {
+
+ script = document.createElement( "script" );
+
+ script.async = "async";
+
+ if ( s.scriptCharset ) {
+ script.charset = s.scriptCharset;
+ }
+
+ script.src = s.url;
+
+ // Attach handlers for all browsers
+ script.onload = script.onreadystatechange = function( _, isAbort ) {
+
+ if ( isAbort || !script.readyState || /loaded|complete/.test( script.readyState ) ) {
+
+ // Handle memory leak in IE
+ script.onload = script.onreadystatechange = null;
+
+ // Remove the script
+ if ( head && script.parentNode ) {
+ head.removeChild( script );
+ }
+
+ // Dereference the script
+ script = undefined;
+
+ // Callback if not abort
+ if ( !isAbort ) {
+ callback( 200, "success" );
+ }
+ }
+ };
+ // Use insertBefore instead of appendChild to circumvent an IE6 bug.
+ // This arises when a base node is used (#2709 and #4378).
+ head.insertBefore( script, head.firstChild );
+ },
+
+ abort: function() {
+ if ( script ) {
+ script.onload( 0, 1 );
+ }
+ }
+ };
+ }
+});
+
+
+
+
+var // #5280: Internet Explorer will keep connections alive if we don't abort on unload
+ xhrOnUnloadAbort = window.ActiveXObject ? function() {
+ // Abort all pending requests
+ for ( var key in xhrCallbacks ) {
+ xhrCallbacks[ key ]( 0, 1 );
+ }
+ } : false,
+ xhrId = 0,
+ xhrCallbacks;
+
+// Functions to create xhrs
+function createStandardXHR() {
+ try {
+ return new window.XMLHttpRequest();
+ } catch( e ) {}
+}
+
+function createActiveXHR() {
+ try {
+ return new window.ActiveXObject( "Microsoft.XMLHTTP" );
+ } catch( e ) {}
+}
+
+// Create the request object
+// (This is still attached to ajaxSettings for backward compatibility)
+jQuery.ajaxSettings.xhr = window.ActiveXObject ?
+ /* Microsoft failed to properly
+ * implement the XMLHttpRequest in IE7 (can't request local files),
+ * so we use the ActiveXObject when it is available
+ * Additionally XMLHttpRequest can be disabled in IE7/IE8 so
+ * we need a fallback.
+ */
+ function() {
+ return !this.isLocal && createStandardXHR() || createActiveXHR();
+ } :
+ // For all other browsers, use the standard XMLHttpRequest object
+ createStandardXHR;
+
+// Determine support properties
+(function( xhr ) {
+ jQuery.extend( jQuery.support, {
+ ajax: !!xhr,
+ cors: !!xhr && ( "withCredentials" in xhr )
+ });
+})( jQuery.ajaxSettings.xhr() );
+
+// Create transport if the browser can provide an xhr
+if ( jQuery.support.ajax ) {
+
+ jQuery.ajaxTransport(function( s ) {
+ // Cross domain only allowed if supported through XMLHttpRequest
+ if ( !s.crossDomain || jQuery.support.cors ) {
+
+ var callback;
+
+ return {
+ send: function( headers, complete ) {
+
+ // Get a new xhr
+ var xhr = s.xhr(),
+ handle,
+ i;
+
+ // Open the socket
+ // Passing null username, generates a login popup on Opera (#2865)
+ if ( s.username ) {
+ xhr.open( s.type, s.url, s.async, s.username, s.password );
+ } else {
+ xhr.open( s.type, s.url, s.async );
+ }
+
+ // Apply custom fields if provided
+ if ( s.xhrFields ) {
+ for ( i in s.xhrFields ) {
+ xhr[ i ] = s.xhrFields[ i ];
+ }
+ }
+
+ // Override mime type if needed
+ if ( s.mimeType && xhr.overrideMimeType ) {
+ xhr.overrideMimeType( s.mimeType );
+ }
+
+ // X-Requested-With header
+ // For cross-domain requests, seeing as conditions for a preflight are
+ // akin to a jigsaw puzzle, we simply never set it to be sure.
+ // (it can always be set on a per-request basis or even using ajaxSetup)
+ // For same-domain requests, won't change header if already provided.
+ if ( !s.crossDomain && !headers["X-Requested-With"] ) {
+ headers[ "X-Requested-With" ] = "XMLHttpRequest";
+ }
+
+ // Need an extra try/catch for cross domain requests in Firefox 3
+ try {
+ for ( i in headers ) {
+ xhr.setRequestHeader( i, headers[ i ] );
+ }
+ } catch( _ ) {}
+
+ // Do send the request
+ // This may raise an exception which is actually
+ // handled in jQuery.ajax (so no try/catch here)
+ xhr.send( ( s.hasContent && s.data ) || null );
+
+ // Listener
+ callback = function( _, isAbort ) {
+
+ var status,
+ statusText,
+ responseHeaders,
+ responses,
+ xml;
+
+ // Firefox throws exceptions when accessing properties
+ // of an xhr when a network error occured
+ // http://helpful.knobs-dials.com/index.php/Component_returned_failure_code:_0x80040111_(NS_ERROR_NOT_AVAILABLE)
+ try {
+
+ // Was never called and is aborted or complete
+ if ( callback && ( isAbort || xhr.readyState === 4 ) ) {
+
+ // Only called once
+ callback = undefined;
+
+ // Do not keep as active anymore
+ if ( handle ) {
+ xhr.onreadystatechange = jQuery.noop;
+ if ( xhrOnUnloadAbort ) {
+ delete xhrCallbacks[ handle ];
+ }
+ }
+
+ // If it's an abort
+ if ( isAbort ) {
+ // Abort it manually if needed
+ if ( xhr.readyState !== 4 ) {
+ xhr.abort();
+ }
+ } else {
+ status = xhr.status;
+ responseHeaders = xhr.getAllResponseHeaders();
+ responses = {};
+ xml = xhr.responseXML;
+
+ // Construct response list
+ if ( xml && xml.documentElement /* #4958 */ ) {
+ responses.xml = xml;
+ }
+
+ // When requesting binary data, IE6-9 will throw an exception
+ // on any attempt to access responseText (#11426)
+ try {
+ responses.text = xhr.responseText;
+ } catch( _ ) {
+ }
+
+ // Firefox throws an exception when accessing
+ // statusText for faulty cross-domain requests
+ try {
+ statusText = xhr.statusText;
+ } catch( e ) {
+ // We normalize with Webkit giving an empty statusText
+ statusText = "";
+ }
+
+ // Filter status for non standard behaviors
+
+ // If the request is local and we have data: assume a success
+ // (success with no data won't get notified, that's the best we
+ // can do given current implementations)
+ if ( !status && s.isLocal && !s.crossDomain ) {
+ status = responses.text ? 200 : 404;
+ // IE - #1450: sometimes returns 1223 when it should be 204
+ } else if ( status === 1223 ) {
+ status = 204;
+ }
+ }
+ }
+ } catch( firefoxAccessException ) {
+ if ( !isAbort ) {
+ complete( -1, firefoxAccessException );
+ }
+ }
+
+ // Call complete if needed
+ if ( responses ) {
+ complete( status, statusText, responses, responseHeaders );
+ }
+ };
+
+ // if we're in sync mode or it's in cache
+ // and has been retrieved directly (IE6 & IE7)
+ // we need to manually fire the callback
+ if ( !s.async || xhr.readyState === 4 ) {
+ callback();
+ } else {
+ handle = ++xhrId;
+ if ( xhrOnUnloadAbort ) {
+ // Create the active xhrs callbacks list if needed
+ // and attach the unload handler
+ if ( !xhrCallbacks ) {
+ xhrCallbacks = {};
+ jQuery( window ).unload( xhrOnUnloadAbort );
+ }
+ // Add to list of active xhrs callbacks
+ xhrCallbacks[ handle ] = callback;
+ }
+ xhr.onreadystatechange = callback;
+ }
+ },
+
+ abort: function() {
+ if ( callback ) {
+ callback(0,1);
+ }
+ }
+ };
+ }
+ });
+}
+
+
+
+
+var elemdisplay = {},
+ iframe, iframeDoc,
+ rfxtypes = /^(?:toggle|show|hide)$/,
+ rfxnum = /^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,
+ timerId,
+ fxAttrs = [
+ // height animations
+ [ "height", "marginTop", "marginBottom", "paddingTop", "paddingBottom" ],
+ // width animations
+ [ "width", "marginLeft", "marginRight", "paddingLeft", "paddingRight" ],
+ // opacity animations
+ [ "opacity" ]
+ ],
+ fxNow;
+
+jQuery.fn.extend({
+ show: function( speed, easing, callback ) {
+ var elem, display;
+
+ if ( speed || speed === 0 ) {
+ return this.animate( genFx("show", 3), speed, easing, callback );
+
+ } else {
+ for ( var i = 0, j = this.length; i < j; i++ ) {
+ elem = this[ i ];
+
+ if ( elem.style ) {
+ display = elem.style.display;
+
+ // Reset the inline display of this element to learn if it is
+ // being hidden by cascaded rules or not
+ if ( !jQuery._data(elem, "olddisplay") && display === "none" ) {
+ display = elem.style.display = "";
+ }
+
+ // Set elements which have been overridden with display: none
+ // in a stylesheet to whatever the default browser style is
+ // for such an element
+ if ( (display === "" && jQuery.css(elem, "display") === "none") ||
+ !jQuery.contains( elem.ownerDocument.documentElement, elem ) ) {
+ jQuery._data( elem, "olddisplay", defaultDisplay(elem.nodeName) );
+ }
+ }
+ }
+
+ // Set the display of most of the elements in a second loop
+ // to avoid the constant reflow
+ for ( i = 0; i < j; i++ ) {
+ elem = this[ i ];
+
+ if ( elem.style ) {
+ display = elem.style.display;
+
+ if ( display === "" || display === "none" ) {
+ elem.style.display = jQuery._data( elem, "olddisplay" ) || "";
+ }
+ }
+ }
+
+ return this;
+ }
+ },
+
+ hide: function( speed, easing, callback ) {
+ if ( speed || speed === 0 ) {
+ return this.animate( genFx("hide", 3), speed, easing, callback);
+
+ } else {
+ var elem, display,
+ i = 0,
+ j = this.length;
+
+ for ( ; i < j; i++ ) {
+ elem = this[i];
+ if ( elem.style ) {
+ display = jQuery.css( elem, "display" );
+
+ if ( display !== "none" && !jQuery._data( elem, "olddisplay" ) ) {
+ jQuery._data( elem, "olddisplay", display );
+ }
+ }
+ }
+
+ // Set the display of the elements in a second loop
+ // to avoid the constant reflow
+ for ( i = 0; i < j; i++ ) {
+ if ( this[i].style ) {
+ this[i].style.display = "none";
+ }
+ }
+
+ return this;
+ }
+ },
+
+ // Save the old toggle function
+ _toggle: jQuery.fn.toggle,
+
+ toggle: function( fn, fn2, callback ) {
+ var bool = typeof fn === "boolean";
+
+ if ( jQuery.isFunction(fn) && jQuery.isFunction(fn2) ) {
+ this._toggle.apply( this, arguments );
+
+ } else if ( fn == null || bool ) {
+ this.each(function() {
+ var state = bool ? fn : jQuery(this).is(":hidden");
+ jQuery(this)[ state ? "show" : "hide" ]();
+ });
+
+ } else {
+ this.animate(genFx("toggle", 3), fn, fn2, callback);
+ }
+
+ return this;
+ },
+
+ fadeTo: function( speed, to, easing, callback ) {
+ return this.filter(":hidden").css("opacity", 0).show().end()
+ .animate({opacity: to}, speed, easing, callback);
+ },
+
+ animate: function( prop, speed, easing, callback ) {
+ var optall = jQuery.speed( speed, easing, callback );
+
+ if ( jQuery.isEmptyObject( prop ) ) {
+ return this.each( optall.complete, [ false ] );
+ }
+
+ // Do not change referenced properties as per-property easing will be lost
+ prop = jQuery.extend( {}, prop );
+
+ function doAnimation() {
+ // XXX 'this' does not always have a nodeName when running the
+ // test suite
+
+ if ( optall.queue === false ) {
+ jQuery._mark( this );
+ }
+
+ var opt = jQuery.extend( {}, optall ),
+ isElement = this.nodeType === 1,
+ hidden = isElement && jQuery(this).is(":hidden"),
+ name, val, p, e, hooks, replace,
+ parts, start, end, unit,
+ method;
+
+ // will store per property easing and be used to determine when an animation is complete
+ opt.animatedProperties = {};
+
+ // first pass over propertys to expand / normalize
+ for ( p in prop ) {
+ name = jQuery.camelCase( p );
+ if ( p !== name ) {
+ prop[ name ] = prop[ p ];
+ delete prop[ p ];
+ }
+
+ if ( ( hooks = jQuery.cssHooks[ name ] ) && "expand" in hooks ) {
+ replace = hooks.expand( prop[ name ] );
+ delete prop[ name ];
+
+ // not quite $.extend, this wont overwrite keys already present.
+ // also - reusing 'p' from above because we have the correct "name"
+ for ( p in replace ) {
+ if ( ! ( p in prop ) ) {
+ prop[ p ] = replace[ p ];
+ }
+ }
+ }
+ }
+
+ for ( name in prop ) {
+ val = prop[ name ];
+ // easing resolution: per property > opt.specialEasing > opt.easing > 'swing' (default)
+ if ( jQuery.isArray( val ) ) {
+ opt.animatedProperties[ name ] = val[ 1 ];
+ val = prop[ name ] = val[ 0 ];
+ } else {
+ opt.animatedProperties[ name ] = opt.specialEasing && opt.specialEasing[ name ] || opt.easing || 'swing';
+ }
+
+ if ( val === "hide" && hidden || val === "show" && !hidden ) {
+ return opt.complete.call( this );
+ }
+
+ if ( isElement && ( name === "height" || name === "width" ) ) {
+ // Make sure that nothing sneaks out
+ // Record all 3 overflow attributes because IE does not
+ // change the overflow attribute when overflowX and
+ // overflowY are set to the same value
+ opt.overflow = [ this.style.overflow, this.style.overflowX, this.style.overflowY ];
+
+ // Set display property to inline-block for height/width
+ // animations on inline elements that are having width/height animated
+ if ( jQuery.css( this, "display" ) === "inline" &&
+ jQuery.css( this, "float" ) === "none" ) {
+
+ // inline-level elements accept inline-block;
+ // block-level elements need to be inline with layout
+ if ( !jQuery.support.inlineBlockNeedsLayout || defaultDisplay( this.nodeName ) === "inline" ) {
+ this.style.display = "inline-block";
+
+ } else {
+ this.style.zoom = 1;
+ }
+ }
+ }
+ }
+
+ if ( opt.overflow != null ) {
+ this.style.overflow = "hidden";
+ }
+
+ for ( p in prop ) {
+ e = new jQuery.fx( this, opt, p );
+ val = prop[ p ];
+
+ if ( rfxtypes.test( val ) ) {
+
+ // Tracks whether to show or hide based on private
+ // data attached to the element
+ method = jQuery._data( this, "toggle" + p ) || ( val === "toggle" ? hidden ? "show" : "hide" : 0 );
+ if ( method ) {
+ jQuery._data( this, "toggle" + p, method === "show" ? "hide" : "show" );
+ e[ method ]();
+ } else {
+ e[ val ]();
+ }
+
+ } else {
+ parts = rfxnum.exec( val );
+ start = e.cur();
+
+ if ( parts ) {
+ end = parseFloat( parts[2] );
+ unit = parts[3] || ( jQuery.cssNumber[ p ] ? "" : "px" );
+
+ // We need to compute starting value
+ if ( unit !== "px" ) {
+ jQuery.style( this, p, (end || 1) + unit);
+ start = ( (end || 1) / e.cur() ) * start;
+ jQuery.style( this, p, start + unit);
+ }
+
+ // If a +=/-= token was provided, we're doing a relative animation
+ if ( parts[1] ) {
+ end = ( (parts[ 1 ] === "-=" ? -1 : 1) * end ) + start;
+ }
+
+ e.custom( start, end, unit );
+
+ } else {
+ e.custom( start, val, "" );
+ }
+ }
+ }
+
+ // For JS strict compliance
+ return true;
+ }
+
+ return optall.queue === false ?
+ this.each( doAnimation ) :
+ this.queue( optall.queue, doAnimation );
+ },
+
+ stop: function( type, clearQueue, gotoEnd ) {
+ if ( typeof type !== "string" ) {
+ gotoEnd = clearQueue;
+ clearQueue = type;
+ type = undefined;
+ }
+ if ( clearQueue && type !== false ) {
+ this.queue( type || "fx", [] );
+ }
+
+ return this.each(function() {
+ var index,
+ hadTimers = false,
+ timers = jQuery.timers,
+ data = jQuery._data( this );
+
+ // clear marker counters if we know they won't be
+ if ( !gotoEnd ) {
+ jQuery._unmark( true, this );
+ }
+
+ function stopQueue( elem, data, index ) {
+ var hooks = data[ index ];
+ jQuery.removeData( elem, index, true );
+ hooks.stop( gotoEnd );
+ }
+
+ if ( type == null ) {
+ for ( index in data ) {
+ if ( data[ index ] && data[ index ].stop && index.indexOf(".run") === index.length - 4 ) {
+ stopQueue( this, data, index );
+ }
+ }
+ } else if ( data[ index = type + ".run" ] && data[ index ].stop ){
+ stopQueue( this, data, index );
+ }
+
+ for ( index = timers.length; index--; ) {
+ if ( timers[ index ].elem === this && (type == null || timers[ index ].queue === type) ) {
+ if ( gotoEnd ) {
+
+ // force the next step to be the last
+ timers[ index ]( true );
+ } else {
+ timers[ index ].saveState();
+ }
+ hadTimers = true;
+ timers.splice( index, 1 );
+ }
+ }
+
+ // start the next in the queue if the last step wasn't forced
+ // timers currently will call their complete callbacks, which will dequeue
+ // but only if they were gotoEnd
+ if ( !( gotoEnd && hadTimers ) ) {
+ jQuery.dequeue( this, type );
+ }
+ });
+ }
+
+});
+
+// Animations created synchronously will run synchronously
+function createFxNow() {
+ setTimeout( clearFxNow, 0 );
+ return ( fxNow = jQuery.now() );
+}
+
+function clearFxNow() {
+ fxNow = undefined;
+}
+
+// Generate parameters to create a standard animation
+function genFx( type, num ) {
+ var obj = {};
+
+ jQuery.each( fxAttrs.concat.apply([], fxAttrs.slice( 0, num )), function() {
+ obj[ this ] = type;
+ });
+
+ return obj;
+}
+
+// Generate shortcuts for custom animations
+jQuery.each({
+ slideDown: genFx( "show", 1 ),
+ slideUp: genFx( "hide", 1 ),
+ slideToggle: genFx( "toggle", 1 ),
+ fadeIn: { opacity: "show" },
+ fadeOut: { opacity: "hide" },
+ fadeToggle: { opacity: "toggle" }
+}, function( name, props ) {
+ jQuery.fn[ name ] = function( speed, easing, callback ) {
+ return this.animate( props, speed, easing, callback );
+ };
+});
+
+jQuery.extend({
+ speed: function( speed, easing, fn ) {
+ var opt = speed && typeof speed === "object" ? jQuery.extend( {}, speed ) : {
+ complete: fn || !fn && easing ||
+ jQuery.isFunction( speed ) && speed,
+ duration: speed,
+ easing: fn && easing || easing && !jQuery.isFunction( easing ) && easing
+ };
+
+ opt.duration = jQuery.fx.off ? 0 : typeof opt.duration === "number" ? opt.duration :
+ opt.duration in jQuery.fx.speeds ? jQuery.fx.speeds[ opt.duration ] : jQuery.fx.speeds._default;
+
+ // normalize opt.queue - true/undefined/null -> "fx"
+ if ( opt.queue == null || opt.queue === true ) {
+ opt.queue = "fx";
+ }
+
+ // Queueing
+ opt.old = opt.complete;
+
+ opt.complete = function( noUnmark ) {
+ if ( jQuery.isFunction( opt.old ) ) {
+ opt.old.call( this );
+ }
+
+ if ( opt.queue ) {
+ jQuery.dequeue( this, opt.queue );
+ } else if ( noUnmark !== false ) {
+ jQuery._unmark( this );
+ }
+ };
+
+ return opt;
+ },
+
+ easing: {
+ linear: function( p ) {
+ return p;
+ },
+ swing: function( p ) {
+ return ( -Math.cos( p*Math.PI ) / 2 ) + 0.5;
+ }
+ },
+
+ timers: [],
+
+ fx: function( elem, options, prop ) {
+ this.options = options;
+ this.elem = elem;
+ this.prop = prop;
+
+ options.orig = options.orig || {};
+ }
+
+});
+
+jQuery.fx.prototype = {
+ // Simple function for setting a style value
+ update: function() {
+ if ( this.options.step ) {
+ this.options.step.call( this.elem, this.now, this );
+ }
+
+ ( jQuery.fx.step[ this.prop ] || jQuery.fx.step._default )( this );
+ },
+
+ // Get the current size
+ cur: function() {
+ if ( this.elem[ this.prop ] != null && (!this.elem.style || this.elem.style[ this.prop ] == null) ) {
+ return this.elem[ this.prop ];
+ }
+
+ var parsed,
+ r = jQuery.css( this.elem, this.prop );
+ // Empty strings, null, undefined and "auto" are converted to 0,
+ // complex values such as "rotate(1rad)" are returned as is,
+ // simple values such as "10px" are parsed to Float.
+ return isNaN( parsed = parseFloat( r ) ) ? !r || r === "auto" ? 0 : r : parsed;
+ },
+
+ // Start an animation from one number to another
+ custom: function( from, to, unit ) {
+ var self = this,
+ fx = jQuery.fx;
+
+ this.startTime = fxNow || createFxNow();
+ this.end = to;
+ this.now = this.start = from;
+ this.pos = this.state = 0;
+ this.unit = unit || this.unit || ( jQuery.cssNumber[ this.prop ] ? "" : "px" );
+
+ function t( gotoEnd ) {
+ return self.step( gotoEnd );
+ }
+
+ t.queue = this.options.queue;
+ t.elem = this.elem;
+ t.saveState = function() {
+ if ( jQuery._data( self.elem, "fxshow" + self.prop ) === undefined ) {
+ if ( self.options.hide ) {
+ jQuery._data( self.elem, "fxshow" + self.prop, self.start );
+ } else if ( self.options.show ) {
+ jQuery._data( self.elem, "fxshow" + self.prop, self.end );
+ }
+ }
+ };
+
+ if ( t() && jQuery.timers.push(t) && !timerId ) {
+ timerId = setInterval( fx.tick, fx.interval );
+ }
+ },
+
+ // Simple 'show' function
+ show: function() {
+ var dataShow = jQuery._data( this.elem, "fxshow" + this.prop );
+
+ // Remember where we started, so that we can go back to it later
+ this.options.orig[ this.prop ] = dataShow || jQuery.style( this.elem, this.prop );
+ this.options.show = true;
+
+ // Begin the animation
+ // Make sure that we start at a small width/height to avoid any flash of content
+ if ( dataShow !== undefined ) {
+ // This show is picking up where a previous hide or show left off
+ this.custom( this.cur(), dataShow );
+ } else {
+ this.custom( this.prop === "width" || this.prop === "height" ? 1 : 0, this.cur() );
+ }
+
+ // Start by showing the element
+ jQuery( this.elem ).show();
+ },
+
+ // Simple 'hide' function
+ hide: function() {
+ // Remember where we started, so that we can go back to it later
+ this.options.orig[ this.prop ] = jQuery._data( this.elem, "fxshow" + this.prop ) || jQuery.style( this.elem, this.prop );
+ this.options.hide = true;
+
+ // Begin the animation
+ this.custom( this.cur(), 0 );
+ },
+
+ // Each step of an animation
+ step: function( gotoEnd ) {
+ var p, n, complete,
+ t = fxNow || createFxNow(),
+ done = true,
+ elem = this.elem,
+ options = this.options;
+
+ if ( gotoEnd || t >= options.duration + this.startTime ) {
+ this.now = this.end;
+ this.pos = this.state = 1;
+ this.update();
+
+ options.animatedProperties[ this.prop ] = true;
+
+ for ( p in options.animatedProperties ) {
+ if ( options.animatedProperties[ p ] !== true ) {
+ done = false;
+ }
+ }
+
+ if ( done ) {
+ // Reset the overflow
+ if ( options.overflow != null && !jQuery.support.shrinkWrapBlocks ) {
+
+ jQuery.each( [ "", "X", "Y" ], function( index, value ) {
+ elem.style[ "overflow" + value ] = options.overflow[ index ];
+ });
+ }
+
+ // Hide the element if the "hide" operation was done
+ if ( options.hide ) {
+ jQuery( elem ).hide();
+ }
+
+ // Reset the properties, if the item has been hidden or shown
+ if ( options.hide || options.show ) {
+ for ( p in options.animatedProperties ) {
+ jQuery.style( elem, p, options.orig[ p ] );
+ jQuery.removeData( elem, "fxshow" + p, true );
+ // Toggle data is no longer needed
+ jQuery.removeData( elem, "toggle" + p, true );
+ }
+ }
+
+ // Execute the complete function
+ // in the event that the complete function throws an exception
+ // we must ensure it won't be called twice. #5684
+
+ complete = options.complete;
+ if ( complete ) {
+
+ options.complete = false;
+ complete.call( elem );
+ }
+ }
+
+ return false;
+
+ } else {
+ // classical easing cannot be used with an Infinity duration
+ if ( options.duration == Infinity ) {
+ this.now = t;
+ } else {
+ n = t - this.startTime;
+ this.state = n / options.duration;
+
+ // Perform the easing function, defaults to swing
+ this.pos = jQuery.easing[ options.animatedProperties[this.prop] ]( this.state, n, 0, 1, options.duration );
+ this.now = this.start + ( (this.end - this.start) * this.pos );
+ }
+ // Perform the next step of the animation
+ this.update();
+ }
+
+ return true;
+ }
+};
+
+jQuery.extend( jQuery.fx, {
+ tick: function() {
+ var timer,
+ timers = jQuery.timers,
+ i = 0;
+
+ for ( ; i < timers.length; i++ ) {
+ timer = timers[ i ];
+ // Checks the timer has not already been removed
+ if ( !timer() && timers[ i ] === timer ) {
+ timers.splice( i--, 1 );
+ }
+ }
+
+ if ( !timers.length ) {
+ jQuery.fx.stop();
+ }
+ },
+
+ interval: 13,
+
+ stop: function() {
+ clearInterval( timerId );
+ timerId = null;
+ },
+
+ speeds: {
+ slow: 600,
+ fast: 200,
+ // Default speed
+ _default: 400
+ },
+
+ step: {
+ opacity: function( fx ) {
+ jQuery.style( fx.elem, "opacity", fx.now );
+ },
+
+ _default: function( fx ) {
+ if ( fx.elem.style && fx.elem.style[ fx.prop ] != null ) {
+ fx.elem.style[ fx.prop ] = fx.now + fx.unit;
+ } else {
+ fx.elem[ fx.prop ] = fx.now;
+ }
+ }
+ }
+});
+
+// Ensure props that can't be negative don't go there on undershoot easing
+jQuery.each( fxAttrs.concat.apply( [], fxAttrs ), function( i, prop ) {
+ // exclude marginTop, marginLeft, marginBottom and marginRight from this list
+ if ( prop.indexOf( "margin" ) ) {
+ jQuery.fx.step[ prop ] = function( fx ) {
+ jQuery.style( fx.elem, prop, Math.max(0, fx.now) + fx.unit );
+ };
+ }
+});
+
+if ( jQuery.expr && jQuery.expr.filters ) {
+ jQuery.expr.filters.animated = function( elem ) {
+ return jQuery.grep(jQuery.timers, function( fn ) {
+ return elem === fn.elem;
+ }).length;
+ };
+}
+
+// Try to restore the default display value of an element
+function defaultDisplay( nodeName ) {
+
+ if ( !elemdisplay[ nodeName ] ) {
+
+ var body = document.body,
+ elem = jQuery( "<" + nodeName + ">" ).appendTo( body ),
+ display = elem.css( "display" );
+ elem.remove();
+
+ // If the simple way fails,
+ // get element's real default display by attaching it to a temp iframe
+ if ( display === "none" || display === "" ) {
+ // No iframe to use yet, so create it
+ if ( !iframe ) {
+ iframe = document.createElement( "iframe" );
+ iframe.frameBorder = iframe.width = iframe.height = 0;
+ }
+
+ body.appendChild( iframe );
+
+ // Create a cacheable copy of the iframe document on first call.
+ // IE and Opera will allow us to reuse the iframeDoc without re-writing the fake HTML
+ // document to it; WebKit & Firefox won't allow reusing the iframe document.
+ if ( !iframeDoc || !iframe.createElement ) {
+ iframeDoc = ( iframe.contentWindow || iframe.contentDocument ).document;
+ iframeDoc.write( ( jQuery.support.boxModel ? "<!doctype html>" : "" ) + "<html><body>" );
+ iframeDoc.close();
+ }
+
+ elem = iframeDoc.createElement( nodeName );
+
+ iframeDoc.body.appendChild( elem );
+
+ display = jQuery.css( elem, "display" );
+ body.removeChild( iframe );
+ }
+
+ // Store the correct default display
+ elemdisplay[ nodeName ] = display;
+ }
+
+ return elemdisplay[ nodeName ];
+}
+
+
+
+
+var getOffset,
+ rtable = /^t(?:able|d|h)$/i,
+ rroot = /^(?:body|html)$/i;
+
+if ( "getBoundingClientRect" in document.documentElement ) {
+ getOffset = function( elem, doc, docElem, box ) {
+ try {
+ box = elem.getBoundingClientRect();
+ } catch(e) {}
+
+ // Make sure we're not dealing with a disconnected DOM node
+ if ( !box || !jQuery.contains( docElem, elem ) ) {
+ return box ? { top: box.top, left: box.left } : { top: 0, left: 0 };
+ }
+
+ var body = doc.body,
+ win = getWindow( doc ),
+ clientTop = docElem.clientTop || body.clientTop || 0,
+ clientLeft = docElem.clientLeft || body.clientLeft || 0,
+ scrollTop = win.pageYOffset || jQuery.support.boxModel && docElem.scrollTop || body.scrollTop,
+ scrollLeft = win.pageXOffset || jQuery.support.boxModel && docElem.scrollLeft || body.scrollLeft,
+ top = box.top + scrollTop - clientTop,
+ left = box.left + scrollLeft - clientLeft;
+
+ return { top: top, left: left };
+ };
+
+} else {
+ getOffset = function( elem, doc, docElem ) {
+ var computedStyle,
+ offsetParent = elem.offsetParent,
+ prevOffsetParent = elem,
+ body = doc.body,
+ defaultView = doc.defaultView,
+ prevComputedStyle = defaultView ? defaultView.getComputedStyle( elem, null ) : elem.currentStyle,
+ top = elem.offsetTop,
+ left = elem.offsetLeft;
+
+ while ( (elem = elem.parentNode) && elem !== body && elem !== docElem ) {
+ if ( jQuery.support.fixedPosition && prevComputedStyle.position === "fixed" ) {
+ break;
+ }
+
+ computedStyle = defaultView ? defaultView.getComputedStyle(elem, null) : elem.currentStyle;
+ top -= elem.scrollTop;
+ left -= elem.scrollLeft;
+
+ if ( elem === offsetParent ) {
+ top += elem.offsetTop;
+ left += elem.offsetLeft;
+
+ if ( jQuery.support.doesNotAddBorder && !(jQuery.support.doesAddBorderForTableAndCells && rtable.test(elem.nodeName)) ) {
+ top += parseFloat( computedStyle.borderTopWidth ) || 0;
+ left += parseFloat( computedStyle.borderLeftWidth ) || 0;
+ }
+
+ prevOffsetParent = offsetParent;
+ offsetParent = elem.offsetParent;
+ }
+
+ if ( jQuery.support.subtractsBorderForOverflowNotVisible && computedStyle.overflow !== "visible" ) {
+ top += parseFloat( computedStyle.borderTopWidth ) || 0;
+ left += parseFloat( computedStyle.borderLeftWidth ) || 0;
+ }
+
+ prevComputedStyle = computedStyle;
+ }
+
+ if ( prevComputedStyle.position === "relative" || prevComputedStyle.position === "static" ) {
+ top += body.offsetTop;
+ left += body.offsetLeft;
+ }
+
+ if ( jQuery.support.fixedPosition && prevComputedStyle.position === "fixed" ) {
+ top += Math.max( docElem.scrollTop, body.scrollTop );
+ left += Math.max( docElem.scrollLeft, body.scrollLeft );
+ }
+
+ return { top: top, left: left };
+ };
+}
+
+jQuery.fn.offset = function( options ) {
+ if ( arguments.length ) {
+ return options === undefined ?
+ this :
+ this.each(function( i ) {
+ jQuery.offset.setOffset( this, options, i );
+ });
+ }
+
+ var elem = this[0],
+ doc = elem && elem.ownerDocument;
+
+ if ( !doc ) {
+ return null;
+ }
+
+ if ( elem === doc.body ) {
+ return jQuery.offset.bodyOffset( elem );
+ }
+
+ return getOffset( elem, doc, doc.documentElement );
+};
+
+jQuery.offset = {
+
+ bodyOffset: function( body ) {
+ var top = body.offsetTop,
+ left = body.offsetLeft;
+
+ if ( jQuery.support.doesNotIncludeMarginInBodyOffset ) {
+ top += parseFloat( jQuery.css(body, "marginTop") ) || 0;
+ left += parseFloat( jQuery.css(body, "marginLeft") ) || 0;
+ }
+
+ return { top: top, left: left };
+ },
+
+ setOffset: function( elem, options, i ) {
+ var position = jQuery.css( elem, "position" );
+
+ // set position first, in-case top/left are set even on static elem
+ if ( position === "static" ) {
+ elem.style.position = "relative";
+ }
+
+ var curElem = jQuery( elem ),
+ curOffset = curElem.offset(),
+ curCSSTop = jQuery.css( elem, "top" ),
+ curCSSLeft = jQuery.css( elem, "left" ),
+ calculatePosition = ( position === "absolute" || position === "fixed" ) && jQuery.inArray("auto", [curCSSTop, curCSSLeft]) > -1,
+ props = {}, curPosition = {}, curTop, curLeft;
+
+ // need to be able to calculate position if either top or left is auto and position is either absolute or fixed
+ if ( calculatePosition ) {
+ curPosition = curElem.position();
+ curTop = curPosition.top;
+ curLeft = curPosition.left;
+ } else {
+ curTop = parseFloat( curCSSTop ) || 0;
+ curLeft = parseFloat( curCSSLeft ) || 0;
+ }
+
+ if ( jQuery.isFunction( options ) ) {
+ options = options.call( elem, i, curOffset );
+ }
+
+ if ( options.top != null ) {
+ props.top = ( options.top - curOffset.top ) + curTop;
+ }
+ if ( options.left != null ) {
+ props.left = ( options.left - curOffset.left ) + curLeft;
+ }
+
+ if ( "using" in options ) {
+ options.using.call( elem, props );
+ } else {
+ curElem.css( props );
+ }
+ }
+};
+
+
+jQuery.fn.extend({
+
+ position: function() {
+ if ( !this[0] ) {
+ return null;
+ }
+
+ var elem = this[0],
+
+ // Get *real* offsetParent
+ offsetParent = this.offsetParent(),
+
+ // Get correct offsets
+ offset = this.offset(),
+ parentOffset = rroot.test(offsetParent[0].nodeName) ? { top: 0, left: 0 } : offsetParent.offset();
+
+ // Subtract element margins
+ // note: when an element has margin: auto the offsetLeft and marginLeft
+ // are the same in Safari causing offset.left to incorrectly be 0
+ offset.top -= parseFloat( jQuery.css(elem, "marginTop") ) || 0;
+ offset.left -= parseFloat( jQuery.css(elem, "marginLeft") ) || 0;
+
+ // Add offsetParent borders
+ parentOffset.top += parseFloat( jQuery.css(offsetParent[0], "borderTopWidth") ) || 0;
+ parentOffset.left += parseFloat( jQuery.css(offsetParent[0], "borderLeftWidth") ) || 0;
+
+ // Subtract the two offsets
+ return {
+ top: offset.top - parentOffset.top,
+ left: offset.left - parentOffset.left
+ };
+ },
+
+ offsetParent: function() {
+ return this.map(function() {
+ var offsetParent = this.offsetParent || document.body;
+ while ( offsetParent && (!rroot.test(offsetParent.nodeName) && jQuery.css(offsetParent, "position") === "static") ) {
+ offsetParent = offsetParent.offsetParent;
+ }
+ return offsetParent;
+ });
+ }
+});
+
+
+// Create scrollLeft and scrollTop methods
+jQuery.each( {scrollLeft: "pageXOffset", scrollTop: "pageYOffset"}, function( method, prop ) {
+ var top = /Y/.test( prop );
+
+ jQuery.fn[ method ] = function( val ) {
+ return jQuery.access( this, function( elem, method, val ) {
+ var win = getWindow( elem );
+
+ if ( val === undefined ) {
+ return win ? (prop in win) ? win[ prop ] :
+ jQuery.support.boxModel && win.document.documentElement[ method ] ||
+ win.document.body[ method ] :
+ elem[ method ];
+ }
+
+ if ( win ) {
+ win.scrollTo(
+ !top ? val : jQuery( win ).scrollLeft(),
+ top ? val : jQuery( win ).scrollTop()
+ );
+
+ } else {
+ elem[ method ] = val;
+ }
+ }, method, val, arguments.length, null );
+ };
+});
+
+function getWindow( elem ) {
+ return jQuery.isWindow( elem ) ?
+ elem :
+ elem.nodeType === 9 ?
+ elem.defaultView || elem.parentWindow :
+ false;
+}
+
+
+
+
+// Create width, height, innerHeight, innerWidth, outerHeight and outerWidth methods
+jQuery.each( { Height: "height", Width: "width" }, function( name, type ) {
+ var clientProp = "client" + name,
+ scrollProp = "scroll" + name,
+ offsetProp = "offset" + name;
+
+ // innerHeight and innerWidth
+ jQuery.fn[ "inner" + name ] = function() {
+ var elem = this[0];
+ return elem ?
+ elem.style ?
+ parseFloat( jQuery.css( elem, type, "padding" ) ) :
+ this[ type ]() :
+ null;
+ };
+
+ // outerHeight and outerWidth
+ jQuery.fn[ "outer" + name ] = function( margin ) {
+ var elem = this[0];
+ return elem ?
+ elem.style ?
+ parseFloat( jQuery.css( elem, type, margin ? "margin" : "border" ) ) :
+ this[ type ]() :
+ null;
+ };
+
+ jQuery.fn[ type ] = function( value ) {
+ return jQuery.access( this, function( elem, type, value ) {
+ var doc, docElemProp, orig, ret;
+
+ if ( jQuery.isWindow( elem ) ) {
+ // 3rd condition allows Nokia support, as it supports the docElem prop but not CSS1Compat
+ doc = elem.document;
+ docElemProp = doc.documentElement[ clientProp ];
+ return jQuery.support.boxModel && docElemProp ||
+ doc.body && doc.body[ clientProp ] || docElemProp;
+ }
+
+ // Get document width or height
+ if ( elem.nodeType === 9 ) {
+ // Either scroll[Width/Height] or offset[Width/Height], whichever is greater
+ doc = elem.documentElement;
+
+ // when a window > document, IE6 reports a offset[Width/Height] > client[Width/Height]
+ // so we can't use max, as it'll choose the incorrect offset[Width/Height]
+ // instead we use the correct client[Width/Height]
+ // support:IE6
+ if ( doc[ clientProp ] >= doc[ scrollProp ] ) {
+ return doc[ clientProp ];
+ }
+
+ return Math.max(
+ elem.body[ scrollProp ], doc[ scrollProp ],
+ elem.body[ offsetProp ], doc[ offsetProp ]
+ );
+ }
+
+ // Get width or height on the element
+ if ( value === undefined ) {
+ orig = jQuery.css( elem, type );
+ ret = parseFloat( orig );
+ return jQuery.isNumeric( ret ) ? ret : orig;
+ }
+
+ // Set the width or height on the element
+ jQuery( elem ).css( type, value );
+ }, type, value, arguments.length, null );
+ };
+});
+
+
+
+
+// Expose jQuery to the global object
+window.jQuery = window.$ = jQuery;
+
+// Expose jQuery as an AMD module, but only for AMD loaders that
+// understand the issues with loading multiple versions of jQuery
+// in a page that all might call define(). The loader will indicate
+// they have special allowances for multiple jQuery versions by
+// specifying define.amd.jQuery = true. Register as a named module,
+// since jQuery can be concatenated with other files that may use define,
+// but not use a proper concatenation script that understands anonymous
+// AMD modules. A named AMD is safest and most robust way to register.
+// Lowercase jquery is used because AMD module names are derived from
+// file names, and jQuery is normally delivered in a lowercase file name.
+// Do this after creating the global so that if an AMD module wants to call
+// noConflict to hide this version of jQuery, it will work.
+if ( typeof define === "function" && define.amd && define.amd.jQuery ) {
+ define( "jquery", [], function () { return jQuery; } );
+}
+
+
+
+})( window );
+
diff --git a/contrib/iSenWeb/moses.pl b/contrib/iSenWeb/moses.pl
new file mode 100755
index 000000000..89b437665
--- /dev/null
+++ b/contrib/iSenWeb/moses.pl
@@ -0,0 +1,59 @@
+#!/usr/bin/perl -w
+use warnings;
+use strict;
+$|++;
+
+# file: daemon.pl
+
+# Herve Saint-Amand
+# Universitaet des Saarlandes
+# Tue May 13 19:45:31 2008
+
+# This script starts Moses to run in the background, so that it can be used by
+# the CGI script. It spawns the Moses process, then binds itself to listen on
+# some port, and when it gets a connection, reads it line by line, feeds those
+# to Moses, and sends back the translation.
+
+# You can either run one instance of this on your Web server, or, if you have
+# the hardware setup for it, run several instances of this, then configure
+# translate.cgi to connect to these.
+
+#------------------------------------------------------------------------------
+# includes
+
+use IO::Socket::INET;
+use IPC::Open2;
+
+#------------------------------------------------------------------------------
+# constants, global vars, config
+
+my $MOSES = '/home/tianliang/research/moses-smt/scripts/training/model/moses';
+my $MOSES_INI = '/home/tianliang/research/moses-smt/scripts/training/model/moses.ini';
+
+die "usage: daemon.pl <hostname> <port>" unless (@ARGV == 2);
+my $LISTEN_HOST = shift;
+my $LISTEN_PORT = shift;
+
+#------------------------------------------------------------------------------
+# main
+
+# spawn moses
+my ($MOSES_IN, $MOSES_OUT);
+my $pid = open2 ($MOSES_OUT, $MOSES_IN, $MOSES, '-f', $MOSES_INI);
+
+# open server socket
+my $server_sock = new IO::Socket::INET
+ (LocalAddr => $LISTEN_HOST, LocalPort => $LISTEN_PORT, Listen => 1)
+ || die "Can't bind server socket";
+
+while (my $client_sock = $server_sock->accept) {
+ while (my $line = <$client_sock>) {
+ print $MOSES_IN $line;
+ $MOSES_IN->flush ();
+ print $client_sock scalar <$MOSES_OUT>;
+ }
+
+ $client_sock->close ();
+}
+
+#------------------------------------------------------------------------------
diff --git a/contrib/iSenWeb/themes/images/common/Logo (1000x300).png b/contrib/iSenWeb/themes/images/common/Logo (1000x300).png
new file mode 100755
index 000000000..43a3a5cd1
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/Logo (1000x300).png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/Logo (2000x2000).png b/contrib/iSenWeb/themes/images/common/Logo (2000x2000).png
new file mode 100755
index 000000000..2c46f7424
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/Logo (2000x2000).png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/Logo (250x250).png b/contrib/iSenWeb/themes/images/common/Logo (250x250).png
new file mode 100755
index 000000000..0a710e39a
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/Logo (250x250).png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/Logo (500x500).png b/contrib/iSenWeb/themes/images/common/Logo (500x500).png
new file mode 100755
index 000000000..8e154b6cd
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/Logo (500x500).png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/Logo.png b/contrib/iSenWeb/themes/images/common/Logo.png
new file mode 100755
index 000000000..43a3a5cd1
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/Logo.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/Logo_lab.png b/contrib/iSenWeb/themes/images/common/Logo_lab.png
new file mode 100755
index 000000000..1358e1782
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/Logo_lab.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/header_bg.png b/contrib/iSenWeb/themes/images/common/header_bg.png
new file mode 100755
index 000000000..8b682f7c6
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/header_bg.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/ico_cor10.png b/contrib/iSenWeb/themes/images/common/ico_cor10.png
new file mode 100755
index 000000000..71fe9be6c
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/ico_cor10.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/icon_feedback.png b/contrib/iSenWeb/themes/images/common/icon_feedback.png
new file mode 100755
index 000000000..1f02cef36
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/icon_feedback.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/logo_christmas.png b/contrib/iSenWeb/themes/images/common/logo_christmas.png
new file mode 100755
index 000000000..c5ce49d29
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/logo_christmas.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/logo_christmas1.png b/contrib/iSenWeb/themes/images/common/logo_christmas1.png
new file mode 100755
index 000000000..a23f52b98
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/logo_christmas1.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/logo_christmas2.png b/contrib/iSenWeb/themes/images/common/logo_christmas2.png
new file mode 100755
index 000000000..2bc098959
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/logo_christmas2.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/logo_christmas3.png b/contrib/iSenWeb/themes/images/common/logo_christmas3.png
new file mode 100755
index 000000000..ba83eb3ec
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/logo_christmas3.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/nav_bgn.png b/contrib/iSenWeb/themes/images/common/nav_bgn.png
new file mode 100755
index 000000000..7967edfd4
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/nav_bgn.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/common/sidebar_bg.png b/contrib/iSenWeb/themes/images/common/sidebar_bg.png
new file mode 100755
index 000000000..8be0429d0
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/common/sidebar_bg.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/fanyi/fanyi_sprite.png b/contrib/iSenWeb/themes/images/fanyi/fanyi_sprite.png
new file mode 100755
index 000000000..9854dc1cd
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/fanyi/fanyi_sprite.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/fanyi/inputTextBg.png b/contrib/iSenWeb/themes/images/fanyi/inputTextBg.png
new file mode 100755
index 000000000..35dacb5c0
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/fanyi/inputTextBg.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/images/search/s.png b/contrib/iSenWeb/themes/images/search/s.png
new file mode 100755
index 000000000..f2a4e94d5
--- /dev/null
+++ b/contrib/iSenWeb/themes/images/search/s.png
Binary files differ
diff --git a/contrib/iSenWeb/themes/styles/common.css b/contrib/iSenWeb/themes/styles/common.css
new file mode 100755
index 000000000..21b6bb8f3
--- /dev/null
+++ b/contrib/iSenWeb/themes/styles/common.css
@@ -0,0 +1,288 @@
+@charset "utf-8";
+
+html,body,div,span,applet,object,iframe,table,caption,tbody,tfoot,thead,tr,th,td,del,dfn,em,font,img,ins,kbd,q,s,samp,small,strike,tt,var,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,dl,dt,dd,ol,ul,li,fieldset,form,label,legend {
+ outline:0;
+ padding:0;
+ margin:0;
+ border:0;
+ text-align:left;
+ font-style:normal;
+ word-wrap:break-word;
+}
+:focus {
+ outline:0;
+}
+body {
+ font-family:"Microsoft Yahei","\534E\6587\9ED1\4F53","Arail","Verdana","Helvetica","sans-serif";
+ color:#999;
+ font-size:12px;
+}
+ol,ul,li {
+ list-style:none;
+}
+table {
+ border-collapse:collapse;
+ border-spacing:0;
+ width:100%;
+}
+caption,th,td {
+ font-weight:normal;
+ text-align:left;
+ vertical-align:top;
+}
+a:link,a:visited {
+ font-family:"Microsoft Yahei";
+ color:#568d99;
+ text-decoration:none;
+}
+a:hover {
+ font-family:"Microsoft Yahei";
+ color:#568d99;
+ text-decoration:underline;
+}
+input.txt {
+ border-top:1px solid #cdcdcd;
+ border-left:1px solid #a4a4a4;
+ border-bottom:1px solid #e8e8e8;
+ border-right:1px solid #d9d9d9;
+ font-family:Arial,Helvetica,sans-serif;
+ color:#666;
+ font-size:14px
+}
+body {
+ background:#eeefef;
+}
+.topWrap {
+ height:200px;
+ background:url(../images/common/header_bg.png) repeat-x center top;
+}
+.topW {
+ width:940px;
+ position:relative;
+ margin:0 auto;
+}
+.top {
+ width:900px;
+ margin:0 auto;
+ height:90px;
+ z-index:100;
+}
+.top .logo {
+ width:20px;
+ height:300px;
+ background:url(../images/common/Logo.png) no-repeat;
+ _background:url(../images/common/logo.gif) no-repeat;
+ float:left;
+ margin:0px 0 0 0;
+}
+.top .logNoLogin {
+ width:159px;
+ overflow:hidden
+}
+.top .logo a {
+ width:165px;
+ height:55px;
+ float:left;
+ text-indent:-9999px;
+}
+.top .nav {
+ float:right;
+ margin-top:37px;
+ font-size:16px;
+ position:relative;
+ width:542px;
+}
+.top .nav a {
+ height:35px;
+ line-height:23px;
+ margin-left:10px;
+ padding:0 10px;
+ float:left;
+ display:block;
+ overflow:hidden;
+ text-decoration:none;
+ text-align:center;
+ color:#3c6770;
+}
+.top .nav a:hover {
+ background:url(../images/common/nav_bgn.png) no-repeat 0 -40px;
+ _background:url(../images/common/nav_bgn.gif) no-repeat 0 -40px;
+ color:#3c6770;
+}
+.top .nav a.current {
+ background:url(../images/common/nav_bgn.png) no-repeat center 0;
+ _background:url(../images/common/nav_bgn.gif) no-repeat center 0;
+}
+.top .nav .uname {
+ float:right
+}
+.top .nav a.username {
+ height:26px;
+ max-width:96px;
+ padding-right:4px;
+ cursor:pointer;
+ display:inline-block;
+ vertical-align:middle
+}
+.top .nav a.username:hover {
+ background:none;
+}
+.top .nav .uname .cor {
+ display:inline-block;
+ width:12px;
+ height:12px;
+ background:url(../images/common/ico_cor10.png) 0 0 no-repeat;
+ cursor:pointer;
+ vertical-align:middle;
+ overflow:hidden
+}
+.noLogin .nav {
+ width:auto;
+ margin-right:48px;
+}
+
+
+.ConBox {
+ width:900px;
+ min-height:600px;
+ margin:15px auto 50px auto;
+ padding-bottom:8px;
+ -webkit-box-shadow:0 0 5px 0 #aeaeae;
+ -moz-box-shadow:0 0 5px 0 #aeaeae;
+ -box-shadow:0 0 5px 0 #aeaeae;
+ -webkit-border-radius:8px;
+ -moz-border-radius:8px;
+ border-radius:8px;
+ background:#fff
+}
+.ConBox .hd {
+ padding:30px 30px 10px;
+}
+.ConBox .hd_left {
+ float:left;
+ width:560px;
+
+}
+.ConBox .hd_right {
+ float:right;
+ width:260px;
+
+}
+
+
+.ConBox .bd {
+ padding:0px;
+ float:right;
+}
+
+.ConBox .rank-index {
+ background-color: #E0EEF7;
+ padding: 10px;
+}
+
+.ConBox .right-panel-title {
+ color: #035168;
+ font: bolder 16px/ 18px "Microsoft Yahei";
+ margin: 0 0 5px 0;
+}
+
+.searchbar {
+ width:900px;
+ margin:10px auto 0;
+ overflow:hidden;
+ *zoom:1;
+}
+.searchbar .bd {
+ float:right;
+ border:1px solid #CBCBCD;
+ height:28px;
+ position:relative;
+ width:181px;
+}
+.searchbar .bd input.ipt {
+ background:url(../images/common/sidebar_bg.png) no-repeat 0 -300px;
+ border:0 none;
+ color:#cfcfcf;
+ font-family:"Microsoft Yahei",arial;
+ font-size:14px;
+ height:28px;
+ *height:27px;
+ line-height:28px;
+ margin:0;
+ padding:0 33px 0 9px;
+ width:119px;
+ _background:url(../images/common/sidebar_bg.gif) 0 -300px no-repeat;
+ *background-position:0 -301px;
+}
+.searchbar .bd input.btn {
+ background:url(../images/common/sidebar_bg.png) no-repeat -188px -343px;
+ border:0 none;
+ cursor:pointer;
+ height:28px;
+ position:absolute;
+ right:0;
+ top:0;
+ width:30px;
+ _background:url(../images/common/sidebar_bg.gif) -188px -343px no-repeat;
+}
+.searchbar .inpt_focus {
+ border:1px solid #649C9C;
+}
+.searchbar .inpt_focus input.btn {
+ background-position:-188px -473px;
+}
+.searchbar .inpt_focus input.ipt {
+ color:#333;
+}
+.wrap {
+ clear:both;
+}
+.container {
+ width:960px;
+ margin:60px auto 0;
+}
+.content .bd {
+ clear:both;
+}
+.footer {
+ width:960px;
+ height:66px;
+ padding-top:20px;
+ color:#b9b8b8;
+ text-align:center;
+}
+.footer p {
+ text-align:center;
+ line-height:23px;
+}
+.footer a,.footer a:link {
+ color:#b9b8b8;
+ text-decoration:none;
+}
+.footer a:hover {
+ color:#b9b8b8;
+ text-decoration:underline;
+}
+.top .logo {
+ height:200px;
+ width:550px;
+ background:url(../images/common/Logo_lab.png) no-repeat;
+ _background:url(../images/common/logo_christmas_ie6.png) no-repeat
+}
+
+
+.Feedback {
+right: 0;
+position: fixed;
+top: 40%;
+_position: absolute;
+z-index: 85;
+}
+.Feedback a {
+display: block;
+width: 41px;
+height: 127px;
+background: url(../images/common/icon_feedback.png) no-repeat;
+text-indent: -9999px;
+overflow: hidden;
+}
diff --git a/contrib/iSenWeb/themes/styles/fanyi.css b/contrib/iSenWeb/themes/styles/fanyi.css
new file mode 100755
index 000000000..5fa86e4bc
--- /dev/null
+++ b/contrib/iSenWeb/themes/styles/fanyi.css
@@ -0,0 +1,583 @@
+
+.column {
+ width:50%;
+}
+.fl .wrapper {
+ padding-right:20px;
+ _padding-right:10px;
+}
+h2 {
+ height:20px;
+ font-size:1.2em;
+}
+.column .row {
+ padding-top:.5em;
+}
+#transForm .user-research {
+ float:right;
+}
+#transForm .user-research a {
+ font-family:"宋体";
+}
+#transForm .desc {
+ zoom:1;
+}
+.column .desc {
+ position:relative;
+ color:#333333;
+ font-size:14px;
+}
+.text {
+ width:100%;
+ padding:0;
+ background:#fff;
+}
+input.text {
+ padding:3px 0;
+}
+.button {
+ width:5em;
+ *height:23px;
+ *padding-top:2px;
+}
+.actions a {
+ display:none;
+}
+#inputText {
+ display:block;
+ border-width:0 1px 1px 0;
+ border-color:#E5E5E5;
+ border-style:solid;
+ background:url("../images/fanyi/inputTextBg.png") no-repeat 0 0;
+ _background-attachment:fixed;
+ font-size:14px;
+ line-height:140%;
+ padding:10px 0 10px 10px;
+ height:187px;
+ resize:none;
+ outline:none;
+ font-family:arial,sans-serif;
+}
+*+html #inputText {
+ background:none;
+ border-width:2px 1px 1px 2px;
+ height:185px;
+}
+@-moz-document url-prefix() {
+ #inputText {
+ padding:3px 0 1px 10px;
+ height:204px;
+}
+}#customSelectBtn {
+ position:relative;
+ *float:left;
+ display:inline-block;
+ width:85px;
+ height:22px;
+ padding:1px 20px 1px 5px;
+ margin-right:5px;
+ line-height:22px;
+ border:1px solid #9fc7e3;
+ vertical-align:bottom;
+ cursor:pointer;
+ color:#000000;
+}
+#customSelectBtn .btn_arrow {
+ position:absolute;
+ top:10px;
+ right:5px;
+ border-width:5px;
+ border-style:solid dashed dashed dashed;
+ border-color:#9fc7e3 transparent transparent transparent;
+ line-height:0;
+ font-size:0;
+ width:0;
+ height:0;
+}
+#customSelectBtn.focus .btn_arrow {
+ top:4px;
+ border-style:dashed dashed solid dashed;
+ border-color:transparent transparent #9fc7e3 transparent;
+}
+#customSelectOption {
+ width:110px;
+ padding:0;
+ margin:1px 0 0;
+ list-style:none;
+ font-size:12px;
+ border:1px solid #9fc7e3;
+ background:#fff;
+ position:absolute;
+ z-index:9999;
+ left:-1px;
+ top:23px;
+ display:none;
+}
+#customSelectOption a {
+ display:block;
+ height:22px;
+ padding:0 5px;
+ line-height:22px;
+ text-decoration:none;
+ color:#2a2a2a;
+}
+#customSelectOption a:hover,#customSelectOption .on a {
+ background:#9fc7e3;
+}
+#translateBtn {
+ width:74px;
+ height:26px;
+ text-indent:-999em;
+ overflow:hidden;
+ background:#fff url(../images/fanyi/fanyi_sprite.png) left -42px;
+ cursor:pointer;
+ outline:none;
+ display:inline-block;
+ vertical-align:top;
+}
+#translateBtn:hover {
+ background-position:-74px -42px;
+}
+#translateBtn:active {
+ background-position:-148px -42px;
+}
+#outputMod {
+ position:relative;
+}
+#speech {
+ display:inline-block;
+ width:16px;
+ height:0;
+ padding-top:13px;
+ margin:0 5px -2px;
+ overflow:hidden;
+ background:url(../images/fanyi/fanyi_sprite.png) no-repeat -168px top;
+}
+#speech:hover,#speech.on {
+ background-position:-168px -13px;
+}
+#outputMod .desc {
+ position:relative;
+ zoom:1;
+ height:14px;
+}
+#entryList {
+ padding:40px 0 0;
+ margin:0 0 0 18px;
+ list-style:none;
+}
+#entryList li {
+ position:relative;
+ height:42px;
+ line-height:42px;
+ padding-left:40px;
+ margin-bottom:5px;
+ white-space:nowrap;
+ color:#666;
+}
+#entryList .sp {
+ position:absolute;
+ left:0;
+ top:0;
+ width:36px;
+ padding-top:42px;
+ background:url(../images/fanyi/fanyi_sprite.png) no-repeat right top;
+}
+#translated {
+ display:none;
+ zoom:1;
+}
+#copyit {
+ vertical-align:middle;
+ margin-top:-2px;
+}
+#outputText {
+ padding:15px 20px 0;
+ line-height:140%;
+ word-wrap:break-word;
+ overflow-y:auto;
+ background-color:#fafafa;
+ height:193px;
+ font-family:arial,sans-serif;
+}
+#translated .small_font .translated_result .tgt {
+ font-size:14px;
+ font-weight:normal;
+ margin-bottom:.4em;
+}
+#translated .small_font {
+ padding:10px 12px;
+ height:188px;
+}
+#outputText .src {
+ color:#787878;
+ font-size:1em;
+ margin-bottom:2px;
+}
+#outputText .tgt {
+ margin-bottom:10px;
+ font-size:1.5em;
+ font-weight:bold;
+ line-height:150%;
+}
+#outputText .selected {
+ background-color:#316ac5;
+ color:#fff;
+}
+.smart_result {
+ padding:.5em .8em 0 0;
+ border-top:1px solid #e0e0e0;
+ color:#000;
+}
+.smart_src_title {
+ color:#777;
+ font-size:1.2em;
+ margin-bottom:.6em;
+}
+.smart_result p {
+ margin:5px 0 5px 0;
+ line-height:125%;
+}
+.smart_result p a {
+ float:right;
+ margin-left:6px;
+}
+.smart_result p span {
+ overflow:hidden;
+ zoom:1;
+ display:block;
+}
+.smartresult_more {
+ font-size:12px;
+ margin-top:5px;
+ font-family:"宋体";
+}
+.compare-mode {
+ font-weight:bold;
+}
+#modeWrapper {
+ margin-top:-3px;
+ padding:3px 0;
+ *padding:0;
+}
+.read-mode {
+ float:right;
+ display:none;
+}
+.read-mode .title {
+ background:url("../images/fanyi/fanyi_sprite.png") no-repeat -168px -28px;
+ padding-left:18px;
+ outline:none;
+}
+.compare-mode input {
+ vertical-align:top;
+ *vertical-align:middle;
+ margin:0 3px 0 0;
+ border:0;
+ padding:0;
+}
+#errorHolder {
+ display:none;
+ position:absolute;
+ z-index:9999;
+ top:-25px;
+ left:50%;
+ text-align:center;
+ font-size:12px;
+}
+#errorHolder.nullError {
+ left:20%;
+ top:120px;
+}
+#errorHolder .error_text {
+ background:#3b7fc2;
+ display:inline-block;
+ padding:5px 10px;
+ height:15px;
+ line-height:15px;
+ color:#fff;
+}
+#errorHolder .error_text a {
+ text-decoration:underline;
+}
+#errorHolder.nullError .error_text {
+ width:72px;
+ text-align:center;
+}
+#errorHolder .add-fav {
+ color:white;
+}
+#errorHolder #closeit {
+ margin-left:8px;
+}
+.tip-close {
+ cursor:pointer;
+}
+#addons {
+ display:none;
+}
+#transBtnTip {
+ display:none;
+ position:absolute;
+ z-index:999;
+ left:100px;
+ top:100px;
+ font-size:12px;
+ *background:#4570e0;
+}
+#transBtnTipInner {
+ position:relative;
+ padding:10px 15px;
+ *margin:-1px 1px;
+ color:#fff;
+ background:#4570e0;
+ -moz-border-radius:7px;
+ -khtml-border-radius:7px;
+ -webkit-border-radius:7px;
+ border-radius:7px;
+}
+#transBtnTip .ar {
+ margin-top:10px;
+}
+#transBtnTipOK {
+ font-weight:bold;
+ color:#fff;
+}
+#transBtnTipArrow {
+ position:absolute;
+ left:50px;
+ top:100%;
+ display:block;
+ border-color:transparent transparent transparent #4570e0;
+ border-width:0 0 20px 20px;
+ border-style:dashed dashed dashed solid;
+ font-size:0;
+}
+#sponsor {
+ padding:1em 0 0;
+ clear:both;
+}
+#sponsor .desc {
+ white-space:normal;
+ zoom:1;
+}
+#sponsor .fr {
+ overflow:hidden;
+}
+#sponsor .more-services {
+ background-color:#eff7fd;
+ padding-left:10px;
+ height:26px;
+ line-height:26px;
+ text-align:left;
+}
+#sponsor .more-services-list {
+ margin-bottom:1em;
+ border:1px #eff7fd solid;
+ padding:5px 12px 4px 22px;
+}
+#sponsor .more-services-icon-sprite {
+ background:url("../images/fanyi/fanyi_sprite.png") no-repeat 0 0;
+ float:left;
+ padding-left:40px;
+ padding-top:40px;
+ line-height:0;
+ font-size:0;
+}
+#sponsor .icon1 {
+ background-position:0 0;
+}
+#sponsor .icon2 {
+ background-position:-40px 0;
+}
+#sponsor .icon3 {
+ background-position:-80px 0;
+}
+#sponsor .icon4 {
+ background-position:-120px 0;
+}
+#trans_tools {
+ width:100%;
+}
+#trans_tools td {
+ margin:0;
+ padding:0;
+ width:25%;
+}
+#trans_tools h3 {
+ float:left;
+ margin-left:10px;
+ padding:0 10px 0 0;
+ line-height:40px;
+ font-size:1.2em;
+}
+#trans_tools p {
+ padding:5px 10px 0 0;
+ color:#777;
+ font-size:1.2em;
+}
+#suggestYou {
+ color:#777;
+ font-family:"宋体";
+}
+#feedback_link {
+ font-family:"宋体";
+}
+.new {
+ color:#e60012;
+ font-size:12px;
+}
+.close-reading-mode {
+ display:none;
+}
+.open-reading-mode {
+ display:none;
+}
+.for-close {
+ display:none;
+}
+.show-reading-mode .open-reading-mode {
+ display:inline-block;
+}
+.reading-mode #inputMod {
+ display:none;
+}
+.reading-mode #outputMod {
+ margin:0 auto;
+ float:none;
+}
+.reading-mode .column {
+ width:65%;
+}
+.reading-mode #outputMod #addons {
+ display:none;
+}
+.reading-mode #outputMod #outputText {
+ background-color:transparent;
+ border-top:1px solid #e5e5e5;
+ border-bottom:1px solid #e5e5e5;
+}
+.reading-mode #sponsor {
+ display:none;
+}
+.reading-mode #translated .small_font {
+ height:auto;
+ padding:10px 0;
+}
+.reading-mode .for-close {
+ display:block;
+}
+.reading-mode .close-reading-mode {
+ display:inline-block;
+}
+.reading-mode .open-reading-mode {
+ display:none;
+}
+.reading-mode #translated .small_font .translated_result .tgt {
+ margin-bottom:.6em;
+ padding-bottom:.6em;
+}
+#selectorSwitcher {
+ float:right;
+ margin-top:-3px;
+ height:20px;
+ line-height:20px;
+ cursor:pointer;
+}
+#selectorStatus {
+ margin-left:21px;
+ margin-right:6px;
+ color:#1e50a2;
+}
+.selector-sprite {
+ background:url("../p/switcher.png") no-repeat 0 0;
+}
+.selector-enable {
+ background-position:-51px -22px;
+}
+.selector-enable.hover {
+ background-position:0 -22px;
+}
+.selector-disable {
+ background-position:-51px 0;
+}
+.selector-disable.hover {
+ background-position:0 0;
+}
+.show-translate #addons {
+ display:block;
+}
+#b {
+ border-top:0 solid;
+ max-width:960px;
+ min-width:500px;
+ _width:960px;
+ font-family:arial sans-serif;
+}
+#transForm .content {
+ position:relative;
+ zoom:1;
+}
+.typo-suggest {
+ display:none;
+ position:absolute;
+ bottom:10px;
+ left:12px;
+ font-size:1.2em;
+ font-family:verdana,sens-serif;
+ color:#dc143c;
+}
+.typo-suggest a.spell-corrected {
+ text-decoration:underline;
+}
+.typo-suggest b {
+ font-style:italic;
+ font-weight:bold;
+}
+.ads {
+ background-color:#FEFEEE;
+}
+#outputMod .wrapper {
+ _padding-right:15px;
+}
+#addons {
+ _padding-right:15px;
+}
+#microBlog {
+ float:right;
+ padding-right:5px;
+}
+#microBlog dd,#microBlog dt {
+ float:left;
+ padding-top:4px;
+ height:20px;
+ line-height:20px;
+}
+#microBlog dd {
+ padding-top:4px;
+ height:20px;
+}
+#microBlog .blog {
+ display:inline-block;
+ background:url('../images/fanyi/anyi_sprite.png') no-repeat;
+ width:20px;
+ height:20px;
+}
+#microBlog a.netease {
+ background-position:-110px -69px;
+}
+#microBlog a.sina {
+ background-position:-132px -69px;
+}
+#microBlog a.tencent {
+ background-position:-155px -69px;
+}
+#microBlog a.kaixin001 {
+ background-position:-177px -69px;
+}
+.fl {
+ float:left;
+}
+.fr {
+ float:right;
+}
diff --git a/contrib/iSenWeb/themes/styles/search.css b/contrib/iSenWeb/themes/styles/search.css
new file mode 100755
index 000000000..5ca9fd8b1
--- /dev/null
+++ b/contrib/iSenWeb/themes/styles/search.css
@@ -0,0 +1,31 @@
+/* TOP SEARCH */
+#ts{position:relative;float: right; font-size:10px;}
+/* query form */
+.fc,.aca,.qb,.rqb{background:url(/MosesServer-cgi/themes/images/search/s.png) no-repeat}
+.fc{position:relative;width:415px;height:33px;padding:2px 0 2px 2px;background-position:-3px -3px}
+.fc input{font-family:Arial,sans-serif;border:none}
+.qc{position:relative;float:left;width:325px;padding:3px 2px;border-right:1px solid #6a8aae}
+.q{width:294px;height:23px;padding:3px 0 0 2px;*margin:-1px 0;font-size:1.6em;background:transparent;*border:1px solid #fff;outline:none}
+.aca{position:absolute;right:2px;top:3px;width:26px;height:0;padding-top:26px;overflow:hidden;text-indent:-9999em;background-position:-415px -3px;cursor:pointer}
+.qb{width:81px;height:33px;padding:0 0 2px 1px;*padding:2px 0 0 1px;margin:0;_margin-left:-3px;font-weight:bold;font-size:1.4em;word-spacing:4px;color:#fff;background-position:right -50px;background-color:transparent;cursor:pointer}
+.no-suggest .q{width:320px}
+/* BOTTOM SEARCH */
+#bs{margin:15px 0 20px;font-size:10px;}
+#bs .q{width:320px}
+input.rqb{position:absolute;right:-110px;top:2px;width:102px;height:32px;padding-top:32px;overflow:hidden;text-indent:-9999em;background-color:transparent;background-position:left -50px;cursor:pointer}
+
+
+/* suggest */
+.sw{font-size:1.4em;border:1px solid #8cbbdd}
+.sw table{background:#fff;border-collapse:collapse}
+.remindtt75,.jstxlan{padding-left: .2em;font-size: 14px;height: 23px;line-height: 23px;}
+.remindtt752{padding:.2em;color:#808080;font-size:14px}
+.jstxlan{color:#808080;font-size:13px;cursor:pointer; float:right}
+.jstxhuitiaoyou{margin:-1px 0;border-top:1px solid #dbeffe;background:#eaf1fd}
+.aa_highlight{color:#fff;background:#3971bf}
+/* MODULES */
+.pm{display:none;width:70px;border:1px solid;font-size:13px;border-color:#8cbbdd;background:#fff}
+.pm ul{padding:0;margin:0;list-style:none}
+.pm a{display:block;padding:4px 3px;text-decoration:none;zoom:1}
+.pm a:hover{color:#fff;background:#3971bf}
+.pm .sl{height:0;margin:0 1px;*margin-top:-10px;font-size:0;border-bottom:1px solid #8cbbdd}
diff --git a/contrib/iSenWeb/trans_result.php b/contrib/iSenWeb/trans_result.php
new file mode 100755
index 000000000..a56900a6d
--- /dev/null
+++ b/contrib/iSenWeb/trans_result.php
@@ -0,0 +1,10 @@
+<?php
+ $result = "";
+ $Content = $_POST['input1'];
+ $ereg='/\n/';
+ $arr_str = preg_split($ereg,$Content);
+ foreach($arr_str as $value){
+ $result = ` echo $value | nc 161.64.89.129 1986`;
+ echo $result.'<br>';
+ }
+?>
diff --git a/contrib/lmserver/BUILD b/contrib/lmserver/BUILD
index 2f4d9ace4..2f4d9ace4 100644..100755
--- a/contrib/lmserver/BUILD
+++ b/contrib/lmserver/BUILD
diff --git a/contrib/lmserver/INSTALL b/contrib/lmserver/INSTALL
deleted file mode 120000
index 81fa6ffa4..000000000
--- a/contrib/lmserver/INSTALL
+++ /dev/null
@@ -1 +0,0 @@
-/usr/share/automake-1.9/INSTALL \ No newline at end of file
diff --git a/contrib/lmserver/compile b/contrib/lmserver/compile
index 1b1d23216..1b1d23216 100644..100755
--- a/contrib/lmserver/compile
+++ b/contrib/lmserver/compile
diff --git a/contrib/lmserver/config.guess b/contrib/lmserver/config.guess
index 2313a174e..2313a174e 100644..100755
--- a/contrib/lmserver/config.guess
+++ b/contrib/lmserver/config.guess
diff --git a/contrib/lmserver/config.status b/contrib/lmserver/config.status
index 490bcaf91..490bcaf91 100644..100755
--- a/contrib/lmserver/config.status
+++ b/contrib/lmserver/config.status
diff --git a/contrib/lmserver/config.sub b/contrib/lmserver/config.sub
index ba16ebf55..ba16ebf55 100644..100755
--- a/contrib/lmserver/config.sub
+++ b/contrib/lmserver/config.sub
diff --git a/contrib/lmserver/configure b/contrib/lmserver/configure
index 69a1a6f02..69a1a6f02 100644..100755
--- a/contrib/lmserver/configure
+++ b/contrib/lmserver/configure
diff --git a/contrib/lmserver/depcomp b/contrib/lmserver/depcomp
index e5f9736c7..e5f9736c7 100644..100755
--- a/contrib/lmserver/depcomp
+++ b/contrib/lmserver/depcomp
diff --git a/contrib/lmserver/install-sh b/contrib/lmserver/install-sh
index a5897de6e..a5897de6e 100644..100755
--- a/contrib/lmserver/install-sh
+++ b/contrib/lmserver/install-sh
diff --git a/contrib/lmserver/missing b/contrib/lmserver/missing
index 1c8ff7049..1c8ff7049 100644..100755
--- a/contrib/lmserver/missing
+++ b/contrib/lmserver/missing
diff --git a/contrib/memscore/configure b/contrib/memscore/configure
index 3849f2c29..3849f2c29 100644..100755
--- a/contrib/memscore/configure
+++ b/contrib/memscore/configure
diff --git a/contrib/memscore/depcomp b/contrib/memscore/depcomp
index 04701da53..04701da53 100644..100755
--- a/contrib/memscore/depcomp
+++ b/contrib/memscore/depcomp
diff --git a/contrib/memscore/install-sh b/contrib/memscore/install-sh
index 4d4a9519e..4d4a9519e 100644..100755
--- a/contrib/memscore/install-sh
+++ b/contrib/memscore/install-sh
diff --git a/contrib/memscore/missing b/contrib/memscore/missing
index 894e786e1..894e786e1 100644..100755
--- a/contrib/memscore/missing
+++ b/contrib/memscore/missing
diff --git a/scripts/training/mert-moses-multi.pl b/contrib/mert-moses-multi.pl
index 22b8ed3fc..3c1b36f0e 100755
--- a/scripts/training/mert-moses-multi.pl
+++ b/contrib/mert-moses-multi.pl
@@ -4,6 +4,18 @@
# mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
# For other options see below or run 'mert-moses.pl --help'
+#
+# NB: This is a variant of of mert-moses.pl for use with the interpolated scorer
+# (MergeScorer) described in the following paper:
+#
+# "Optimising Multiple Metrics with MERT" by Christophe Servan and Holger Schwenk,
+# Prague Bulletin of Mathematical Linguistics 96 (2011) p109-117
+# http://www-lium.univ-lemans.fr/~servan/publications/Servan_PBML_2011.pdf
+#
+# If you are not using MergeScorer, then you should use the mert-moses.pl script instead
+#
+
+
# Notes:
# <foreign> and <english> should be raw text files, one sentence per line
# <english> can be a prefix, in which case the files are <english>0, <english>1, etc. are used
@@ -47,10 +59,10 @@
# 13 Oct 2004 Use alternative decoders (DWC)
# Original version by Philipp Koehn
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use File::Basename;
use File::Path;
-my $SCRIPTS_ROOTDIR = $Bin;
+my $SCRIPTS_ROOTDIR = $RealBin;
$SCRIPTS_ROOTDIR =~ s/\/training$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
diff --git a/contrib/moses-for-mere-mortals/READ_ME_FIRST.txt b/contrib/moses-for-mere-mortals/READ_ME_FIRST.txt
deleted file mode 100644
index fd5c71b18..000000000
--- a/contrib/moses-for-mere-mortals/READ_ME_FIRST.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-[11/09/2010]
-MOSES FOR MERE MORTALS
-======================
-Moses for Mere Mortals (MMM) has been tested with Ubuntu 10.04 LTS and the Moses version published on August 13, 2010 and updated on August 14, 2010 (http://sourceforge.net/projects/mosesdecoder/files/mosesdecoder/2010-08-13/moses-2010-08-13.tgz/download).
-
-***PURPOSES***:
-===============
-
-1) MOSES INSTALLATION WITH A SINGLE COMMAND
--------------------------------------------
-If you aren't used to compiling Linux programs (both Moses and the packages upon which it depends), you'll love this!
-
-2) MOSES VERY SIMPLE DEMO
--------------------------
-MMM is meant to quickly allow you to get results with Moses. You can place MMM wherever you prefer on your hard disk and then call, with a single command, each of its several scripts (their version number is omitted here):
-a) create (in order to compile Moses and the packages it uses with a single command);
-b) make-test-files;
-c) train;
-d) translate;
-e) score the translation(s) you got; and
-f) transfer trained corpora between users or to other places of your disk.
-
-MMM uses non-factored training, a type of training that in our experience already produces good results in a significant number of language pairs, and mainly with non-morphologically rich languages or with language pairs in which the target language is not morphologically rich. A Quick-Start-Guide should help you to quickly get the feel of it and start getting results.
-
-It comes with a small demo corpus, too small to do justice to the quality that Moses can achieve, but sufficient for you to get a general overview of SMT and an idea of how useful Moses can be to your work, if you are strting to use it.
-
-3) PROTOTYPE OF A REAL WORLD TRANSLATION CHAIN
-----------------------------------------------
-MMM enables you to use very large corpora and is being used for that purpose (translation for real translators) in our working environment. It was made having in mind that, in order to get the best results, corpora should be based on personal (ou group's) files and that many translators use translation memories. Therefore, we have coupled it with two Windows add-ins that enable you to convert your TMX files into Moses corpora and also allow you to convert the Moses translations into TMX files that translators can use with a translation memory tool.
-
-4) WAY OF STARTING LEARNING MOSES AND MACHINE TRANSLATION
----------------------------------------------------------
-MMM also comes with a very detailed Help-Tutorial (in its docs subdirectory). It therefore should ease the learning path for true beginners. The scripts code isn't particularly elegant, but most of it should be easily understandable even by beginners (if they read the Moses documentation, that is!). What's more, it does work!
-
-MMM was designed to be very easy and immediately feasible to use and that's indeed why it was made for mere mortals and called as such.
-
-***SOME CHARACTERISTICS***:
-===========================
- 1) Compiles all the packages used by these scripts with a single instruction;
- 2) Removes control characters from the input files (these can crash a training);
- 3) Extracts from the corpus files 2 test files by pseudorandomly selecting non-consecutive segments that are erased from the corpus files;
- 4) A new training does not interfere with the files of a previous training;
- 5) A new training reuses as much as possible the files created in previous trainings (thus saving time);
- 6) Detects inversions of corpora (e.g., from en-pt to pt-en), allowing a much quicker training than that of the original language pair (also checks that the inverse training is correct);
- 7) Stops with an informative message if any of the phases of training (language model building, recaser training, corpus training, memory-mapping, tuning or training test) doesn't produce the expected results;
- 8) Can limit the duration of tuning;
- 9) Generates the BLEU and NIST scores of a translation or of a set of translations placed in a single directory (either for each whole document or for each segment of it);
-10) Allows you to transfer your trainings to someone else's computer or to another Moses installation in the same computer;
-11) All the mkcls, GIZA and MGIZA parameters can be controlled through parameters of the train script;
-12) Selected parameters of the Moses scripts and the Moses decoder can be controlled with the train and translate scripts.
-
-
-
-
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.py b/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.py
deleted file mode 100755
index 67fbec0f7..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.py
+++ /dev/null
@@ -1,592 +0,0 @@
-# -*- coding: utf_8 -*-
-"""This program is used to prepare corpora extracted from TMX files.
-It is particularly useful for translators not very familiar
-with machine translation systems that want to use Moses with a highly customised
-corpus.
-
-It extracts from a directory containing TMX files (and from all of its subdirectories)
-all the segments of one or more language pairs (except empty segments and segments that are equal in both languages)
-and removes all other information. It then creates 2 separate monolingual files per language pair,
-both of which have strictly parallel (aligned) segments. This kind of corpus can easily be transformed
-in other formats, if need be.
-
-The program requires that Pythoncard and wxPython (as well as Python) be previously installed.
-
-Copyright 2009, João Luís A. C. Rosas
-
-Distributed under GNU GPL v3 licence (see http://www.gnu.org/licenses/)
-
-E-mail: joao.luis.rosas@gmail.com """
-
-__version__ = "$Revision: 1.042$"
-__date__ = "$Date: 2010/03/25$"
-__author__="$João Luís A. C. Rosas$"
-#Special thanks to Gary Daine for a helpful suggestion about a regex expression and for suggestions for this program to cover even more translation memories
-
-from PythonCard import clipboard, dialog, graphic, model
-from PythonCard.components import button, combobox,statictext,checkbox,staticbox
-import wx
-import os, re
-import string
-import sys
-from time import strftime
-import codecs
-
-
-class Extract_TMX_Corpus(model.Background):
-
- def on_initialize(self, event):
- """Initialize values
-
-
- @self.inputdir: directory whose files will be treated
- @self.outputfile: base name of the resulting corpora files
- @self.outputpath: root directory of the resulting corpora files
- @currdir: program's current working directory
- @self.languages: list of languages whose segments can be processed
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @self.components.cbStartingLanguage.items: list of values of the Starting Language combobox of the program's window
- @self.components.cbDestinationLanguage.items: list of values of the Destination Language combobox of the program's window
- @self.numtus: number of translation units extracted so far
- @self.presentfile: TMX file being currently processed
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.wroteactions: variable that indicates whether the actions files has already been written to
- """
-
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- #Get directory where program file is and ...
- currdir=os.getcwd()
- #... load the file ("LanguageCodes.txt") with the list of languages that the program can process
- try:
- self.languages=open(currdir+r'\LanguageCodes.txt','r+').readlines()
- except:
- # If the languages file doesn't exist in the program directory, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguageCodes.txt" is missing. The program will now close.', 'Essential file missing')
- sys.exit()
- #remove end of line marker from each line in "LanguageCodes.txt"
- for lang in range(len(self.languages)):
- self.languages[lang]=self.languages[lang].rstrip()
- self.startinglanguage=''
- self.destinationlanguage=''
- #Insert list of language names in appropriate program window's combo boxes
- self.components.cbStartingLanguage.items=self.languages
- self.components.cbDestinationLanguage.items=self.languages
- self.tottus=0
- self.numtus=0
- self.numequaltus=0
- self.presentfile=''
- self.errortypes=''
- self.wroteactions=False
- self.errors=''
-
- def extract_language_segments_tmx(self,text):
- """Extracts TMX language segments from TMX files
-
- @text: the text of the TMX file
- @pattern: compiled regular expression object, which can be used for matching
- @tus: list that collects the translation units of the text
- @segs: list that collects the segment units of the relevant pair of languages
- @numtus: number of translation units extracted
- @present_tu: variable that stocks the translation unit relevant segments (of the chosen language pair) that are being processed
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- """
- #print 'extract_language_segments: start at '+strftime('%H-%M-%S')
- result=('','')
- try:
- if text:
- # Convert character entities to "normal" characters
- pattern=re.compile('&gt;',re.U)
- text=re.sub(pattern,'>',text)
- pattern=re.compile('&lt;',re.U)
- text=re.sub(pattern,'<',text)
- pattern=re.compile('&amp;',re.U)
- text=re.sub(pattern,'&',text)
- pattern=re.compile('&quot;',re.U)
- text=re.sub(pattern,'"',text)
- pattern=re.compile('&apos;',re.U)
- text=re.sub(pattern,"'",text)
- # Extract translation units
- pattern=re.compile('(?s)<tu.*?>(.*?)</tu>')
- tus=re.findall(pattern,text)
- ling1=''
- ling2=''
- #Extract relevant segments and store them in the @text variable
- if tus:
- for tu in tus:
- pattern=re.compile('(?s)<tuv.*?lang="'+self.startinglanguage+'">.*?<seg>(.*?)</seg>.*?<tuv.*?lang="'+self.destinationlanguage+'">.*?<seg>(.*?)</seg>')
- present_tu=re.findall(pattern,tu)
- self.tottus+=1
- #reject empty segments
- if present_tu: # and not present_tu[0][0].startswith("<")
- present_tu1=present_tu[0][0].strip()
- present_tu2=present_tu[0][1].strip()
- present_tu1 = re.sub('<bpt.*</bpt>', '', present_tu1)
- present_tu2 = re.sub('<bpt.*</bpt>', '', present_tu2)
- present_tu1 = re.sub(r'<ept.*</ept>', '', present_tu1)
- present_tu2 = re.sub(r'<ept.*</ept>', '', present_tu2)
- present_tu1 = re.sub(r'<ut.*</ut>', '', present_tu1)
- present_tu2 = re.sub(r'<ut.*</ut>', '', present_tu2)
- present_tu1 = re.sub(r'<ph.*</ph>', '', present_tu1)
- present_tu2 = re.sub(r'<ph.*</ph>', '', present_tu2)
- #Thanks to Gary Daine
- present_tu1 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu1)
- #Thanks to Gary Daine
- present_tu2 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu2)
- if present_tu1 != present_tu2:
- x=len(present_tu1)
- y=len(present_tu2)
- if (x <= y*3) and (y <= x*3):
- ling1=ling1+present_tu1+'\n'
- ling2=ling2+present_tu2+'\n'
- self.numtus+=1
- else:
- self.numequaltus+=1
- pattern=re.compile('(?s)<tuv.*?lang="'+self.destinationlanguage+'">.*?<seg>(.*?)</seg>.*?<tuv.*?lang="'+self.startinglanguage+'">.*?<seg>(.*?)</seg>')
- present_tu=re.findall(pattern,tu)
- #print present_tu
- if present_tu:
- present_tu1=present_tu[0][1].strip()
- present_tu2=present_tu[0][0].strip()
- present_tu1 = re.sub('<bpt.*</bpt>', '', present_tu1)
- present_tu2 = re.sub('<bpt.*</bpt>', '', present_tu2)
- present_tu1 = re.sub(r'<ept.*</ept>', '', present_tu1)
- present_tu2 = re.sub(r'<ept.*</ept>', '', present_tu2)
- present_tu1 = re.sub(r'<ut.*</ut>', '', present_tu1)
- present_tu2 = re.sub(r'<ut.*</ut>', '', present_tu2)
- present_tu1 = re.sub(r'<ph.*</ph>', '', present_tu1)
- present_tu2 = re.sub(r'<ph.*</ph>', '', present_tu2)
- #Thanks to Gary Daine
- present_tu1 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu1)
- #Thanks to Gary Daine
- present_tu2 = re.sub('^[0-9\.() \t\-_]*$', '', present_tu2)
- if present_tu1 != present_tu2:
- x=len(present_tu1)
- y=len(present_tu2)
- if (x <= y*3) and (y <= x*3):
- ling1=ling1+present_tu1+'\n'
- ling2=ling2+present_tu2+'\n'
- self.numtus+=1
- else:
- self.numequaltus+=1
- result=(ling1,ling2)
- except:
- self.errortypes=self.errortypes+' - Extract Language Segments error\n'
- return result
-
- def locate(self,pattern, basedir):
- """Locate all files matching supplied filename pattern in and below
- supplied root directory.
-
- @pattern: something like '*.tmx'
- @basedir:whole directory to be treated
- """
- import fnmatch
- for path, dirs, files in os.walk(os.path.abspath(basedir)):
- for filename in fnmatch.filter(files, pattern):
- yield os.path.join(path, filename)
-
- def getallsegments(self):
- """Get all language segments from the TMX files in the specified
- directory
-
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @fileslist: list of files that should be processed
- @self.inputdir: directory whose files will be treated
- @startfile:output file containing all segments in the @startinglanguage; file
- will be created in @self.inputdir
- @destfile:output file containing all segments in the @destinationlanguage; file
- will be created in @self.inputdir
- @actions:output file indicating the names of all files that were processed without errors; file
- will be created in @self.inputdir
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.presentfile: TMX file being currently processed
- @preptext: parsed XML text with all tags extracted and in string format
- @tus: list that receives the extracted TMX language translation units just with segments of the relevant language pair
- @num: loop control variable between 0 and length of @tus - 1
- @self.numtus: number of translation units extracted so far
- """
- self.statusBar.text='Processing '+ self.inputdir
- try:
- # Get a list of all TMX files that need to be processed
- fileslist=self.locate('*.tmx',self.inputdir)
- # Open output files for writing
- startfile=open(self.outputpath+'\\'+self.startinglanguage+ ' ('+self.destinationlanguage+')_' +self.outputfile,'w+b')
- destfile=open(self.outputpath+'\\'+self.destinationlanguage+' ('+self.startinglanguage+')_'+self.outputfile,'w+b')
- actions=open(self.outputpath+'\\_processing_info\\'+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'actions_'+self.outputfile+'.txt','w+')
- except:
- # if any error up to now, add the name of the TMX file to the output file @errors
- self.errortypes=self.errortypes+' - Get All Segments: creation of output files error\n'
- if fileslist:
- # For each relevant TMX file ...
- for self.presentfile in fileslist:
- self.errortypes=''
- try:
- print self.presentfile
- fileObj = codecs.open(self.presentfile, "rb", "utf-16","replace",0 )
- pos=0
- while True:
- # read a new chunk of text...
- preptext = fileObj.read(692141)
- if not preptext:
- break
- last5=''
- y=''
- #... and make it end at the end of a translation unit
- while True:
- y=fileObj.read(1)
- if not y:
- break
- last5=last5+y
- if '</tu>' in last5:
- break
- preptext=preptext+last5
- # ... and extract its relevant segments ...
- if not self.errortypes:
- segs1,segs2=self.extract_language_segments_tmx(preptext)
- preptext=''
- #... and write those segments to the output files
- if segs1 and segs2:
- try:
- startfile.write('%s' % (segs1.encode('utf-8','strict')))
- destfile.write('%s' % (segs2.encode('utf-8','strict')))
- except:
- self.errortypes=self.errortypes+' - Get All Segments: writing of output files error\n'
- print 'erro'
- #if no errors up to now, insert the name of the TMX file in the @actions output file
- #encoding is necessary because @actions may be in a directory whose name has special diacritic characters
- if self.errortypes=='':
- try:
- actions.write(self.presentfile.encode('utf_8','replace')+'\n')
- self.wroteactions=True
- except:
- self.errortypes=self.errortypes+' - Get All Segments: writing of actions file error\n'
- fileObj.close()
- except:
- self.errortypes=self.errortypes+' - Error reading input file\n'
- try:
- if self.wroteactions:
- actions.write('\n*************************************************\n\n')
- actions.write('Total number of translation units: '+str(self.tottus)+'\n')
- actions.write('Number of extracted translation units (source segment not equal to destination segment): '+str(self.numtus)+'\n')
- actions.write('Number of removed translation units (source segment equal to destination segment): '+str(self.numequaltus)+'\n')
- actions.write('Number of empty translation units (source segment and/or destination segment not present): '+str(self.tottus-self.numequaltus-self.numtus))
-
- except:
- self.errortypes=self.errortypes+' - Get All Segments: writing of actions file error\n'
- # Close output files
- actions.close()
- destfile.close()
- startfile.close()
-
- def SelectDirectory(self):
- """Select the directory where the TMX files to be processed are
-
- @result: object returned by the dialog window with attributes accepted (true if user clicked OK button, false otherwise) and
- path (list of strings containing the full pathnames to all files selected by the user)
- @self.inputdir: directory where TMX files to be processed are (and where output files will be written)
- @self.statusBar.text: text displayed in the program window status bar"""
-
- result= dialog.directoryDialog(self, 'Choose a directory', 'a')
- if result.accepted:
- self.inputdir=result.path
- self.statusBar.text=self.inputdir+' selected.'
-
- def on_menuFileSelectDirectory_select(self, event):
- self.SelectDirectory()
-
- def on_btnSelectDirectory_mouseClick(self, event):
- self.SelectDirectory()
-
- def GetOutputFileBaseName(self):
- """Get base name of the corpus files
-
- @expr: variable containing the base name of the output files
- @wildcard: list of wildcards used in the dialog window to filter types of files
- @result: object returned by the Open File dialog window with attributes accepted (true if user clicked OK button, false otherwise) and
- path (list of strings containing the full pathnames to all files selected by the user)
- @self.inputdir: directory where TMX files to be processed are (and where output files will be written)
- @location: variable containing the full path to the base name output file
- @self.outputpath: base directory of output files
- @self.outputfile: base name of the output files
- """
-
- # Default base name of the corpora files that will be produced. If you choose as base name "Corpus.txt", as starting language "EN-GB" and as destination
- # language "FR-FR" the corpora files will be named "Corpus_EN-GB.txt" and "Corpus_FR-FR.txt"
- expr='Corpus'
- #open a dialog that lets you choose the base name of the corpora files that will be produced.
- wildcard = "Text files (*.txt;*.TXT)|*.txt;*.TXT"
- result = dialog.openFileDialog(None, "Name of corpus file", self.inputdir,expr,wildcard=wildcard)
- if result.accepted:
- location=os.path.split(result.paths[0])
- self.outputpath=location[0]
- self.outputfile = location[1]
- if not os.path.exists(self.outputpath+'\\_processing_info'):
- try:
- os.mkdir(self.outputpath+'\\_processing_info')
- except:
- result1 = dialog.alertDialog(self, "The program can't create the directory " + self.outputpath+r'\_processing_info, which is necessary for ' + \
- 'the creation of the output files. The program will now close.','Error')
- sys.exit()
-
- def on_menuGetOutputFileBaseName_select(self, event):
- self.GetOutputFileBaseName()
-
- def on_btnGetOutputFileBaseName_mouseClick(self, event):
- self.GetOutputFileBaseName()
-
- def ExtractCorpus(self):
- """Get the directory where TMX files to be processed are, get the choice of the pair of languages that will be treated and launch the extraction
- of the corpus
-
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.presentfile: TMX file being currently processed
- @self.numtus: number of translation units extracted so far
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @self.inputdir: directory whose files will be treated
- @self.components.cbStartingLanguage.items: list of values of the Starting Language combobox of the program's window
- @self.components.cbDestinationLanguage.items: list of values of the Destination Language combobox of the program's window
- @self.outputfile: base name of the resulting corpora files
- @self.errors:output file indicating the types of error that occurred in each processed TMX file
- @self.numtus: number of translation units extracted so far
- """
-
- print 'Extract corpus: started at '+strftime('%H-%M-%S')
- self.errortypes=''
- self.presentfile=''
- self.numtus=0
- #get the startinglanguage name (e.g.: "EN-GB") from the program window
- self.startinglanguage=self.components.cbStartingLanguage.text
- #get the destinationlanguage name from the program window
- self.destinationlanguage=self.components.cbDestinationLanguage.text
- #if the directory where TMX files (@inputdir) or the pair of languages were not previously chosen, open a dialog box explaining
- #the conditions that have to be met so that the extraction can be made and do nothing...
- if (self.inputdir=='') or (self.components.cbStartingLanguage.text=='') or (self.components.cbDestinationLanguage.text=='') or (self.outputfile=='') \
- or (self.components.cbStartingLanguage.text==self.components.cbDestinationLanguage.text):
- result = dialog.alertDialog(self, 'In order to extract a corpus, you need to:\n\n 1) indicate the directory where the TMX files are,\n 2)' \
- +' the starting language,\n 3) the destination language (the 2 languages must be different), and\n 4) the base name of the output files.', 'Error')
-
- #...else, go ahead
- else:
- try:
- self.errors=open(self.outputpath+'\\_processing_info\\'+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'errors_'+self.outputfile+'.txt','w+')
- except:
- pass
- self.statusBar.text='Please wait. This can be a long process ...'
- #Launch the segment extraction
- self.numtus=0
- self.getallsegments()
- # if any error up to now, add the name of the TMX file to the output file @errors
- if self.errortypes:
- try:
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes)
- except:
- pass
- try:
- self.errors.close()
- except:
- pass
- self.statusBar.text='Processing finished.'
- #Open dialog box telling that processing is finished and where can the resulting files be found
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- print 'Extract corpus: finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Processing done. Results found in:\n\n1) '+ \
- self.outputpath+'\\'+self.startinglanguage+ ' ('+self.destinationlanguage+')_' +self.outputfile+ ' (starting language corpus)\n2) '+ \
- self.outputpath+'\\'+self.destinationlanguage+' ('+self.startinglanguage+')_'+self.outputfile+ \
- ' (destination language corpus)\n3) '+self.outputpath+'\\_processing_info\\'+self.startinglanguage+ '-'+self.destinationlanguage+'_'+ \
- 'errors_'+self.outputfile+'.txt'+ ' (list of files that caused errors)\n4) '+self.outputpath+'\\_processing_info\\'+self.startinglanguage+ \
- '-'+self.destinationlanguage+'_'+'actions_'+self.outputfile+'.txt'+ ' (list of files where processing was successful)', 'Processing Done')
-
- def on_menuFileExtractCorpus_select(self, event):
- self.ExtractCorpus()
- def on_btnExtractCorpus_mouseClick(self, event):
- self.ExtractCorpus()
-
- def ExtractAllCorpora(self):
- """Extracts all the LanguagePairs that can be composed with the languages indicated in the file "LanguageCodes.txt"
-
- @self.presentfile: TMX file being currently processed
- @self.numtus: number of translation units extracted so far
- @numcorpora: number of language pair being processed
- @self.inputdir: directory whose files will be treated
- @self.outputfile: base name of the resulting corpora files
- @self.errors:output file indicating the types of error that occurred in each processed TMX file
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @lang1: code of the starting language
- @lang2: code of the destination language
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.wroteactions: variable that indicates whether the actions files has already been written to
- """
-
- print 'Extract All Corpora: started at '+strftime('%H-%M-%S')
- self.presentfile=''
- self.numtus=0
- numcorpora=0
- #if the directory where TMX files (@inputdir) or the base name of the output files were not previously chosen, open a dialog box explaining
- #the conditions that have to be met so that the extraction can be made and do nothing...
- if (self.inputdir=='') or (self.outputfile==''):
- result = dialog.alertDialog(self, 'In order to extract all corpora, you need to:\n\n 1) indicate the directory where the TMX files are, and\n ' \
- + '2) the base name of the output files.', 'Error')
- #...else, go ahead
- else:
- try:
- for lang1 in self.languages:
- for lang2 in self.languages:
- if lang2 > lang1:
- print lang1+'/'+lang2+' corpus being created...'
- numcorpora=numcorpora+1
- self.errortypes=''
- self.numtus=0
- self.wroteactions=False
- #get the startinglanguage name (e.g.: "EN-GB") from the program window
- self.startinglanguage=lang1
- #get the destinationlanguage name from the program window
- self.destinationlanguage=lang2
- try:
- self.errors=open(self.outputpath+'\\_processing_info\\'+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'errors.txt','w+')
- except:
- pass
- self.statusBar.text='Language pair '+str(numcorpora)+' being processed. Please wait.'
- #Launch the segment extraction
- self.getallsegments()
- # if any error up to now, add the name of the TMX file to the output file @errors
- if self.errortypes:
- try:
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes.encode('utf_8','replace'))
- except:
- pass
- try:
- self.errors.close()
- except:
- pass
- self.statusBar.text='Processing finished.'
- except:
- self.errortypes=self.errortypes+' - Extract All Corpora error\n'
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes.encode('utf_8','replace'))
- self.errors.close()
- #Open dialog box telling that processing is finished and where can the resulting files be found
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- print 'Extract All Corpora: finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Results found in: '+ self.outputpath+'.', 'Processing done')
-
-
- def on_menuFileExtractAllCorpora_select(self, event):
- self.ExtractAllCorpora()
- def on_btnExtractAllCorpora_mouseClick(self, event):
- self.ExtractAllCorpora()
-
- def ExtractSomeCorpora(self):
- """Extracts the segments of the LanguagePairs indicated in the file "LanguagePairs.txt" located in the program's root directory
-
- @self.presentfile: TMX file being currently processed
- @self.numtus: number of translation units extracted so far
- @currdir: current working directory of the program
- @pairsoflanguages: list of the pairs of language that are going to be processed
- @self.languages: list of languages whose segments can be processed
- @numcorpora: number of language pair being processed
- @self.inputdir: directory whose files will be treated
- @self.outputfile: base name of the resulting corpora files
- @self.errors:output file indicating the types of error that occurred in each processed TMX file
- @self.startinglanguage: something like 'EN-GB'
- @self.destinationlanguage: something like 'FR-FR'
- @lang1: code of the starting language
- @lang2: code of the destination language
- @self.errortypes: variable that stocks the types of errors detected in the TMX file that is being processed
- @self.wroteactions: variable that indicates whether the actions files has already been written to
- """
-
- print 'Extract Some Corpora: started at '+strftime('%H-%M-%S')
- self.presentfile=''
- self.numtus=0
- currdir=os.getcwd()
- #... load the file ("LanguageCodes.txt") with the list of languages that the program can process
- try:
- pairsoflanguages=open(currdir+r'\LanguagePairs.txt','r+').readlines()
- except:
- # If the languages file doesn't exist in the program directory, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguagePairs.txt" is missing. The program will now close.', 'Essential file missing')
- sys.exit()
- #remove end of line marker from each line in "LanguageCodes.txt"
- if pairsoflanguages:
- for item in range(len(pairsoflanguages)):
- pairsoflanguages[item]=pairsoflanguages[item].strip()
- pos=pairsoflanguages[item].find("/")
- pairsoflanguages[item]=(pairsoflanguages[item][:pos],pairsoflanguages[item][pos+1:])
- else:
- # If the languages file is empty, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguagePairs.txt" is an essential file and is empty. The program will now close.', 'Empty file')
- sys.exit()
-
- #if the directory where TMX files (@inputdir) or the base name of the output files were not previously chosen, open a dialog box explaining
- #the conditions that have to be met so that the extraction can be made and do nothing...
- if (self.inputdir=='') or (self.outputfile==''):
- result = dialog.alertDialog(self, 'In order to extract all corpora, you need to:\n\n 1) indicate the directory where the TMX files are, and\n ' \
- + '2) the base name of the output files.', 'Error')
- #...else, go ahead
- else:
- numcorpora=0
- for (lang1,lang2) in pairsoflanguages:
- if lang1<>lang2:
- print lang1+'/'+lang2+' corpus being created...'
- self.errortypes=''
- numcorpora=numcorpora+1
- #get the startinglanguage code (e.g.: "EN-GB")
- self.startinglanguage=lang1
- #get the destinationlanguage code
- self.destinationlanguage=lang2
- try:
- self.errors=open(self.outputpath+'\\_processing_info\\'+self.startinglanguage+ '-'+self.destinationlanguage+'_'+'errors.txt','w+')
- except:
- pass
- self.statusBar.text='Language pair '+str(numcorpora)+' being processed. Please wait.'
- #Launch the segment extraction
- self.numtus=0
- self.wroteactions=False
- self.getallsegments()
- # if any error up to now, add the name of the TMX file to the output file @errors
- if self.errortypes:
- try:
- self.errors.write(self.presentfile.encode('utf_8','replace')+':\n'+self.errortypes.encode('utf_8','replace'))
- except:
- pass
- try:
- self.errors.close()
- except:
- pass
- else:
- result = dialog.alertDialog(self, 'A bilingual corpus involves two different languages. The pair "'+lang1+'/'+lang2 + \
- '" will not be processed.', 'Alert')
- self.statusBar.text='Processing finished.'
- #Open dialog box telling that processing is finished and where can the resulting files be found
- self.inputdir=''
- self.outputfile=''
- self.outputpath=''
- print 'Extract Some Corpora: finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Results found in: '+ self.outputpath+'.', 'Processing done')
-
- def on_menuFileExtractSomeCorpora_select(self, event):
- self.ExtractSomeCorpora()
- def on_btnExtractSomeCorpora_mouseClick(self, event):
- self.ExtractSomeCorpora()
-
- def on_menuHelpHelp_select(self, event):
- try:
- f = open('_READ_ME_FIRST.txt', "r")
- msg = f.read()
- result = dialog.scrolledMessageDialog(self, msg, 'readme.txt')
- except:
- result = dialog.alertDialog(self, 'Help file missing', 'Problem with the Help file')
-
-
-if __name__ == '__main__':
- app = model.Application(Extract_TMX_Corpus)
- app.MainLoop()
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.rsrc.py b/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.rsrc.py
deleted file mode 100755
index 93e19edf2..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/Extract_TMX_Corpus.rsrc.py
+++ /dev/null
@@ -1,141 +0,0 @@
-{'application':{'type':'Application',
- 'name':'Extract_TMX_Corpus',
- 'backgrounds': [
- {'type':'Background',
- 'name':'bgExtract_TMX_Corpus',
- 'title':u'Extract_TMX_Corpus',
- 'size':(275, 410),
- 'statusBar':1,
-
- 'menubar': {'type':'MenuBar',
- 'menus': [
- {'type':'Menu',
- 'name':'menuFile',
- 'label':'&File',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuFileSelectDirectory',
- 'label':u'Select &input/output directory...\tCtrl+I',
- 'command':'SelectListOfDirectories',
- },
- {'type':'MenuItem',
- 'name':'menuGetOutputFileBaseName',
- 'label':u'Get &output file base name...\tCtrl+O',
- 'command':'GetOutputFileBaseName',
- },
- {'type':'MenuItem',
- 'name':'fileSep1',
- 'label':'-',
- },
- {'type':'MenuItem',
- 'name':'menuFileExtractCorpus',
- 'label':u'&Extract corpus\tCtrl+E',
- 'command':'ExtractCorpus',
- },
- {'type':'MenuItem',
- 'name':'menuFileExtractSomeCorpora',
- 'label':u'Extract &some corpora\tCtrl+S',
- 'command':'ExtractSomeCorpora',
- },
- {'type':'MenuItem',
- 'name':'menuFileExtractAllCorpora',
- 'label':u'Extract &all corpora\tCtrl+A',
- 'command':'ExtractAllCorpora',
- },
- {'type':'MenuItem',
- 'name':'fileSep2',
- 'label':u'-',
- },
- {'type':'MenuItem',
- 'name':'menuFileExit',
- 'label':'E&xit\tAlt+X',
- 'command':'Doexit',
- },
- ]
- },
- {'type':'Menu',
- 'name':'menuHelp',
- 'label':u'&Help',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuHelpHelp',
- 'label':u'&Help...\tCtrl+H',
- },
- ]
- },
- ]
- },
- 'components': [
-
-{'type':'Button',
- 'name':'btnExtractSomeCorpora',
- 'position':(18, 267),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Extract some corpora',
- },
-
-{'type':'Button',
- 'name':'btnExtractAllCorpora',
- 'position':(18, 233),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Extract all corpora',
- },
-
-{'type':'StaticText',
- 'name':'StaticText3',
- 'position':(18, 107),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Destination Language:',
- },
-
-{'type':'ComboBox',
- 'name':'cbDestinationLanguage',
- 'position':(18, 129),
- 'size':(225, -1),
- 'items':[],
- },
-
-{'type':'Button',
- 'name':'btnSelectDirectory',
- 'position':(18, 19),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Select input / output directory...',
- },
-
-{'type':'ComboBox',
- 'name':'cbStartingLanguage',
- 'position':(18, 74),
- 'size':(225, -1),
- 'items':[u'DE-PT', u'EN-PT', u'ES-PT', u'FR-PT'],
- },
-
-{'type':'Button',
- 'name':'btnGetOutputFileBaseName',
- 'position':(18, 166),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Select base name of output file...',
- },
-
-{'type':'Button',
- 'name':'btnExtractCorpus',
- 'position':(18, 200),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Extract one corpus',
- },
-
-{'type':'StaticText',
- 'name':'StaticText1',
- 'position':(18, 53),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Starting Language:',
- },
-
-] # end components
-} # end background
-] # end backgrounds
-} }
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguageCodes.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguageCodes.txt
deleted file mode 100644
index 22ca66c73..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguageCodes.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-BG-01
-CS-01
-DA-01
-DE-DE
-EL-01
-EN-GB
-ES-ES
-ET-01
-FI-01
-FR-FR
-HU-01
-IT-IT
-LT-01
-LV-01
-MT-01
-NL-NL
-PL-01
-PT-PT
-RO-RO
-SK-01
-SL-01
-SV-SE \ No newline at end of file
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguagePairs.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguagePairs.txt
deleted file mode 100644
index d2ffd094e..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/LanguagePairs.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-BG-01/CS-01
-FR-FR/PT-PT
-EN-GB/LT-01 \ No newline at end of file
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/_READ_ME_FIRST.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/_READ_ME_FIRST.txt
deleted file mode 100644
index c84dd4ac4..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/_READ_ME_FIRST.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-Summary:
- PURPOSE
- PERFORMANCE
- REQUIREMENTS
- INSTALLATION
- HOW TO USE
- GETTING THE RESULTS
- THANKS
- LICENSE
-
-****************************************************************************
-PURPOSE:
-****************************************************************************
-Extract_Tmx_Corpus_1.043 is a Windows program (Windows 7, Vista and XP supported) that enables translators not necessarily with a deep knowledge of linguistic tools to create highly customised corpora that can be used with the Moses machine translation system and with other systems. Some users call it "et cetera", playing a bit with its initials (ETC) and meaning that it can treat a never-ending number of files.
-
-In order to create corpora that are most useful to train machine translation systems, one should strive to include segments that are relevant for the task in hand. One of the ways of finding such segments could involve the usage of previous translation memory files (TMX files). This way the corpora could be customised for the person or for the type of task in question. The present program uses such files as input.
-
-The program can create strictly aligned corpora for a single pair of languages, several pairs of languages or all the pairs of languages contained in the TMX files.
-
-The program creates 2 separate files (UTF-8 format; Unix line endings) for each language pair that it processes: one for the starting language and another for the destination language. The lines of a given TMX translation unit are placed in strictly the same line in both files. The program suppresses empty TMX translation units, as well as those where the text for the first language is the same as that of the second language (like translation units consisting solely of numbers, or those in which the first language segment has not been translated into the second language). If you are interested in another format of corpus, it should be relatively easy to adapt this format to the format you are interested in.
-
-The program also informs about errors that might occur during processing and creates a file that lists the name(s) of the TMX files that caused them, as well as a separate one listing the files successfully treated and the number of segments extracted for the language pair.
-
-****************************************************************************
-PERFORMANCE:
-****************************************************************************
-The program can process very large numbers of TMX files (tens of thousands or more). It can also process extremely big TMX files (500 MB or more; it successfully processed a 2,3 GB file). The extraction of the corpus of a pair of languages in a very large (6,15 GB) set of TMX files took approximately 45 minutes in a Intel Core 2 Solo U3500 computer @ 1.4 GHz with 4 GB RAM.
-
-The starting language and the destination language segments can be in any order in the TMX files (e.g., the starting language segment may be found either before or after the destination language segment in one, several or all translation units of the TMX file).
-
-The program accepts and preserves text in any language (including special diacritical characters), but has only been tested with European Union official languages.
-****************************************************************************
-REQUIREMENTS:
-****************************************************************************
-These requirements only apply if you want to use the program from source. If you have downloaded the Windows executable you do not have to do anything.
-The program requires the following to be pre-installed in your computer:
-
-1) Python 2.5 (http://www.python.org/download/releases/2.5.4/);
-NOTE1: the program should work with Python 2.6, but has not been tested with it.
-NOTE2: if you use Windows Vista, launch the following installation programs by right-clicking their file in Windows Explorer and choosing the command "Execute as administrator" in the contextual menu.
-2) wxPython 2.8, Unicode version (http://www.wxpython.org/download.php);
-3) Pythoncard 0.8.2 (http://sourceforge.net/projects/pythoncard/files/PythonCard/0.8.2/PythonCard-0.8.2.win32.exe/download)
-
-****************************************************************************
-INSTALLATION:
-****************************************************************************
-1) Download the Extract_TMX_Corpus_1.043.exe file.
-2) Double-click it and follow the wizard instructions.
-***IMPORTANT***: Never erase the file "LanguageCodes.txt" in that directory. It is necessary for telling the program the languages that it has to process. If your TMX files use language codes that are different from those contained in this file, please replace the codes contained in the file with the codes used in your TMX files. You can always add or delete new codes to this file (when the program is not running).
-
-****************************************************************************
-HOW TO USE:
-****************************************************************************
-1) Create a directory where you will copy the TMX files that you want to process.
-2) Copy the TMX files to that directory.
-Note: If you do not have such files, try the following site: http://langtech.jrc.it/DGT-TM.html#Download. It contains the European Union DGT's Translation Memory, containing legislative documents of the European Union. For more details, see http://wt.jrc.it/lt/Acquis/DGT_TU_1.0/data/. These files are compressed in zip format and need to be unzipped before they can be used.
-3) Launch the program.
-4) Operate on the main window of the program in the direction from top to bottom:
-a) Click the "Select input/output directory" button to tell the root directory where the TMX files are (this directory can have subdirectories, all of which will also be processed), as well as where the output files produced by the program will be placed;
-
-
-NOTE: Please take note of this directory because the result files will also be placed there.
-b) In case you want to extract a ***single*** pair of languages, choose them in the "Starting Language" and "Destination Language" comboboxes. Do nothing if you want to extract more than one pair of languages.
-c) Click the "Select base name of output file" button and choose a base name for the output files (default: "Corpus.txt").
-Note: This base name is used to compose the names of the output files, which will also include the names of the starting and destination languages. If you accept the default "Corpus.txt" and choose "EN-GB" as starting language and "PT-PT" as destination language, for that corpus pair the respective corpora files will be named, respectively, "EN-GB (PT-PT)_Corpus.txt" and "PT-PT (EN-GB)_Corpus.txt".
-***TIP***: The base name is useful for getting different names for different corpora of the same language.
-d) Click one (***just one***) of the following buttons:
-- "Extract one corpus": this creates a single pair of strictly aligned corpora in the languages chosen in the "Starting Language" and "Destination Language" comboboxes;
--"Extract all corpora": this extracts all the combination pairs of languages for all the languages available in the "Starting Language" and "Destination language" comboboxes; if a language pair does not have segments of both languages in all of the translation units of all the TMX files, the result will be two empty corpora files for that language pair. If, however, there is just a single relevant translation unit, the corpus won’t be empty.
--"Extract some corpora": this extracts the pairs of languages listed in the file "LanguagePairs.txt". Each line of this file has the following structure:
- {Starting Language}/{Destination Language}.
-
-Here is an example of a file with 2 lines:
-
-EN-GB/PT-PT
-FR-FR/PT-PT
-
-This will create corpora for 4 pairs of languages: EN-PT, PT-EN and FR-PT and PT-FR. A sample "LanguagePairs.txt" comes with the program to serve as an example. Customise it to your needs respecting the syntax described above.
-NOTE: Never erase the "LanguagePairs.txt" file and always make sure that each pair of languages that you choose does exist in your TMX files. Otherwise, you won't get any results.
-
-The “Extract some corpora” and “Extract all corpora” functions are particularly useful if you want to prepare corpora for several or many language pairs. If your TMX files have translation units in all of the languages you are interested in, put them in a single directory (it can have subdirectories) and use those functions!
-
-****************************************************************************
-GETTING THE RESULTS:
-****************************************************************************
-The results are the aligned corpora files, as well as other files indicating how well the processing was done.
-
-When the processing is finished, you will find the corpora files in the directory you have chosen when you selected "Select input/output directory". In the "_processing_info" subdirectory of that directory you will find one or more *errors.txt file(s), listing the name of the TMX files that caused an error, and *actions.txt file(s), listing the files that were successfully processed as well as the number of translation units extracted.
-
-If you ask for the extraction of several corpora at once, you'll get lots of corpora files. If you feel somewhat confused by that abundance, please note 2 things:
-a) If you sort the files by order of modified date, you'll reconstitute the chronological order in which the corpora were made (corpora are always made in pairs one after the other);
-b) The name of the corpora file has the following structure:
-
-{Language of the segments} ({Language with which they are aligned})_{Base name of the corpus}.txt
-Ex: the file "BG-01 (MT-01)_Corpus.txt" has segments in the BG-01 (Bulgarian) language that also have a translation in the MT-01 (Maltese) language and corresponds to the corpus whose base name is "Corpus.txt". There should be an equivalent "MT-01 (BG-01)_Corpus.txt", this time with all the Maltese segments that have a translation in Bulgarian. Together, these 2 files constitute an aligned corpus ready to be fed to Moses.
-
-You can now feed Moses your customised corpora :-)
-****************************************************************************
-THANKS:
-****************************************************************************
-Thanks to Gary Daine, who pointed out a way to improve one of the regex expressions used in the code and suggested changes needed for it to cover more translation memories.
-****************************************************************************
-LICENSE:
-****************************************************************************
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation (version 3 of the License).
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/gpl.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/gpl.txt
deleted file mode 100644
index 818433ecc..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Extract_TMX_Corpus-1.043/gpl.txt
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/LanguageCodes.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/LanguageCodes.txt
deleted file mode 100644
index 22ca66c73..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/LanguageCodes.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-BG-01
-CS-01
-DA-01
-DE-DE
-EL-01
-EN-GB
-ES-ES
-ET-01
-FI-01
-FR-FR
-HU-01
-IT-IT
-LT-01
-LV-01
-MT-01
-NL-NL
-PL-01
-PT-PT
-RO-RO
-SK-01
-SL-01
-SV-SE \ No newline at end of file
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.py b/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.py
deleted file mode 100755
index 43ec3c78c..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.py
+++ /dev/null
@@ -1,166 +0,0 @@
-# -*- coding: utf_8 -*-
-"""This program is used to prepare TMX files from corpora composed of 2 files for each language pair,
-where the position of a segment in the first language file is exactly the same as in the second
-language file.
-
-The program requires that Pythoncard and wxPython (as well as Python) be previously installed.
-
-Copyright 2009, 2010 João Luís A. C. Rosas
-
-Distributed under GNU GPL v3 licence (see http://www.gnu.org/licenses/)
-
-E-mail: joao.luis.rosas@gmail.com """
-
-__version__ = "$Revision: 1.032$"
-__date__ = "$Date: 2010/02/25$"
-__author__="$João Luís A. C. Rosas$"
-
-from PythonCard import clipboard, dialog, graphic, model
-from PythonCard.components import button, combobox,statictext,checkbox,staticbox
-import wx
-import os, re
-import string
-import sys
-from time import strftime
-import codecs
-import sys
-
-class Moses2TMX(model.Background):
-
- def on_initialize(self, event):
- self.inputdir=''
- #Get directory where program file is and ...
- currdir=os.getcwd()
- #... load the file ("LanguageCodes.txt") with the list of languages that the program can process
- try:
- self.languages=open(currdir+r'\LanguageCodes.txt','r+').readlines()
- except:
- # If the languages file doesn't exist in the program directory, alert user that it is essential for the good working of the program and exit
- result = dialog.alertDialog(self, 'The file "LanguageCodes.txt" is missing. The program will now close.', 'Essential file missing')
- sys.exit()
- #remove end of line marker from each line in "LanguageCodes.txt"
- for lang in range(len(self.languages)):
- self.languages[lang]=self.languages[lang].rstrip()
- self.lang1code=''
- self.lang2code=''
- #Insert list of language names in appropriate program window's combo boxes
- self.components.cbStartingLanguage.items=self.languages
- self.components.cbDestinationLanguage.items=self.languages
-
- def CreateTMX(self, name):
- print 'Started at '+strftime('%H-%M-%S')
- #get the startinglanguage name (e.g.: "EN-GB") from the program window
- self.lang1code=self.components.cbStartingLanguage.text
- #get the destinationlanguage name from the program window
- self.lang2code=self.components.cbDestinationLanguage.text
- print name+'.'+self.lang2code[:2].lower()
- e=codecs.open(name,'r',"utf-8","strict")
- f=codecs.open(name+'.'+self.lang2code[:2].lower()+'.moses','r',"utf-8","strict")
- a=codecs.open(name+'.tmp','w',"utf-8","strict")
- b=codecs.open(name+'.'+self.lang2code[:2].lower()+'.moses.tmp','w',"utf-8","strict")
- for line in e:
- if line.strip():
- a.write(line)
- for line in f:
- if line.strip():
- b.write(line)
- a=codecs.open(name+'.tmp','r',"utf-8","strict")
- b=codecs.open(name+'.'+self.lang2code[:2].lower()+'.moses.tmp','r',"utf-8","strict")
- g=codecs.open(name+'.tmx','w','utf-16','strict')
- g.write('<?xml version="1.0" ?>\r\n<!DOCTYPE tmx SYSTEM "tmx14.dtd">\r\n<tmx version="version 1.4">\r\n\r\n<header\r\ncreationtool="moses2tmx"\r\ncreationtoolversion="1.032"\r\nsegtype="sentence"\r\ndatatype="PlainText"\r\nadminlang="EN-US"\r\nsrclang="'+self.lang1code+'"\r\n>\r\n</header>\r\n\r\n<body>\r\n')
- parar=0
- while True:
- self.ling1segm=a.readline().strip()
- self.ling2segm=b.readline().strip()
- if not self.ling1segm:
- break
- elif not self.ling2segm:
- break
- else:
- try:
- g.write('<tu creationid="MT!">\r\n<prop type="Txt::Translator">Moses</prop>\r\n<tuv xml:lang="'+self.lang1code+'">\r\n<seg>'+self.ling1segm+'</seg>\r\n</tuv>\r\n<tuv xml:lang="'+self.lang2code+ \
- '">\r\n<seg>'+self.ling2segm+'</seg>\r\n</tuv>\r\n</tu>\r\n\r\n')
- except:
- pass
- a.close()
- b.close()
- e.close()
- f.close()
- g.write('</body>\r\n</tmx>\r\n')
- g.close()
- #os.remove(name)
- #os.remove(name+'.'+self.lang2code[:2].lower()+'.moses')
- os.remove(name+'.tmp')
- os.remove(name+'.'+self.lang2code[:2].lower()+'.moses.tmp')
-
- def createTMXs(self):
- try:
- # Get a list of all TMX files that need to be processed
- fileslist=self.locate('*.moses',self.inputdir)
- except:
- # if any error up to now, add the name of the TMX file to the output file @errors
- self.errortypes=self.errortypes+' - Get All Segments: creation of output files error\n'
- if fileslist:
- # For each relevant TMX file ...
- for self.presentfile in fileslist:
- filename=self.presentfile[:-9]
- #print filename
- self.CreateTMX(filename)
- print 'Finished at '+strftime('%H-%M-%S')
- result = dialog.alertDialog(self, 'Processing done.', 'Processing Done')
-
- def on_btnCreateTMX_mouseClick(self, event):
- self.createTMXs()
-
- def on_menuFileCreateTMXFiles_select(self, event):
- self.createTMXs()
-
- def on_btnSelectLang1File_mouseClick(self, event):
- self.input1=self.GetInputFileName()
-
- def on_btnSelectLang2File_mouseClick(self, event):
- self.input2=self.GetInputFileName()
-
- def locate(self,pattern, basedir):
- """Locate all files matching supplied filename pattern in and below
- supplied root directory.
-
- @pattern: something like '*.tmx'
- @basedir:whole directory to be treated
- """
- import fnmatch
- for path, dirs, files in os.walk(os.path.abspath(basedir)):
- for filename in fnmatch.filter(files, pattern):
- yield os.path.join(path, filename)
-
- def SelectDirectory(self):
- """Select the directory where the files to be processed are
-
- @result: object returned by the dialog window with attributes accepted (true if user clicked OK button, false otherwise) and
- path (list of strings containing the full pathnames to all files selected by the user)
- @self.inputdir: directory where files to be processed are (and where output files will be written)
- @self.statusBar.text: text displayed in the program window status bar"""
-
- result= dialog.directoryDialog(self, 'Choose a directory', 'a')
- if result.accepted:
- self.inputdir=result.path
- self.statusBar.text=self.inputdir+' selected.'
-
- def on_menuFileSelectDirectory_select(self, event):
- self.SelectDirectory()
-
- def on_btnSelectDirectory_mouseClick(self, event):
- self.SelectDirectory()
-
- def on_menuHelpShowHelp_select(self, event):
- f = open('_READ_ME_FIRST.txt', "r")
- msg = f.read()
- result = dialog.scrolledMessageDialog(self, msg, '_READ_ME_FIRST.txt')
-
- def on_menuFileExit_select(self, event):
- sys.exit()
-
-
-if __name__ == '__main__':
- app = model.Application(Moses2TMX)
- app.MainLoop()
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.rsrc.py b/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.rsrc.py
deleted file mode 100755
index dc1570c7f..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/Moses2TMX.rsrc.py
+++ /dev/null
@@ -1,95 +0,0 @@
-{'application':{'type':'Application',
- 'name':'Moses2TMX',
- 'backgrounds': [
- {'type':'Background',
- 'name':'bgMoses2TMX',
- 'title':u'Moses2TMX-1.032',
- 'size':(277, 307),
- 'statusBar':1,
-
- 'menubar': {'type':'MenuBar',
- 'menus': [
- {'type':'Menu',
- 'name':'menuFile',
- 'label':u'&File',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuFileSelectDirectory',
- 'label':u'Select &Directory ...\tAlt+D',
- },
- {'type':'MenuItem',
- 'name':'menuFileCreateTMXFiles',
- 'label':u'&Create TMX Files\tAlt+C',
- },
- {'type':'MenuItem',
- 'name':'Sep1',
- 'label':u'-',
- },
- {'type':'MenuItem',
- 'name':'menuFileExit',
- 'label':u'&Exit\tAlt+E',
- },
- ]
- },
- {'type':'Menu',
- 'name':'menuHelp',
- 'label':u'&Help',
- 'items': [
- {'type':'MenuItem',
- 'name':'menuHelpShowHelp',
- 'label':u'&Show Help\tAlt+S',
- },
- ]
- },
- ]
- },
- 'components': [
-
-{'type':'Button',
- 'name':'btnSelectDirectory',
- 'position':(15, 15),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Select Directory ...',
- },
-
-{'type':'StaticText',
- 'name':'StaticText3',
- 'position':(17, 106),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Target Language:',
- },
-
-{'type':'ComboBox',
- 'name':'cbStartingLanguage',
- 'position':(18, 75),
- 'size':(70, -1),
- 'items':[],
- },
-
-{'type':'ComboBox',
- 'name':'cbDestinationLanguage',
- 'position':(17, 123),
- 'size':(70, -1),
- 'items':[u'DE-PT', u'EN-PT', u'ES-PT', u'FR-PT'],
- },
-
-{'type':'Button',
- 'name':'btnCreateTMX',
- 'position':(20, 160),
- 'size':(225, 25),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'label':u'Create TMX Files',
- },
-
-{'type':'StaticText',
- 'name':'StaticText1',
- 'position':(18, 56),
- 'font':{'faceName': u'Arial', 'family': 'sansSerif', 'size': 10},
- 'text':u'Source Language:',
- },
-
-] # end components
-} # end background
-] # end backgrounds
-} }
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/_READ_ME_FIRST.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/_READ_ME_FIRST.txt
deleted file mode 100644
index d661dd136..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/_READ_ME_FIRST.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-Summary:
- PURPOSE
- REQUIREMENTS
- INSTALLATION
- HOW TO USE
- LICENSE
-
-
-
-***********************************************************************************************************
-PURPOSE:
-***********************************************************************************************************
-Moses2TMX is a Windows program (Windows7, Vista and XP supported) that enables translators not necessarily with a deep knowledge of linguistic tools to create TMX files from a Moses corpus or from any other corpus made up of 2 separate files, one for the source language and another for the target language, whose lines are strictly aligned.
-
-The program processes a whole directory containing source language and corresponding target language documents and creates 1 TMX file (UTF-16 format; Windows line endings) for each document pair that it processes.
-
-The program accepts and preserves text in any language (including special diacritical characters), but has only been tested with European Union official languages.
-
-The program is specifically intended to work with the output of a series of Linux scripts together called Moses-for-Mere-Mortals.
-***********************************************************************************************************
-REQUIREMENTS:
-***********************************************************************************************************
-The program requires the following to be pre-installed in your computer:
-
-1) Python 2.5 (http://www.python.org/download/releases/2.5.4/);
-NOTE1: the program should work with Python 2.6, but has not been tested with it.
-NOTE2: if you use Windows Vista, launch the following installation programs by right-clicking their file in Windows Explorer and choosing the command "Execute as administrator" in the contextual menu.
-2) wxPython 2.8, Unicode version (http://www.wxpython.org/download.php);
-3) Pythoncard 0.8.2 (http://sourceforge.net/projects/pythoncard/files/PythonCard/0.8.2/PythonCard-0.8.2.win32.exe/download)
-
-***********************************************************************************************************
-INSTALLATION:
-***********************************************************************************************************
-1) Download the Moses2TMX.exe file in a directory of your choice.
-2) Double-click Moses2TMX.exe and follow the wizard's instructions.
-***IMPORTANT***: Never erase the file "LanguageCodes.txt" in that directory. It is necessary for telling the program the languages that it has to process. If your TMX files use language codes that are different from those contained in this file, please replace the codes contained in the file with the codes used in your TMX files. You can always add or delete new codes to this file (when the program is not running).
-
-
-***********************************************************************************************************
-HOW TO USE:
-***********************************************************************************************************
-1) Create a directory where you will copy the files that you want to process.
-2) Copy the source and target language documents that you want to process to that directory.
-NOTE -YOU HAVE TO RESPECT SOME NAMING CONVENTIONS IN ORDER TO BE ABLE TO USE THIS PROGRAM:
-a) the target documents have to have follow the following convention:
-
- {basename}.{abbreviation of target language}.moses
-
- where {abbreviation of target language} is a ***2 character*** string containing the lowercased first 2 characters of any of the language codes present in the LanguageCodes.txt (present in the base directory of Moses2TMX)
-
- Example: If {basename} = "200000" and the target language has a code "EN-GB" in the LanguageCodes.txt, then the name of the target file should be "200000.en.moses"
-
-b) the source language document should have the name:
-
- {basename}
-
- Example: continuing the preceding example, the name of the corresponding source document should be "200000".
-
-3) Launch the program as indicated above in the "Launching the program" section.
-4) Operate on the main window of the program in the direction from top to bottom:
-a) Click the "Select Directory..." button to indicate the directory containing all the source and corresponding target documents that you want to process;
-b) Indicate the languages of your files refers to in the "Source Language" and "Target Language" comboboxes.
-c) Click the Create TMX Files button
-
-***********************************************************************************************************
-LICENSE:
-***********************************************************************************************************
-
-This program is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation (version 3 of the License).
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License
-along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-
-
diff --git a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/gpl.txt b/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/gpl.txt
deleted file mode 100644
index 818433ecc..000000000
--- a/contrib/moses-for-mere-mortals/Windows-add-ins/Moses2TMX-1.032/gpl.txt
+++ /dev/null
@@ -1,674 +0,0 @@
- GNU GENERAL PUBLIC LICENSE
- Version 3, 29 June 2007
-
- Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- Preamble
-
- The GNU General Public License is a free, copyleft license for
-software and other kinds of works.
-
- The licenses for most software and other practical works are designed
-to take away your freedom to share and change the works. By contrast,
-the GNU General Public License is intended to guarantee your freedom to
-share and change all versions of a program--to make sure it remains free
-software for all its users. We, the Free Software Foundation, use the
-GNU General Public License for most of our software; it applies also to
-any other work released this way by its authors. You can apply it to
-your programs, too.
-
- When we speak of free software, we are referring to freedom, not
-price. Our General Public Licenses are designed to make sure that you
-have the freedom to distribute copies of free software (and charge for
-them if you wish), that you receive source code or can get it if you
-want it, that you can change the software or use pieces of it in new
-free programs, and that you know you can do these things.
-
- To protect your rights, we need to prevent others from denying you
-these rights or asking you to surrender the rights. Therefore, you have
-certain responsibilities if you distribute copies of the software, or if
-you modify it: responsibilities to respect the freedom of others.
-
- For example, if you distribute copies of such a program, whether
-gratis or for a fee, you must pass on to the recipients the same
-freedoms that you received. You must make sure that they, too, receive
-or can get the source code. And you must show them these terms so they
-know their rights.
-
- Developers that use the GNU GPL protect your rights with two steps:
-(1) assert copyright on the software, and (2) offer you this License
-giving you legal permission to copy, distribute and/or modify it.
-
- For the developers' and authors' protection, the GPL clearly explains
-that there is no warranty for this free software. For both users' and
-authors' sake, the GPL requires that modified versions be marked as
-changed, so that their problems will not be attributed erroneously to
-authors of previous versions.
-
- Some devices are designed to deny users access to install or run
-modified versions of the software inside them, although the manufacturer
-can do so. This is fundamentally incompatible with the aim of
-protecting users' freedom to change the software. The systematic
-pattern of such abuse occurs in the area of products for individuals to
-use, which is precisely where it is most unacceptable. Therefore, we
-have designed this version of the GPL to prohibit the practice for those
-products. If such problems arise substantially in other domains, we
-stand ready to extend this provision to those domains in future versions
-of the GPL, as needed to protect the freedom of users.
-
- Finally, every program is threatened constantly by software patents.
-States should not allow patents to restrict development and use of
-software on general-purpose computers, but in those that do, we wish to
-avoid the special danger that patents applied to a free program could
-make it effectively proprietary. To prevent this, the GPL assures that
-patents cannot be used to render the program non-free.
-
- The precise terms and conditions for copying, distribution and
-modification follow.
-
- TERMS AND CONDITIONS
-
- 0. Definitions.
-
- "This License" refers to version 3 of the GNU General Public License.
-
- "Copyright" also means copyright-like laws that apply to other kinds of
-works, such as semiconductor masks.
-
- "The Program" refers to any copyrightable work licensed under this
-License. Each licensee is addressed as "you". "Licensees" and
-"recipients" may be individuals or organizations.
-
- To "modify" a work means to copy from or adapt all or part of the work
-in a fashion requiring copyright permission, other than the making of an
-exact copy. The resulting work is called a "modified version" of the
-earlier work or a work "based on" the earlier work.
-
- A "covered work" means either the unmodified Program or a work based
-on the Program.
-
- To "propagate" a work means to do anything with it that, without
-permission, would make you directly or secondarily liable for
-infringement under applicable copyright law, except executing it on a
-computer or modifying a private copy. Propagation includes copying,
-distribution (with or without modification), making available to the
-public, and in some countries other activities as well.
-
- To "convey" a work means any kind of propagation that enables other
-parties to make or receive copies. Mere interaction with a user through
-a computer network, with no transfer of a copy, is not conveying.
-
- An interactive user interface displays "Appropriate Legal Notices"
-to the extent that it includes a convenient and prominently visible
-feature that (1) displays an appropriate copyright notice, and (2)
-tells the user that there is no warranty for the work (except to the
-extent that warranties are provided), that licensees may convey the
-work under this License, and how to view a copy of this License. If
-the interface presents a list of user commands or options, such as a
-menu, a prominent item in the list meets this criterion.
-
- 1. Source Code.
-
- The "source code" for a work means the preferred form of the work
-for making modifications to it. "Object code" means any non-source
-form of a work.
-
- A "Standard Interface" means an interface that either is an official
-standard defined by a recognized standards body, or, in the case of
-interfaces specified for a particular programming language, one that
-is widely used among developers working in that language.
-
- The "System Libraries" of an executable work include anything, other
-than the work as a whole, that (a) is included in the normal form of
-packaging a Major Component, but which is not part of that Major
-Component, and (b) serves only to enable use of the work with that
-Major Component, or to implement a Standard Interface for which an
-implementation is available to the public in source code form. A
-"Major Component", in this context, means a major essential component
-(kernel, window system, and so on) of the specific operating system
-(if any) on which the executable work runs, or a compiler used to
-produce the work, or an object code interpreter used to run it.
-
- The "Corresponding Source" for a work in object code form means all
-the source code needed to generate, install, and (for an executable
-work) run the object code and to modify the work, including scripts to
-control those activities. However, it does not include the work's
-System Libraries, or general-purpose tools or generally available free
-programs which are used unmodified in performing those activities but
-which are not part of the work. For example, Corresponding Source
-includes interface definition files associated with source files for
-the work, and the source code for shared libraries and dynamically
-linked subprograms that the work is specifically designed to require,
-such as by intimate data communication or control flow between those
-subprograms and other parts of the work.
-
- The Corresponding Source need not include anything that users
-can regenerate automatically from other parts of the Corresponding
-Source.
-
- The Corresponding Source for a work in source code form is that
-same work.
-
- 2. Basic Permissions.
-
- All rights granted under this License are granted for the term of
-copyright on the Program, and are irrevocable provided the stated
-conditions are met. This License explicitly affirms your unlimited
-permission to run the unmodified Program. The output from running a
-covered work is covered by this License only if the output, given its
-content, constitutes a covered work. This License acknowledges your
-rights of fair use or other equivalent, as provided by copyright law.
-
- You may make, run and propagate covered works that you do not
-convey, without conditions so long as your license otherwise remains
-in force. You may convey covered works to others for the sole purpose
-of having them make modifications exclusively for you, or provide you
-with facilities for running those works, provided that you comply with
-the terms of this License in conveying all material for which you do
-not control copyright. Those thus making or running the covered works
-for you must do so exclusively on your behalf, under your direction
-and control, on terms that prohibit them from making any copies of
-your copyrighted material outside their relationship with you.
-
- Conveying under any other circumstances is permitted solely under
-the conditions stated below. Sublicensing is not allowed; section 10
-makes it unnecessary.
-
- 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
-
- No covered work shall be deemed part of an effective technological
-measure under any applicable law fulfilling obligations under article
-11 of the WIPO copyright treaty adopted on 20 December 1996, or
-similar laws prohibiting or restricting circumvention of such
-measures.
-
- When you convey a covered work, you waive any legal power to forbid
-circumvention of technological measures to the extent such circumvention
-is effected by exercising rights under this License with respect to
-the covered work, and you disclaim any intention to limit operation or
-modification of the work as a means of enforcing, against the work's
-users, your or third parties' legal rights to forbid circumvention of
-technological measures.
-
- 4. Conveying Verbatim Copies.
-
- You may convey verbatim copies of the Program's source code as you
-receive it, in any medium, provided that you conspicuously and
-appropriately publish on each copy an appropriate copyright notice;
-keep intact all notices stating that this License and any
-non-permissive terms added in accord with section 7 apply to the code;
-keep intact all notices of the absence of any warranty; and give all
-recipients a copy of this License along with the Program.
-
- You may charge any price or no price for each copy that you convey,
-and you may offer support or warranty protection for a fee.
-
- 5. Conveying Modified Source Versions.
-
- You may convey a work based on the Program, or the modifications to
-produce it from the Program, in the form of source code under the
-terms of section 4, provided that you also meet all of these conditions:
-
- a) The work must carry prominent notices stating that you modified
- it, and giving a relevant date.
-
- b) The work must carry prominent notices stating that it is
- released under this License and any conditions added under section
- 7. This requirement modifies the requirement in section 4 to
- "keep intact all notices".
-
- c) You must license the entire work, as a whole, under this
- License to anyone who comes into possession of a copy. This
- License will therefore apply, along with any applicable section 7
- additional terms, to the whole of the work, and all its parts,
- regardless of how they are packaged. This License gives no
- permission to license the work in any other way, but it does not
- invalidate such permission if you have separately received it.
-
- d) If the work has interactive user interfaces, each must display
- Appropriate Legal Notices; however, if the Program has interactive
- interfaces that do not display Appropriate Legal Notices, your
- work need not make them do so.
-
- A compilation of a covered work with other separate and independent
-works, which are not by their nature extensions of the covered work,
-and which are not combined with it such as to form a larger program,
-in or on a volume of a storage or distribution medium, is called an
-"aggregate" if the compilation and its resulting copyright are not
-used to limit the access or legal rights of the compilation's users
-beyond what the individual works permit. Inclusion of a covered work
-in an aggregate does not cause this License to apply to the other
-parts of the aggregate.
-
- 6. Conveying Non-Source Forms.
-
- You may convey a covered work in object code form under the terms
-of sections 4 and 5, provided that you also convey the
-machine-readable Corresponding Source under the terms of this License,
-in one of these ways:
-
- a) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by the
- Corresponding Source fixed on a durable physical medium
- customarily used for software interchange.
-
- b) Convey the object code in, or embodied in, a physical product
- (including a physical distribution medium), accompanied by a
- written offer, valid for at least three years and valid for as
- long as you offer spare parts or customer support for that product
- model, to give anyone who possesses the object code either (1) a
- copy of the Corresponding Source for all the software in the
- product that is covered by this License, on a durable physical
- medium customarily used for software interchange, for a price no
- more than your reasonable cost of physically performing this
- conveying of source, or (2) access to copy the
- Corresponding Source from a network server at no charge.
-
- c) Convey individual copies of the object code with a copy of the
- written offer to provide the Corresponding Source. This
- alternative is allowed only occasionally and noncommercially, and
- only if you received the object code with such an offer, in accord
- with subsection 6b.
-
- d) Convey the object code by offering access from a designated
- place (gratis or for a charge), and offer equivalent access to the
- Corresponding Source in the same way through the same place at no
- further charge. You need not require recipients to copy the
- Corresponding Source along with the object code. If the place to
- copy the object code is a network server, the Corresponding Source
- may be on a different server (operated by you or a third party)
- that supports equivalent copying facilities, provided you maintain
- clear directions next to the object code saying where to find the
- Corresponding Source. Regardless of what server hosts the
- Corresponding Source, you remain obligated to ensure that it is
- available for as long as needed to satisfy these requirements.
-
- e) Convey the object code using peer-to-peer transmission, provided
- you inform other peers where the object code and Corresponding
- Source of the work are being offered to the general public at no
- charge under subsection 6d.
-
- A separable portion of the object code, whose source code is excluded
-from the Corresponding Source as a System Library, need not be
-included in conveying the object code work.
-
- A "User Product" is either (1) a "consumer product", which means any
-tangible personal property which is normally used for personal, family,
-or household purposes, or (2) anything designed or sold for incorporation
-into a dwelling. In determining whether a product is a consumer product,
-doubtful cases shall be resolved in favor of coverage. For a particular
-product received by a particular user, "normally used" refers to a
-typical or common use of that class of product, regardless of the status
-of the particular user or of the way in which the particular user
-actually uses, or expects or is expected to use, the product. A product
-is a consumer product regardless of whether the product has substantial
-commercial, industrial or non-consumer uses, unless such uses represent
-the only significant mode of use of the product.
-
- "Installation Information" for a User Product means any methods,
-procedures, authorization keys, or other information required to install
-and execute modified versions of a covered work in that User Product from
-a modified version of its Corresponding Source. The information must
-suffice to ensure that the continued functioning of the modified object
-code is in no case prevented or interfered with solely because
-modification has been made.
-
- If you convey an object code work under this section in, or with, or
-specifically for use in, a User Product, and the conveying occurs as
-part of a transaction in which the right of possession and use of the
-User Product is transferred to the recipient in perpetuity or for a
-fixed term (regardless of how the transaction is characterized), the
-Corresponding Source conveyed under this section must be accompanied
-by the Installation Information. But this requirement does not apply
-if neither you nor any third party retains the ability to install
-modified object code on the User Product (for example, the work has
-been installed in ROM).
-
- The requirement to provide Installation Information does not include a
-requirement to continue to provide support service, warranty, or updates
-for a work that has been modified or installed by the recipient, or for
-the User Product in which it has been modified or installed. Access to a
-network may be denied when the modification itself materially and
-adversely affects the operation of the network or violates the rules and
-protocols for communication across the network.
-
- Corresponding Source conveyed, and Installation Information provided,
-in accord with this section must be in a format that is publicly
-documented (and with an implementation available to the public in
-source code form), and must require no special password or key for
-unpacking, reading or copying.
-
- 7. Additional Terms.
-
- "Additional permissions" are terms that supplement the terms of this
-License by making exceptions from one or more of its conditions.
-Additional permissions that are applicable to the entire Program shall
-be treated as though they were included in this License, to the extent
-that they are valid under applicable law. If additional permissions
-apply only to part of the Program, that part may be used separately
-under those permissions, but the entire Program remains governed by
-this License without regard to the additional permissions.
-
- When you convey a copy of a covered work, you may at your option
-remove any additional permissions from that copy, or from any part of
-it. (Additional permissions may be written to require their own
-removal in certain cases when you modify the work.) You may place
-additional permissions on material, added by you to a covered work,
-for which you have or can give appropriate copyright permission.
-
- Notwithstanding any other provision of this License, for material you
-add to a covered work, you may (if authorized by the copyright holders of
-that material) supplement the terms of this License with terms:
-
- a) Disclaiming warranty or limiting liability differently from the
- terms of sections 15 and 16 of this License; or
-
- b) Requiring preservation of specified reasonable legal notices or
- author attributions in that material or in the Appropriate Legal
- Notices displayed by works containing it; or
-
- c) Prohibiting misrepresentation of the origin of that material, or
- requiring that modified versions of such material be marked in
- reasonable ways as different from the original version; or
-
- d) Limiting the use for publicity purposes of names of licensors or
- authors of the material; or
-
- e) Declining to grant rights under trademark law for use of some
- trade names, trademarks, or service marks; or
-
- f) Requiring indemnification of licensors and authors of that
- material by anyone who conveys the material (or modified versions of
- it) with contractual assumptions of liability to the recipient, for
- any liability that these contractual assumptions directly impose on
- those licensors and authors.
-
- All other non-permissive additional terms are considered "further
-restrictions" within the meaning of section 10. If the Program as you
-received it, or any part of it, contains a notice stating that it is
-governed by this License along with a term that is a further
-restriction, you may remove that term. If a license document contains
-a further restriction but permits relicensing or conveying under this
-License, you may add to a covered work material governed by the terms
-of that license document, provided that the further restriction does
-not survive such relicensing or conveying.
-
- If you add terms to a covered work in accord with this section, you
-must place, in the relevant source files, a statement of the
-additional terms that apply to those files, or a notice indicating
-where to find the applicable terms.
-
- Additional terms, permissive or non-permissive, may be stated in the
-form of a separately written license, or stated as exceptions;
-the above requirements apply either way.
-
- 8. Termination.
-
- You may not propagate or modify a covered work except as expressly
-provided under this License. Any attempt otherwise to propagate or
-modify it is void, and will automatically terminate your rights under
-this License (including any patent licenses granted under the third
-paragraph of section 11).
-
- However, if you cease all violation of this License, then your
-license from a particular copyright holder is reinstated (a)
-provisionally, unless and until the copyright holder explicitly and
-finally terminates your license, and (b) permanently, if the copyright
-holder fails to notify you of the violation by some reasonable means
-prior to 60 days after the cessation.
-
- Moreover, your license from a particular copyright holder is
-reinstated permanently if the copyright holder notifies you of the
-violation by some reasonable means, this is the first time you have
-received notice of violation of this License (for any work) from that
-copyright holder, and you cure the violation prior to 30 days after
-your receipt of the notice.
-
- Termination of your rights under this section does not terminate the
-licenses of parties who have received copies or rights from you under
-this License. If your rights have been terminated and not permanently
-reinstated, you do not qualify to receive new licenses for the same
-material under section 10.
-
- 9. Acceptance Not Required for Having Copies.
-
- You are not required to accept this License in order to receive or
-run a copy of the Program. Ancillary propagation of a covered work
-occurring solely as a consequence of using peer-to-peer transmission
-to receive a copy likewise does not require acceptance. However,
-nothing other than this License grants you permission to propagate or
-modify any covered work. These actions infringe copyright if you do
-not accept this License. Therefore, by modifying or propagating a
-covered work, you indicate your acceptance of this License to do so.
-
- 10. Automatic Licensing of Downstream Recipients.
-
- Each time you convey a covered work, the recipient automatically
-receives a license from the original licensors, to run, modify and
-propagate that work, subject to this License. You are not responsible
-for enforcing compliance by third parties with this License.
-
- An "entity transaction" is a transaction transferring control of an
-organization, or substantially all assets of one, or subdividing an
-organization, or merging organizations. If propagation of a covered
-work results from an entity transaction, each party to that
-transaction who receives a copy of the work also receives whatever
-licenses to the work the party's predecessor in interest had or could
-give under the previous paragraph, plus a right to possession of the
-Corresponding Source of the work from the predecessor in interest, if
-the predecessor has it or can get it with reasonable efforts.
-
- You may not impose any further restrictions on the exercise of the
-rights granted or affirmed under this License. For example, you may
-not impose a license fee, royalty, or other charge for exercise of
-rights granted under this License, and you may not initiate litigation
-(including a cross-claim or counterclaim in a lawsuit) alleging that
-any patent claim is infringed by making, using, selling, offering for
-sale, or importing the Program or any portion of it.
-
- 11. Patents.
-
- A "contributor" is a copyright holder who authorizes use under this
-License of the Program or a work on which the Program is based. The
-work thus licensed is called the contributor's "contributor version".
-
- A contributor's "essential patent claims" are all patent claims
-owned or controlled by the contributor, whether already acquired or
-hereafter acquired, that would be infringed by some manner, permitted
-by this License, of making, using, or selling its contributor version,
-but do not include claims that would be infringed only as a
-consequence of further modification of the contributor version. For
-purposes of this definition, "control" includes the right to grant
-patent sublicenses in a manner consistent with the requirements of
-this License.
-
- Each contributor grants you a non-exclusive, worldwide, royalty-free
-patent license under the contributor's essential patent claims, to
-make, use, sell, offer for sale, import and otherwise run, modify and
-propagate the contents of its contributor version.
-
- In the following three paragraphs, a "patent license" is any express
-agreement or commitment, however denominated, not to enforce a patent
-(such as an express permission to practice a patent or covenant not to
-sue for patent infringement). To "grant" such a patent license to a
-party means to make such an agreement or commitment not to enforce a
-patent against the party.
-
- If you convey a covered work, knowingly relying on a patent license,
-and the Corresponding Source of the work is not available for anyone
-to copy, free of charge and under the terms of this License, through a
-publicly available network server or other readily accessible means,
-then you must either (1) cause the Corresponding Source to be so
-available, or (2) arrange to deprive yourself of the benefit of the
-patent license for this particular work, or (3) arrange, in a manner
-consistent with the requirements of this License, to extend the patent
-license to downstream recipients. "Knowingly relying" means you have
-actual knowledge that, but for the patent license, your conveying the
-covered work in a country, or your recipient's use of the covered work
-in a country, would infringe one or more identifiable patents in that
-country that you have reason to believe are valid.
-
- If, pursuant to or in connection with a single transaction or
-arrangement, you convey, or propagate by procuring conveyance of, a
-covered work, and grant a patent license to some of the parties
-receiving the covered work authorizing them to use, propagate, modify
-or convey a specific copy of the covered work, then the patent license
-you grant is automatically extended to all recipients of the covered
-work and works based on it.
-
- A patent license is "discriminatory" if it does not include within
-the scope of its coverage, prohibits the exercise of, or is
-conditioned on the non-exercise of one or more of the rights that are
-specifically granted under this License. You may not convey a covered
-work if you are a party to an arrangement with a third party that is
-in the business of distributing software, under which you make payment
-to the third party based on the extent of your activity of conveying
-the work, and under which the third party grants, to any of the
-parties who would receive the covered work from you, a discriminatory
-patent license (a) in connection with copies of the covered work
-conveyed by you (or copies made from those copies), or (b) primarily
-for and in connection with specific products or compilations that
-contain the covered work, unless you entered into that arrangement,
-or that patent license was granted, prior to 28 March 2007.
-
- Nothing in this License shall be construed as excluding or limiting
-any implied license or other defenses to infringement that may
-otherwise be available to you under applicable patent law.
-
- 12. No Surrender of Others' Freedom.
-
- If conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License. If you cannot convey a
-covered work so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you may
-not convey it at all. For example, if you agree to terms that obligate you
-to collect a royalty for further conveying from those to whom you convey
-the Program, the only way you could satisfy both those terms and this
-License would be to refrain entirely from conveying the Program.
-
- 13. Use with the GNU Affero General Public License.
-
- Notwithstanding any other provision of this License, you have
-permission to link or combine any covered work with a work licensed
-under version 3 of the GNU Affero General Public License into a single
-combined work, and to convey the resulting work. The terms of this
-License will continue to apply to the part which is the covered work,
-but the special requirements of the GNU Affero General Public License,
-section 13, concerning interaction through a network will apply to the
-combination as such.
-
- 14. Revised Versions of this License.
-
- The Free Software Foundation may publish revised and/or new versions of
-the GNU General Public License from time to time. Such new versions will
-be similar in spirit to the present version, but may differ in detail to
-address new problems or concerns.
-
- Each version is given a distinguishing version number. If the
-Program specifies that a certain numbered version of the GNU General
-Public License "or any later version" applies to it, you have the
-option of following the terms and conditions either of that numbered
-version or of any later version published by the Free Software
-Foundation. If the Program does not specify a version number of the
-GNU General Public License, you may choose any version ever published
-by the Free Software Foundation.
-
- If the Program specifies that a proxy can decide which future
-versions of the GNU General Public License can be used, that proxy's
-public statement of acceptance of a version permanently authorizes you
-to choose that version for the Program.
-
- Later license versions may give you additional or different
-permissions. However, no additional obligations are imposed on any
-author or copyright holder as a result of your choosing to follow a
-later version.
-
- 15. Disclaimer of Warranty.
-
- THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
-APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
-HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
-OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
-THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
-IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
-ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
- 16. Limitation of Liability.
-
- IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
-WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
-THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
-GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
-USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
-DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
-PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
-EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
-SUCH DAMAGES.
-
- 17. Interpretation of Sections 15 and 16.
-
- If the disclaimer of warranty and limitation of liability provided
-above cannot be given local legal effect according to their terms,
-reviewing courts shall apply local law that most closely approximates
-an absolute waiver of all civil liability in connection with the
-Program, unless a warranty or assumption of liability accompanies a
-copy of the Program in return for a fee.
-
- END OF TERMS AND CONDITIONS
-
- How to Apply These Terms to Your New Programs
-
- If you develop a new program, and you want it to be of the greatest
-possible use to the public, the best way to achieve this is to make it
-free software which everyone can redistribute and change under these terms.
-
- To do so, attach the following notices to the program. It is safest
-to attach them to the start of each source file to most effectively
-state the exclusion of warranty; and each file should have at least
-the "copyright" line and a pointer to where the full notice is found.
-
- <one line to give the program's name and a brief idea of what it does.>
- Copyright (C) <year> <name of author>
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Also add information on how to contact you by electronic and paper mail.
-
- If the program does terminal interaction, make it output a short
-notice like this when it starts in an interactive mode:
-
- <program> Copyright (C) <year> <name of author>
- This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
- This is free software, and you are welcome to redistribute it
- under certain conditions; type `show c' for details.
-
-The hypothetical commands `show w' and `show c' should show the appropriate
-parts of the General Public License. Of course, your program's commands
-might be different; for a GUI interface, you would use an "about box".
-
- You should also get your employer (if you work as a programmer) or school,
-if any, to sign a "copyright disclaimer" for the program, if necessary.
-For more information on this, and how to apply and follow the GNU GPL, see
-<http://www.gnu.org/licenses/>.
-
- The GNU General Public License does not permit incorporating your program
-into proprietary programs. If your program is a subroutine library, you
-may consider it more useful to permit linking proprietary applications with
-the library. If this is what you want to do, use the GNU Lesser General
-Public License instead of this License. But first, please read
-<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/contrib/moses-for-mere-mortals/all.css b/contrib/moses-for-mere-mortals/all.css
deleted file mode 100644
index e83379dd4..000000000
--- a/contrib/moses-for-mere-mortals/all.css
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-all.css
-Generated by redsea 14
-Generated by redsea. See license and other details in http://dragoman.org/redsea
-*/
-body {
- margin : 3.5em ;
- padding-bottom : 1.6em ;
- border-bottom : 4px solid #990000 ;
- background-color : #fffff3 ;
- background-image : none ;
- color : black ;
- font-family : monospace ;
- font-size : 1.3em ;
- text-align : justify ;
- margin-top : 0.2em ;
-}
-
-h1 {
- margin-top : 0.6em ;
- margin-bottom : 0.1em ;
- border-bottom : 4px solid #990000 ;
- text-align : center ;
- font-size : 2.0em ;
- color : #770000 ;
-}
-
-a:link {
- text-decoration : none ;
- color : #0000cc ;
-}
-
-a:visited {
- text-decoration : none ;
- color : #0000CC ;
-}
-
-a:hover {
- text-decoration : none ;
- border-bottom : 2px solid #990000 ;
-}
-
-table {
- margin : auto ;
- width : 95% ;
-}
-
-td {
- padding : 0.5em ;
- }
-
-td.feedback {
- height : 4.1em ;
-}
-/* +++ */
diff --git a/contrib/moses-for-mere-mortals/docs/Help-Tutorial.doc b/contrib/moses-for-mere-mortals/docs/Help-Tutorial.doc
deleted file mode 100644
index 2d03240c4..000000000
--- a/contrib/moses-for-mere-mortals/docs/Help-Tutorial.doc
+++ /dev/null
Binary files differ
diff --git a/contrib/moses-for-mere-mortals/docs/Overview.jpeg b/contrib/moses-for-mere-mortals/docs/Overview.jpeg
deleted file mode 100644
index 9b4b1b0f2..000000000
--- a/contrib/moses-for-mere-mortals/docs/Overview.jpeg
+++ /dev/null
Binary files differ
diff --git a/contrib/moses-for-mere-mortals/docs/Quick-Start-Guide.doc b/contrib/moses-for-mere-mortals/docs/Quick-Start-Guide.doc
deleted file mode 100644
index eac30c951..000000000
--- a/contrib/moses-for-mere-mortals/docs/Quick-Start-Guide.doc
+++ /dev/null
Binary files differ
diff --git a/contrib/moses-for-mere-mortals/docs/all.css b/contrib/moses-for-mere-mortals/docs/all.css
deleted file mode 100644
index e83379dd4..000000000
--- a/contrib/moses-for-mere-mortals/docs/all.css
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
-all.css
-Generated by redsea 14
-Generated by redsea. See license and other details in http://dragoman.org/redsea
-*/
-body {
- margin : 3.5em ;
- padding-bottom : 1.6em ;
- border-bottom : 4px solid #990000 ;
- background-color : #fffff3 ;
- background-image : none ;
- color : black ;
- font-family : monospace ;
- font-size : 1.3em ;
- text-align : justify ;
- margin-top : 0.2em ;
-}
-
-h1 {
- margin-top : 0.6em ;
- margin-bottom : 0.1em ;
- border-bottom : 4px solid #990000 ;
- text-align : center ;
- font-size : 2.0em ;
- color : #770000 ;
-}
-
-a:link {
- text-decoration : none ;
- color : #0000cc ;
-}
-
-a:visited {
- text-decoration : none ;
- color : #0000CC ;
-}
-
-a:hover {
- text-decoration : none ;
- border-bottom : 2px solid #990000 ;
-}
-
-table {
- margin : auto ;
- width : 95% ;
-}
-
-td {
- padding : 0.5em ;
- }
-
-td.feedback {
- height : 4.1em ;
-}
-/* +++ */
diff --git a/contrib/moses-for-mere-mortals/docs/thanks.html b/contrib/moses-for-mere-mortals/docs/thanks.html
deleted file mode 100644
index afddba9f8..000000000
--- a/contrib/moses-for-mere-mortals/docs/thanks.html
+++ /dev/null
@@ -1,27 +0,0 @@
-<!DOCTYPE html PUBLIC
- "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
->
-
-<html>
- <head>
- <title>Thanks</title>
- <link rel="stylesheet" href="all.css" type="text/css" media="all" />
- </head>
- <body>
- <h1>Thanks</h1>
- <ul>
- <li><B>Maria Jos&eacute; Machado</B>, whose suggestions and research have influenced significantly the <I>scorer-moses-irstlm-randlm</I> script. She helped in the evaluation of Moses output in general and organised, together with Hil&aacute;rio, a comparative evaluation, made by professional translators, of the qualitative results of Google, Moses and a rule-based MT engine. She suggested a deep restructuring of the Help-Tutorial file. </li>
- <li><B>Hil&aacute;rio Leal Fontes</B>, who made very helpful suggestions and comprehensive tests. He is the author of the <I>breaking_prefixes.pt</I> script (for the Portuguese language). He has compiled the corpora that were used to train Moses and to test these scripts, including 2 very large corpora with 6.6 and 12 million <i>segments</i>. He has also revised the Help file.</li>
- <li><B>Tom Hoar</B>, who consolidated the previous documentation into the Quick-Start-Guide.doc to help users to get up to speed very quickly.</li>
- <li><B>Manuel Tomas Carrasco Benitez</B>, whose <a href="http://dragoman.org/xdossier/" >Xdossier</a> was used to create the pack of the Moses-for-Mere-Mortals files.</li>
- <li><B>Gary Daine</B>, who made helpful remarks and who contributed code for Extract_TMX_Corpus.</li>
- <li>Authors of the <a href="http://www.dlsi.ua.es/~mlf/fosmt-moses.html" >http://www.dlsi.ua.es/~mlf/fosmt-moses.html</a> (<b>Mikel Forcada</b> and <b>Francis Tyers</b>) and of the <a href="http://www.statmt.org/moses_steps.html" >http://www.statmt.org/moses_steps.html</a> pages. These pages have helped me a lot in the first steps with Moses.</li>
- <li>Authors of the documentation of Moses, giza-pp, MGIZA, IRSTLM and RandLM; some of the commentaries of the present scripts describing the various settings include citations of them.</li>
- <li>European Commission's Joint Research Center and Directorate-General for Translation for the <a href="http://wt.jrc.it/lt/Acquis/DGT_TU_1.0/data/" >DGT-TM Acquis</a> - freely available on the JRC website and providing aligned corpora of about 1 million segments of Community law texts in 22 languages- which was used in the demonstration corpus. Please note that only European Community legislation printed in the paper edition of the Official Journal of the European Union is deemed authentic.</li>
- </ul>
- <P>
- <P>
- </body>
-</html>
-
diff --git a/contrib/moses-for-mere-mortals/index.html b/contrib/moses-for-mere-mortals/index.html
deleted file mode 100644
index 537e4db99..000000000
--- a/contrib/moses-for-mere-mortals/index.html
+++ /dev/null
@@ -1,22 +0,0 @@
-<!DOCTYPE html PUBLIC
- "-//W3C//DTD XHTML 1.0 Strict//EN"
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"
->
-
-<html>
- <head>
- <title>Moses for Mere Mortals</title>
- <link rel="stylesheet" href="all.css" type="text/css" media="all" />
- </head>
- <body>
- <h1>Moses for Mere Mortals</h1>
- <ul>
- <li><a href="docs/Overview.jpeg">Overview</a></li>
- <li><a href="docs/Quick-Start-Guide.doc">Quick Start Guide</a></li>
- <li><a href="docs/Help-Tutorial.doc">Help-Tutorial</a></li>
- <li><a href="scripts">Scripts</a></li>
- <li><a href="data-files">Data Files</a></li>
- <li><a href="docs/thanks.html">Thanks</a></li>
- </ul>
- </body>
-</html>
diff --git a/contrib/moses-for-mere-mortals/scripts/create-1.37 b/contrib/moses-for-mere-mortals/scripts/create-1.37
deleted file mode 100644
index 932eb5e92..000000000
--- a/contrib/moses-for-mere-mortals/scripts/create-1.37
+++ /dev/null
@@ -1,557 +0,0 @@
-#!/usr/bin/env bash
-# create-1.37
-# copyright 2009,2010 João L. A. C. Rosas
-# date: 22/09/2010
-# licenced under the GPL licence, version 3
-# the Mosesdecoder (http://sourceforge.net/projects/mosesdecoder/), is a tool upon which this script depends that is licenced under the GNU Library or Lesser General Public License (LGPL)
-# Special thanks to Hilário Leal Fontes and Maria José Machado, who helped to test the script and made very helpful suggestions
-
-# *** Purpose ***: this script downloads, compiles and installs Moses together with MGIZA, IRSTLM, RandLM, the Moses scripts and a demonstration corpus. Tested in in Ubuntu 10.04 LTS (http://releases.ubuntu.com/10.04/)
-
-############################### REQUIREMENTS ################################
-# You should install the following packages (in Ubuntu 10.04 LTS: #
-# http://releases.ubuntu.com/10.04/) *** before launching *** this script: #
-# automake #
-# bison #
-# boost-build #
-# build-essential #
-# flex #
-# help2man #
-# libboost-all-dev #
-# libpthread-stubs0-dev #
-# libgc-dev #
-# libtool #
-# zlibc #
-# zlib1g-dev #
-# gawk #
-# tofrodos #
-#############################################################################
-
-######################################################################################
-# The values of the variables that follow should be filled according to your needs: #
-######################################################################################
-#Full path of the base directory location of your Moses system
-mosesdir="$HOME/moses-irstlm-randlm"
-
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# !!! Please set $mosesnumprocessors to the number of processors of your computer !!!
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#Number of processors in your computer
-mosesnumprocessors=1
-#Install small demo corpus: 1 = Install; Any other value = Do not install (!!! this will install a very small corpus that can be used to see what the scripts and Moses can do; if dodemocorpus is set to 1, this series of scripts will be able to use the demo corpus without you having to change their settings !!!)
-dodemocorpus=1
-#Remove the downloaded compressed packages and some directories no longer needed once the installation is done; 1 = remove the downloaded packages; any other value = do not remove those packages
-removedownloadedpackges=1
-######################################################################################
-# End of parameters that you should fill #
-######################################################################################
-
-######################################################################################
-# DON'T CHANGE THE LINES THAT FOLLOW ... unless you know what you are doing! #
-######################################################################################
-#Present working directory
-prworkdir=$PWD
-#Full path of the directory where the tools (Moses, IRSTLM, RandLM) will be placed
-toolsdir=$mosesdir/tools
-
-#create, if need be, some important directories
-if [ ! -d $mosesdir ]; then
- mkdir -p $mosesdir
- if [ ! -d $mosesdir ]; then
- echo "The $mosesdir directory could not be created. Please make sure that you have enough disk space and that the \$mosesdir setting of this script is a legal name and points to a location in which you have write permissions. Exiting ..."
- exit 0
- fi
-fi
-
-# Directory with files used for corpus training, language model building, tuning, testing and recasing
-#if [ ! -d $mosesdir/corpora_for_training ]; then mkdir -p $mosesdir/corpora_for_training; fi
-
-# Directory with the tools used (Moses, IRSTLM, RandLM, giza-pp, MGIZA, ...)
-if [ ! -d $toolsdir ]; then mkdir -p $toolsdir; fi
-
-# Directory with new and modified scripts
-if [ ! -d $toolsdir/modified-scripts ]; then mkdir -p $toolsdir/modified-scripts; fi
-
-# Directory with the reference (human-made) translation files
-#if [ ! -d $mosesdir/translation_reference ]; then mkdir -p $mosesdir/translation_reference; fi
-
-# Directory with the files that need to be translated by Moses
-#if [ ! -d $mosesdir/translation_input ]; then mkdir -p $mosesdir/translation_input; fi
-
-
-#Addresses of packages used (*pack = name of package; *url = url of package; *dir = directory base name of decompressed, non-compiled package on disk; *newdir = base directory of compiled package on disk)
-#Name of the pack compressed file
-irstlmpack=irstlm-5.22.01.tar.gz
-#URL of package compressed file
-irstlmurl=http://heanet.dl.sourceforge.net/project/irstlm/irstlm/irstlm-5.22.01/$irstlmpack
-#Name of the decompressed package base directory
-irstlmdir=irstlm-5.22.01
-#Name of the compiled package base directory
-irstlmnewdir=irstlm
-randlmpack=randlm-v0.11.tar.gz
-randlmurl=http://kent.dl.sourceforge.net/project/randlm/randlm/randlm%20v0.11/$randlmpack
-randlmdir=randlm
-randlmnewdir=randlm
-mgizapack=mgiza-0.6.3-10-01-11.tar.gz
-mgizaurl=http://www.cs.cmu.edu/~qing/release/$mgizapack
-mgizadir=MGIZA
-mgizanewdir=mgiza
-mosespack=moses-2010-08-13.tgz
-mosesurl=http://sunet.dl.sourceforge.net/project/mosesdecoder/mosesdecoder/2010-08-13/$mosespack
-mosessoftdir=moses
-mosesnewdir=moses
-scriptspack=scripts.tgz
-scriptsurl=http://homepages.inf.ed.ac.uk/jschroe1/how-to/$scriptspack
-scriptsdir=scripts
-scriptsnewdir=scripts
-scorerpack=mteval-v11b.pl
-scorerurl=ftp://jaguar.ncsl.nist.gov/mt/resources/$scorerpack
-datapack=m3-data.tgz
-dataurl="http://www.statmt.org/moses/download/$datapack"
-
-cd $PWD
-cd ..
-wget $dataurl
-tar -xzvf $datapack
-cp -r data-files/corpora_for_training $mosesdir
-cp -r data-files/translation_input $mosesdir
-cp -r data-files/translation_reference $mosesdir
-rm -f $datapack
-if [ ! -f $mosesdir/corpora_for_training/300000.en ]; then
- echo "Data files not correctly created. Exiting..."
- exit 0
-fi
-cd $toolsdir
-
-
-cd $toolsdir
-if [ ! -f $mosesdir/create.moses.log ]; then
- echo "did_irstlm=" > $mosesdir/create.moses.log
- echo "did_randlm=" >> $mosesdir/create.moses.log
- echo "did_mgiza=" >> $mosesdir/create.moses.log
- echo "did_moses=" >> $mosesdir/create.moses.log
- echo "did_mosesscripts=" >> $mosesdir/create.moses.log
- echo "did_newscripts=" >> $mosesdir/create.moses.log
- echo "did_scorer=" >> $mosesdir/create.moses.log
- echo "Download IRSTLM package"
- wget $irstlmurl
- if [ ! -f $irstlmpack ]; then
- echo "$irstlmpack pack not correctly downloaded. Please check your Internet connection. Exiting ..."
- exit 0
- fi
- echo "Download RandLM package"
- wget $randlmurl
- echo "Download MGIZA package"
- wget $mgizaurl
- #echo "Download Moses package"
- #wget $mosesurl
- echo "Download Moses scripts package"
- wget $scriptsurl
- echo "Download scorer package"
- wget $scorerurl
- chmod -R +rwx $toolsdir/$scorerpack
-else
- . $mosesdir/create.moses.log
- if [ "$did_irstlm" = "0" ]; then
- if [ -f $toolsdir/$irstlmpack ]; then
- rm $toolsdir/$irstlmpack
- if [ -d $toolsdir/$irstlmdir ]; then
- rm -rf $toolsdir/$irstlmdir
- fi
- if [ -d $toolsdir/$irstlmnewdir ]; then
- rm -rf $toolsdir/$irstlmnewdir
- fi
- fi
- echo "Download IRSTLM package"
- wget $irstlmurl
- if [ ! -f $irstlmpack ]; then
- echo "$irstlmpack pack not correctly downloaded. Please check your Internet connection. Exiting ..."
- exit 0
- fi
- fi
- if [ "$did_randlm" = "0" ]; then
- if [ -f $toolsdir/$randlmpack ]; then
- rm $toolsdir/$randlmpack
- if [ -d $toolsdir/$randlmdir ]; then
- rm -rf $toolsdir/$randlmdir
- fi
- if [ -d $toolsdir/$randlmnewdir ]; then
- rm -rf $toolsdir/$randlmnewdir
- fi
- fi
- echo "Download RandLM package"
- wget $randlmurl
- fi
- if [ "$did_mgiza" = "0" ]; then
- if [ -f $toolsdir/$mgizapack ]; then
- rm $toolsdir/$mgizapack
- if [ -d $toolsdir/$mgizadir ]; then
- rm -rf $toolsdir/$mgizadir
- fi
- if [ -d $toolsdir/$mgizanewdir ]; then
- rm -rf $toolsdir/$mgizanewdir
- fi
- echo "Download MGIZA package"
- wget $mgizaurl
- fi
- fi
- if [ "$did_moses" = "0" ]; then
- if [ -f $toolsdir/$mosespack ]; then
- rm $toolsdir/$mosespack
- if [ -d $toolsdir/$mosessoftdir ]; then
- rm -rf $toolsdir/$mosessoftdir
- fi
- if [ -d $toolsdir/$mosesnewdir ]; then
- rm -rf $toolsdir/$mosesnewdir
- fi
- fi
- #echo "Download Moses package"
- #wget $mosesurl
- fi
- if [ "$did_mosesscripts" = "0" ]; then
- if [ -f $toolsdir/$scriptspack ]; then
- rm $toolsdir/$scriptspack
- if [ -d $toolsdir/$scriptsdir ]; then
- rm -rf $toolsdir/$scriptsdir
- fi
- if [ -d $toolsdir/$scriptsnewdir ]; then
- rm -rf $toolsdir/$scriptsnewdir
- fi
- fi
- echo "Download Moses script package"
- wget $scriptsurl
- fi
- if [ "$did_scorer" = "0" ]; then
- if [ -f $toolsdir/$scorerpack ]; then
- rm $toolsdir/$scorerpack
- fi
- echo "Download scorer package"
- wget $scorerurl
- fi
-fi
-
-
-if [ -f $mosesdir/create.moses.log ]; then
- . $mosesdir/create.moses.log
-fi
-if [ "$did_irstlm" != "1" ]; then
- echo "****************************************** IRSTLM ..."
- if [ -f $toolsdir/$irstlmpack ]; then
- tar -xzvf $toolsdir/$irstlmpack
- if [ -d $toolsdir/$irstlmdir ]; then
- mv -f $toolsdir/$irstlmdir $toolsdir/$irstlmnewdir
- fi
- else
- wget $irstlmurl
- tar -xzvf $irstlmpack
- if [ -d $toolsdir/$irstlmdir ]; then
- mv -f $toolsdir/$irstlmdir $toolsdir/$irstlmnewdir
- fi
- fi
- if [ -d $toolsdir/$irstlmnewdir ]; then
- cd $toolsdir/$irstlmnewdir
- #./regenerate-makefiles.sh
- ./configure --prefix=$toolsdir/$irstlmnewdir
- make
- make install
- fi
- MachType=`uname -m`
- if [ ! -f $toolsdir/$irstlmnewdir/bin/$MachType/quantize-lm ]; then
- echo "************************ IRSTLM not correctly installed. Script will now exit."
- sed -ie 's/^did_irstlm=.*$/did_irstlm=0/g' $mosesdir/create.moses.log
- exit 0
- else
- echo "************************ IRSTLM correctly installed."
- sed -ie 's/^did_irstlm=.*$/did_irstlm=1/g' $mosesdir/create.moses.log
- export PATH=$toolsdir/$irstlmnewdir/bin/$MachType:$toolsdir/$irstlmnewdir/bin:$PATH
- if [ "$did_moses" = "1" ]; then
- echo "Even though Moses was already correctly installed, it needs to be reinstalled after having recompiled IRSTLM."
- sed -ie 's/^did_moses=.*$/did_moses=0/g' $mosesdir/create.moses.log
- fi
- fi
-else
- echo "************************ IRSTLM already correctly installed. Reusing it."
- export PATH=$toolsdir/$irstlmnewdir/bin/$MachType:$toolsdir/$irstlmnewdir/bin:$PATH
-fi
-
-cd $toolsdir
-. $mosesdir/create.moses.log
-if [ "$did_randlm" != "1" ]; then
- echo "****************************************** RandLM ..."
- cd $toolsdir
- if [ -f $toolsdir/$randlmpack ]; then
- tar -xzvf $randlmpack
- else
- wget $randlmurl
- tar -xzvf $randlmpack
- fi
- if [ -d $toolsdir/$randlmdir/src ]; then
- sed '28i\#include <cstdio>' $toolsdir/$randlmdir/src/RandLMUtils.h > $toolsdir/$randlmdir/src/RandLMUtils.hr
- mv $toolsdir/$randlmdir/src/RandLMUtils.hr $toolsdir/$randlmdir/src/RandLMUtils.h
- cd $toolsdir/$randlmdir/src
- make all
- fi
- if [ ! -f $toolsdir/$randlmdir/bin/buildlm ]; then
- echo "************************ RandLM not correctly installed. Script will now exit."
- sed -ie 's/^did_randlm=.*$/did_randlm=0/g' $mosesdir/create.moses.log
- exit 0
- else
- echo "************************ RandLM correctly installed."
- sed -ie 's/^did_randlm=.*$/did_randlm=1/g' $mosesdir/create.moses.log
- export PATH=$toolsdir/$randlmdir/bin:$PATH
- if [ "$did_moses" = "1" ]; then
- echo "Even though Moses was already correctly installed, it needs to be reinstalled after having recompiled RandLM."
- sed -ie 's/^did_moses=.*$/did_moses=0/g' $mosesdir/create.moses.log
- fi
- fi
-else
- echo "************************ RandLM already correctly installed. Reusing it."
- export PATH=$toolsdir/$randlmdir/bin:$PATH
-fi
-
-cd $toolsdir
-. $mosesdir/create.moses.log
-
-if [ "$did_mgiza" != "1" ]; then
- echo "****************************************** MGIZA ..."
- cd $toolsdir
- if [ -f $toolsdir/$mgizapack ]; then
- tar -xvzf $toolsdir/$mgizapack
- else
- wget $mgizaurl
- tar -xvzf $toolsdir/$mgizapack
- fi
- if [ -d $toolsdir/$mgizadir ]; then
- mv $toolsdir/$mgizadir $toolsdir/$mgizanewdir
- cd $toolsdir/$mgizanewdir
- ./configure --prefix=$toolsdir/$mgizanewdir
- make
- make install
- fi
- if [ ! -f $toolsdir/$mgizanewdir/bin/symal ]; then
- echo "************************ MGIZA not correctly installed. Script will now exit."
- sed -ie 's/^did_mgiza=.*$/did_mgiza=0/g' $mosesdir/create.moses.log
- exit 0
- else
- echo "************************ MGIZA correctly installed."
- sed -ie 's/^did_mgiza=.*$/did_mgiza=1/g' $mosesdir/create.moses.log
-
- # the train-model.perl script, as it is used by these scripts (which enable you to access otherwise hidden parameters), insists on using giza-pp; therefore, rename the
- # MGIZA executables so that they have the names that this script requires; another solution is probably possible, but no time to test it now
- cp $toolsdir/$mgizanewdir/bin/mgiza $toolsdir/$mgizanewdir/bin/GIZA++
- cp $toolsdir/$mgizanewdir/bin/snt2cooc $toolsdir/$mgizanewdir/bin/snt2cooc.out
-
- export PATH=$toolsdir/$mgizanewdir/bin:$toolsdir/$mgizanewdir/scripts:$PATH
- fi
-else
- echo "************************ MGIZA already correctly installed. Reusing it."
- export PATH=$toolsdir/$mgizanewdir/bin:$toolsdir/$mgizanewdir/scripts:$PATH
-fi
-
-if [ "$did_moses" != "1" ]; then
- echo "****************************************** Moses ..."
- cd $toolsdir
- if [ -f $toolsdir/$mosespack ]; then
- tar -xvzf $toolsdir/$mosespack
- chmod -R +rwx $toolsdir/$mosesnewdir
- else
- wget $mosesurl
- tar -xvzf $toolsdir/$mosespack
- chmod -R +rwx $toolsdir/$mosesnewdir
- fi
-
- export mosesdir
-
- if [ -d $toolsdir/$mosesnewdir ]; then
- cd $toolsdir/$mosesnewdir
- ./regenerate-makefiles.sh
- ./configure --with-irstlm=$toolsdir/$irstlmnewdir --with-randlm=$toolsdir/$randlmnewdir
-
- make -j $mosesnumprocessors
- cp $toolsdir/$mosesnewdir/$scriptsdir $toolsdir/$mosesnewdir/$scriptsnewdir
- cd $toolsdir/$mosesnewdir/scripts
- sed -e 's#TARGETDIR=\/home\/pkoehn\/statmt\/bin#TARGETDIR=toolsdir/mosesnewdir#g' -e 's#BINDIR=\/home\/pkoehn\/statmt\/bin#BINDIR=toolsdir/mgizanewdir/bin#g' -e "s#toolsdir#$toolsdir#g" -e "s#mgizanewdir#$mgizanewdir#g" -e "s#mosesnewdir#$mosesnewdir#g" $toolsdir/$mosesnewdir/scripts/Makefile > $toolsdir/$mosesnewdir/scripts/Makefile.new
- mv $toolsdir/$mosesnewdir/scripts/Makefile.new $toolsdir/$mosesnewdir/scripts/Makefile
- make release
- if [ -d $toolsdir/$mosesnewdir/scripts?* ]; then
- rm -rf $toolsdir/$mosesnewdir/scripts
- fi
- fi
- if [ ! -f $toolsdir/$mosesnewdir/moses-cmd/src/moses ]; then
- echo "************************ Moses not correctly installed. Script will now exit."
- sed -ie 's/^did_moses=.*$/did_moses=0/g' $mosesdir/create.moses.log
- exit 0
- else
- echo "************************ Moses correctly installed."
- sed -ie 's/^did_moses=.*$/did_moses=1/g' $mosesdir/create.moses.log
- export SCRIPTS_ROOTDIR=$toolsdir/$mosesnewdir/script*
- fi
-else
- echo "************************ Moses already correctly installed. Reusing it."
- export SCRIPTS_ROOTDIR=$toolsdir/$mosesnewdir/script*
-fi
-
-if [ "$did_mosesscripts" != "1" ]; then
- echo "****************************************** Moses scripts ..."
- . $mosesdir/create.moses.log
- cd $toolsdir
- if [ -f $toolsdir/$scriptspack ]; then
- tar -xzvf $toolsdir/$scriptspack
- else
- wget $scriptsurl
- tar -xvzf $toolsdir/$scriptspack
- fi
- if [ ! -f $toolsdir/$scriptsdir/tokenizer.perl ]; then
- echo "************************ Moses scripts not correctly copied. Script will now exit."
- sed -ie 's/^did_mosesscripts=.*$/did_mosesscripts=0/g' $mosesdir/create.moses.log
- exit 0
- else
- echo "************************ Moses scripts correctly copied."
- sed -ie 's/^did_mosesscripts=.*$/did_mosesscripts=1/g' $mosesdir/create.moses.log
- fi
-else
- echo "************************ Moses scripts already correctly installed. Reusing it."
-fi
-
-if [ "$did_newscripts" != "1" ]; then
- echo "****************************************** New and modified scripts and demo files ..."
- SAVEIFS=$IFS
- IFS=$(echo -en "\n\b")
- cd $prworkdir
- if [ -d $prworkdir/modified-scripts ]; then
- echo "***************** Copy nonbreaking_prefix.pt ..."
- cp -f $prworkdir/modified-scripts/nonbreaking_prefix.pt $toolsdir/scripts/nonbreaking_prefixes
- chmod +x $toolsdir/scripts/nonbreaking_prefixes/nonbreaking_prefix.pt
- echo "***************** Copy mert-moses-new-modif.pl ..."
- cp -f $prworkdir/modified-scripts/mert-moses-new-modif.pl $toolsdir/modified-scripts/mert-moses-new-modif.pl
- chmod +x $toolsdir/modified-scripts/mert-moses-new-modif.pl
- if [ -f $toolsdir/scripts/nonbreaking_prefixes/nonbreaking_prefix.pt -a -f $toolsdir/modified-scripts/mert-moses-new-modif.pl ]; then
- sed -ie 's/^did_newscripts=.*$/did_newscripts=1/g' $mosesdir/create.moses.log
- else
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- echo "Some new/modified scripts could not be copied. Exiting ..."
- IFS=$SAVEIFS
- exit 0
- fi
- else
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- echo "The structure of Moses for Mere Mortals was changed. Please restore the initial structure so that the installation can proceed. New and modified Moses scripts cannot be installed. Exiting ..."
- exit 0
- fi
- IFS=$SAVEIFS
-
- if [ -d ../data-files/corpora_for_training ]; then
- echo "***************** Copy corpora_for_training ..."
- cp ../data-files/corpora_for_training/* $mosesdir/corpora_for_training
- if [ -f $mosesdir/corpora_for_training/300000.en ]; then
- sed -ie 's/^did_newscripts=.*$/did_newscripts=1/g' $mosesdir/create.moses.log
- else
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- echo "Some corpora files needed for the demo of this script could not be copied. Exiting ..."
- exit 0
- fi
- else
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- echo " The structure of Moses for Mere Mortals was changed. Corpora for training cannot be installed. Please restore the initial structure so that the installation can proceed. Exiting ..."
- exit 0
- fi
-
- if [ -d ../data-files/translation_input ]; then
- cp ../data-files/translation_input/* $mosesdir/translation_input
- if [ -f $mosesdir/translation_input/100.pt ]; then
- sed -ie 's/^did_newscripts=.*$/did_newscripts=1/g' $mosesdir/create.moses.log
- else
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- echo "A demo file needed for translation (100.pt) could not be copied. Exiting ..."
- exit 0
- fi
- else
- echo " The structure of Moses for Mere Mortals was changed. Please restore the initial structure so that the installation can proceed. Exiting ..."
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- exit 0
- fi
-
- if [ -d ../data-files/translation_reference ]; then
- cp ../data-files/translation_reference/* $mosesdir/translation_reference
- if [ -f $mosesdir/translation_reference/100.en.ref ]; then
- sed -ie 's/^did_newscripts=.*$/did_newscripts=1/g' $mosesdir/create.moses.log
- else
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- echo "A demo file needed for translation (100.en) could not be copied. Exiting ..."
- exit 0
- fi
- else
- echo " The structure of Moses for Mere Mortals was changed. Please restore the initial structure so that the installation can proceed. Exiting ..."
- sed -ie 's/^did_newscripts=.*$/did_newscripts=0/g' $mosesdir/create.moses.log
- exit 0
- fi
-else
- echo "************************ Modified scripts and demo files already correctly installed. Reusing them."
-fi
-
-if [ "$did_scorer" != "1" ]; then
- echo "****************************************** Scorer ..."
- if [ ! -f $toolsdir/$scorerpack ]; then
- cd $toolsdir
- wget $scorerurl
- if [ ! -f $toolsdir/$scorerpack ]; then
- echo "************************ Scorer not correctly copied. Script will now exit."
- sed -ie 's/^did_scorer=.*$/did_scorer=0/g' $mosesdir/create.moses.log
- exit 0
- else
- chmod +x $toolsdir/$scorerpack
- echo "************************ Scorer correctly copied."
- sed -ie 's/^did_scorer=.*$/did_scorer=1/g' $mosesdir/create.moses.log
- fi
- else
- echo "************************ Scorer correctly copied."
- sed -ie 's/^did_scorer=.*$/did_scorer=1/g' $mosesdir/create.moses.log
- fi
-else
- echo "************************ Scorer already correctly installed. Reusing it."
-fi
-
-
-cd $toolsdir
-if [ -f $mosesdir/create.moses.loge ]; then
- rm $mosesdir/create.moses.loge
-fi
-
-if [ "$removedownloadedpackges" = "1" ]; then
- if [ -f $toolsdir/$irstlmpack ]; then
- rm $toolsdir/$irstlmpack
- fi
- if [ -f $toolsdir/$irstlmdir ]; then
- rm $toolsdir/$irstlmdir
- fi
- if [ -f $toolsdir/$randlmpack ]; then
- rm $toolsdir/$randlmpack
- fi
- if [ -f $toolsdir/$mgizapack ]; then
- rm $toolsdir/$mgizapack
- fi
- if [ -f $toolsdir/$mosespack ]; then
- rm $toolsdir/$mosespack
- fi
- if [ -f $toolsdir/$mosessoftdir ]; then
- rm $toolsdir/$mosessoftdir
- fi
- if [ -f $toolsdir/$scriptspack ]; then
- rm $toolsdir/$scriptspack
- fi
-fi
-echo ""
-echo "!!! Successful end of Moses installation. !!!"
-echo "Moses base directory located in $mosesdir"
-
-#*************************************************************************************************
-# Changes in versions 1.36, 1.37
-#*************************************************************************************************
-#Changed the data files (containing namely the demo corpus) to another location
-#*************************************************************************************************
-# Changes in version 1.35
-#*************************************************************************************************
-# Uses new Moses decoder (published in August 13, 2010 and updated in August 14, 2010)
-# Updates package dependencies
-#Suppresses giza-pp installation (which is not used by the train script)
-# Works in Ubuntu 10.04 LTS (and, if you adapt the package dependencies, with Ubuntu 9.10 and 9.04)
diff --git a/contrib/moses-for-mere-mortals/scripts/make-test-files-0.14 b/contrib/moses-for-mere-mortals/scripts/make-test-files-0.14
deleted file mode 100644
index c0807496d..000000000
--- a/contrib/moses-for-mere-mortals/scripts/make-test-files-0.14
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env bash
-# make-test-files-0.14
-# copyright 2010, João L. A. C. Rosas
-# licenced under the GPL licence, version 3
-# date: 23/08/2010
-# Special thanks to Hilário Leal Fontes and Maria José Machado, who helped to test the script and made very helpful suggestions
-
-# ***Purpose***: given 2 strictly aligned files, one in the source language and another in the target language, this script creates a backup of them and cuts each of them in 2 parts: one that will be used for training and another for testing the training. The initial files are divided into X sectors (defined by the user in the settings of this script) and the script extracts Y pseudorandom segments from each sector (the value Y is also defined by the user). This script can be used to create training test files that attempt to cover the whole universe of the sampling space and which simultaneously sample pseudorandomly each of those sectors Y times in an attempt to get a test file that is more representative of that universe than a list of X*Y consecutive segments would be. The files used for training will have those segments erased. The initial corpus will be preserved (the files that will be used for corpus training are new files created by this script).
-
-###########################################################################################################################################################
-#THIS SCRIPT ASSUMES THAT A IRSTLM AND RANDLM ENABLED MOSES HAS ALREADY BEEN INSTALLED WITH create-moses-irstlm-randlm IN $mosesdir (BY DEFAULT $HOME/moses-irstlm-randlm; CHANGE THIS VARIABLE ACCORDING TO YOUR NEEDS)
-# IT ALSO ASSUMES THAT THE PACKAGES UPON WHICH IT DEPENDS, INDICATED IN the create-moses-irstlm-randlm script, HAVE BEEN INSTALLED
-# This script should be used after the execution of create-moses-irstlm-randlm and before the execution of train-moses-irstlm-randlm (it creates the corpus and the test files that will be used by this latter script)
-###########################################################################################################################################################
-
-##########################################################################################################################################################
-# The values of the variables that follow should be filled according to your needs: # ##########################################################################################################################################################
-#Base path of Moses installation
-mosesdir=$HOME/moses-irstlm-randlm
-#Source language abbreviation
-lang1=pt
-#Target language abbreviation
-lang2=en
-#Number of sectors in which each input file will be cut
-totalnumsectors=100
-#Number of segments pseudorandomly searched in each sector
-numsegs=10
-#Name of the source language file used for creating one of the test files (!!! omit the path; the name should not include spaces !!!)
-basefilename=200000
-##########################################################################################################################################################
-# DO NOT CHANGE THE LINES THAT FOLLOW ... unless you know what you are doing! #
-##########################################################################################################################################################
-startdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-#Function to get a random positive number with up to 10 digits between highest ($1) and lowest ($2)
-randompos(){
- num=$(( ( ($RANDOM & 3)<<30 | $RANDOM<<15 | $RANDOM ) - 0x80000000 ))
- if [ $num -lt 0 ] ; then
- # $1 = highest; $2 = lowest
- newnum=$[ `expr 0 - $num` % ( $[ $1 - $2 ] + 1 ) + $2 ]
- else
- newnum=$[ $num % ( $[ $1 - $2 ] + 1 ) + $2 ]
- fi
- echo $newnum
-}
-
-exchange()
-{
- local temp=${numsegarray[$1]}
- numsegarray[$1]=${numsegarray[$2]}
- numsegarray[$2]=$temp
- return
-}
-
-#This function was published in jeronimo's blog (http://www.roth.lu/serendipity/index.php?/archives/31-Bash-Arrays-and-search-function.html)
-# Function to find out whether something exists in a bash array or not
-bash__is_in_array () {
-haystack=( "$@" )
-haystack_size=( "${#haystack[@]}" )
-needle=${haystack[$((${haystack_size}-1))]}
-for ((i=0;i<$(($haystack_size-1));i++)); do
-h=${haystack[${i}]};
-[ $h = $needle ] && return 42
-done
-}
-
-echo "************* Do some preparatory work (it can take a long time to read the input files, if they are large)"
-#Directory where the source and target language files used for creating one of the test files is located
-basefiledir=$mosesdir/corpora_for_training
-#Directory where will be placed the test files that will be created
-testdir=$mosesdir/corpora_for_training
-
-#Eliminate some control characters that can cause Moses training errors
-tr '\a\b\f\r\v|' ' /' < $basefiledir/$basefilename.$lang1 > $testdir/$basefilename.for_train.$lang1
-tr '\a\b\f\r\v|' ' /' < $basefiledir/$basefilename.$lang2 > $testdir/$basefilename.for_train.$lang2
-
-#Determine the number of lines of each file and check that they are equal
-numlines_s=`wc -l "$basefiledir/$basefilename.$lang1" | awk '{print $1'}`
-numlines_t=`wc -l "$basefiledir/$basefilename.$lang2" | awk '{print $1'}`
-if [ "$numlines_s" != "$numlines_t" ]; then
- echo "Source and target files have a different number of segments (source = $numlines_s and target = $numlines_t). If you verify manually that they do have the same number of segments, then Bash is interpreting at least one of the characters of one of the files as something it isn't. If that is the case, you will have to isolate the line(s) that is (are) causing problems and to substitute the character in question by some other character. Exiting ..."
- exit 0
-fi
-
-#Calculate number of lines per sector
-numlinespersector=$(echo "scale=0; $numlines_s/$totalnumsectors" | bc)
-#Calculate total number of segments to extract
-totsegstoextract=$(echo "scale=0; $totalnumsectors*$numsegs" | bc)
-
-echo "************* $totalnumsectors sectors to extract. This can take some time ..."
-#Create temporary files
-echo > /tmp/$basefilename.for_test.$lang1
-echo > /tmp/$basefilename.for_test.$lang2
-echo "extract segments for testing from:"
-#Total number of segments extracted so far for the training test file
-totsegsextracted=0
-if (( $(echo "scale=0; $totsegstoextract-$numlines_s" | bc) < 0 )); then
- for (( sector=1; sector<=$totalnumsectors; sector++ )) ; do
- echo "sector $sector"
- floor=$(echo "scale=0; $numlinespersector*$sector-$numlinespersector+1" | bc)
- ceiling=$(echo "scale=0; $numlinespersector*$sector" | bc)
- sectornumsegsextracted=0
- number=-1
- while (( $sectornumsegsextracted < $numsegs )) ; do
- number=`randompos $ceiling $floor`
- bash__is_in_array "${numsegarray[@]}" $number
- if [ $? -ne 42 ]; then
- let "sectornumsegsextracted += 1"
- let "totsegsextracted += 1"
- awk "NR==$number{print;exit}" $testdir/$basefilename.for_train.$lang1 >> /tmp/$basefilename.for_test.$lang1
- numsegarray[$totsegsextracted]=$number
- f+=${numsegarray[$totsegsextracted]}
- f+="d;"
- awk "NR==$number{print;exit}" $testdir/$basefilename.for_train.$lang2 >> /tmp/$basefilename.for_test.$lang2
- fi
- done
- done
-
- echo "************* erase segments used for testing in training files"
- f=`echo "$f" | sed 's#\;#\n#g' | sort -nr `
- f=`echo "$f" | sed 's#\n#;#g'`
- f=${f%;;*}
- sed "$f" $testdir/$basefilename.for_train.$lang1 > /tmp/$basefilename.for_train.$lang1.temp
- sed "$f" $testdir/$basefilename.for_train.$lang2 > /tmp/$basefilename.for_train.$lang2.temp
- echo "************* final cleaning operations"
- sed '1d' /tmp/$basefilename.for_test.$lang1 > $testdir/$basefilename.for_test.$lang1
- sed '1d' /tmp/$basefilename.for_test.$lang2 > $testdir/$basefilename.for_test.$lang2
- mv -f /tmp/$basefilename.for_train.$lang1.temp $testdir/$basefilename.for_train.$lang1
- mv -f /tmp/$basefilename.for_train.$lang2.temp $testdir/$basefilename.for_train.$lang2
-else
- echo "The files you want to sample have less lines than the number of sectors times the number of segments that you want to extract per sector. Exiting ..."
- exit 0
-fi
-echo "starting date: $startdate"
-echo "ending date : `date +day:%d/%m/%y-time:%H:%M:%S`"
-echo "!!! Test files created in $testdir/$basefilename.for_test.$lang1 and $testdir/$basefilename.for_test.$lang2. Corpus training files (where the segments selected for training were erased) created in $testdir/$basefilename.for_train.$lang1 and $testdir/$basefilename.for_train.$lang2 !!!"
-
-
diff --git a/contrib/moses-for-mere-mortals/scripts/modified-scripts/READ_ME_FIRST b/contrib/moses-for-mere-mortals/scripts/modified-scripts/READ_ME_FIRST
deleted file mode 100644
index d63fbea8e..000000000
--- a/contrib/moses-for-mere-mortals/scripts/modified-scripts/READ_ME_FIRST
+++ /dev/null
@@ -1,2 +0,0 @@
-1) The mert-moses-new-modif.pl is a file that contains a slight modification of the mert-moses-new.pl script so that tuning can be stopped after a set amount of runs.
-2) The nonbreaking_prefix.pt script is a file whose author is Hilário Leal Fontes.
diff --git a/contrib/moses-for-mere-mortals/scripts/modified-scripts/mert-moses-new-modif.pl b/contrib/moses-for-mere-mortals/scripts/modified-scripts/mert-moses-new-modif.pl
deleted file mode 100755
index bab8532cc..000000000
--- a/contrib/moses-for-mere-mortals/scripts/modified-scripts/mert-moses-new-modif.pl
+++ /dev/null
@@ -1,1217 +0,0 @@
-#!/usr/bin/perl -w
-
-# $Id: mert-moses.pl 1745 2008-05-16 15:54:02Z phkoehn $
-# Usage:
-# mert-moses.pl <foreign> <english> <decoder-executable> <decoder-config>
-# For other options see below or run 'mert-moses.pl --help'
-
-# Notes:
-# <foreign> and <english> should be raw text files, one sentence per line
-# <english> can be a prefix, in which case the files are <english>0, <english>1, etc. are used
-
-# This script is subject to the licence defined by its author (Philipp Koehn)
-
-# Revision history
-
-# 17 Aug 2010 This script was slightly changed so as to create a new $max_runs parameter that can limit the maximal number of tuning iterations (João Rosas)
-# 5 Aug 2009 Handling with different reference length policies (shortest, average, closest) for BLEU
-# and case-sensistive/insensitive evaluation (Nicola Bertoldi)
-# 5 Jun 2008 Forked previous version to support new mert implementation.
-# 13 Feb 2007 Better handling of default values for lambda, now works with multiple
-# models and lexicalized reordering
-# 11 Oct 2006 Handle different input types through parameter --inputype=[0|1]
-# (0 for text, 1 for confusion network, default is 0) (Nicola Bertoldi)
-# 10 Oct 2006 Allow skip of filtering of phrase tables (--no-filter-phrase-table)
-# useful if binary phrase tables are used (Nicola Bertoldi)
-# 28 Aug 2006 Use either closest or average or shortest (default) reference
-# length as effective reference length
-# Use either normalization or not (default) of texts (Nicola Bertoldi)
-# 31 Jul 2006 move gzip run*.out to avoid failure wit restartings
-# adding default paths
-# 29 Jul 2006 run-filter, score-nbest and mert run on the queue (Nicola; Ondrej had to type it in again)
-# 28 Jul 2006 attempt at foolproof usage, strong checking of input validity, merged the parallel and nonparallel version (Ondrej Bojar)
-# 27 Jul 2006 adding the safesystem() function to handle with process failure
-# 22 Jul 2006 fixed a bug about handling relative path of configuration file (Nicola Bertoldi)
-# 21 Jul 2006 adapted for Moses-in-parallel (Nicola Bertoldi)
-# 18 Jul 2006 adapted for Moses and cleaned up (PK)
-# 21 Jan 2005 unified various versions, thorough cleanup (DWC)
-# now indexing accumulated n-best list solely by feature vectors
-# 14 Dec 2004 reimplemented find_threshold_points in C (NMD)
-# 25 Oct 2004 Use either average or shortest (default) reference
-# length as effective reference length (DWC)
-# 13 Oct 2004 Use alternative decoders (DWC)
-# Original version by Philipp Koehn
-
-use FindBin qw($Bin);
-use File::Basename;
-my $SCRIPTS_ROOTDIR = $Bin;
-$SCRIPTS_ROOTDIR =~ s/\/training$//;
-$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
-
-# for each _d_istortion, _l_anguage _m_odel, _t_ranslation _m_odel and _w_ord penalty, there is a list
-# of [ default value, lower bound, upper bound ]-triples. In most cases, only one triple is used,
-# but the translation model has currently 5 features
-
-# defaults for initial values and ranges are:
-
-my $default_triples = {
- # these two basic models exist even if not specified, they are
- # not associated with any model file
- "w" => [ [ 0.0, -1.0, 1.0 ] ], # word penalty
-};
-
-my $additional_triples = {
- # if the more lambda parameters for the weights are needed
- # (due to additional tables) use the following values for them
- "d" => [ [ 1.0, 0.0, 2.0 ], # lexicalized reordering model
- [ 1.0, 0.0, 2.0 ],
- [ 1.0, 0.0, 2.0 ],
- [ 1.0, 0.0, 2.0 ],
- [ 1.0, 0.0, 2.0 ],
- [ 1.0, 0.0, 2.0 ],
- [ 1.0, 0.0, 2.0 ] ],
- "lm" => [ [ 1.0, 0.0, 2.0 ] ], # language model
- "g" => [ [ 1.0, 0.0, 2.0 ], # generation model
- [ 1.0, 0.0, 2.0 ] ],
- "tm" => [ [ 0.3, 0.0, 0.5 ], # translation model
- [ 0.2, 0.0, 0.5 ],
- [ 0.3, 0.0, 0.5 ],
- [ 0.2, 0.0, 0.5 ],
- [ 0.0,-1.0, 1.0 ] ], # ... last weight is phrase penalty
- "lex"=> [ [ 0.1, 0.0, 0.2 ] ], # global lexical model
-};
-
-# moses.ini file uses FULL names for lambdas, while this training script internally (and on the command line)
-# uses ABBR names.
-my $ABBR_FULL_MAP = "d=weight-d lm=weight-l tm=weight-t w=weight-w g=weight-generation lex=weight-lex";
-my %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
-my %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
-
-# We parse moses.ini to figure out how many weights do we need to optimize.
-# For this, we must know the correspondence between options defining files
-# for models and options assigning weights to these models.
-my $TABLECONFIG_ABBR_MAP = "ttable-file=tm lmodel-file=lm distortion-file=d generation-file=g global-lexical-file=lex";
-my %TABLECONFIG2ABBR = map {split(/=/,$_,2)} split /\s+/, $TABLECONFIG_ABBR_MAP;
-
-# There are weights that do not correspond to any input file, they just increase the total number of lambdas we optimize
-#my $extra_lambdas_for_model = {
-# "w" => 1, # word penalty
-# "d" => 1, # basic distortion
-#};
-
-my $minimum_required_change_in_weights = 0.00001;
- # stop if no lambda changes more than this
-
-my $verbose = 0;
-my $usage = 0; # request for --help
-my $___WORKING_DIR = "mert-work";
-my $___DEV_F = undef; # required, input text to decode
-my $___DEV_E = undef; # required, basename of files with references
-my $___DECODER = undef; # required, pathname to the decoder executable
-my $___CONFIG = undef; # required, pathname to startup ini file
-my $___N_BEST_LIST_SIZE = 100;
-my $queue_flags = "-l mem_free=0.5G -hard"; # extra parameters for parallelizer
- # the -l ws0ssmt is relevant only to JHU workshop
-my $___JOBS = undef; # if parallel, number of jobs to use (undef -> serial)
-my $___DECODER_FLAGS = ""; # additional parametrs to pass to the decoder
-my $___LAMBDA = undef; # string specifying the seed weights and boundaries of all lambdas
-my $continue = 0; # should we try to continue from the last saved step?
-my $skip_decoder = 0; # and should we skip the first decoder run (assuming we got interrupted during mert)
-my $___FILTER_PHRASE_TABLE = 1; # filter phrase table
-my $___PREDICTABLE_SEEDS = 0;
-
-
-# Parameter for effective reference length when computing BLEU score
-# Default is to use shortest reference
-# Use "--shortest" to use shortest reference length
-# Use "--average" to use average reference length
-# Use "--closest" to use closest reference length
-# Only one between --shortest, --average and --closest can be set
-# If more than one choice the defualt (--shortest) is used
-my $___SHORTEST = 0;
-my $___AVERAGE = 0;
-my $___CLOSEST = 0;
-
-# Use "--nocase" to compute case-insensitive scores
-my $___NOCASE = 0;
-
-# Use "--nonorm" to non normalize translation before computing scores
-my $___NONORM = 0;
-
-# set 0 if input type is text, set 1 if input type is confusion network
-my $___INPUTTYPE = 0;
-#input weights for CNs and Lattices: don't have a direct ini file counter, so specified here
-my $___INPUTWEIGHTS = 1;
-
-
-my $mertdir = undef; # path to new mert directory
-my $mertargs = undef; # args to pass through to mert
-my $filtercmd = undef; # path to filter-model-given-input.pl
-my $SCORENBESTCMD = undef;
-my $qsubwrapper = undef;
-my $moses_parallel_cmd = undef;
-my $___CONFIG_BAK = undef; # backup pathname to startup ini file
-my $efficient_scorenbest_flag = undef; # set to 1 to activate a time-efficient scoring of nbest lists
- # (this method is more memory-consumptive)
-my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
- # if undef work on all features
- # (others are fixed to the starting values)
-my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loading data (default =-1)
- # -1 means all previous, i.e. from iteration 1
- # 0 means no previous data, i.e. from actual iteration
- # 1 means 1 previous data , i.e. from the actual iteration and from the previous one
- # and so on
-my $max_runs = -1; # maximal number of runs
- # -1 means no limit is set
-
-
-use strict;
-use Getopt::Long;
-GetOptions(
- "working-dir=s" => \$___WORKING_DIR,
- "input=s" => \$___DEV_F,
- "inputtype=i" => \$___INPUTTYPE,
- "inputweights=i" => \$___INPUTWEIGHTS,
- "refs=s" => \$___DEV_E,
- "decoder=s" => \$___DECODER,
- "config=s" => \$___CONFIG,
- "nbest=i" => \$___N_BEST_LIST_SIZE,
- "queue-flags=s" => \$queue_flags,
- "jobs=i" => \$___JOBS,
- "decoder-flags=s" => \$___DECODER_FLAGS,
- "lambdas=s" => \$___LAMBDA,
- "continue" => \$continue,
- "skip-decoder" => \$skip_decoder,
- "shortest" => \$___SHORTEST,
- "average" => \$___AVERAGE,
- "closest" => \$___CLOSEST,
- "nocase" => \$___NOCASE,
- "nonorm" => \$___NONORM,
- "help" => \$usage,
- "verbose" => \$verbose,
- "mertdir=s" => \$mertdir,
- "mertargs=s" => \$mertargs,
- "rootdir=s" => \$SCRIPTS_ROOTDIR,
- "filtercmd=s" => \$filtercmd, # allow to override the default location
- "scorenbestcmd=s" => \$SCORENBESTCMD, # path to score-nbest.py
- "qsubwrapper=s" => \$qsubwrapper, # allow to override the default location
- "mosesparallelcmd=s" => \$moses_parallel_cmd, # allow to override the default location
- "filter-phrase-table!" => \$___FILTER_PHRASE_TABLE, # allow (disallow)filtering of phrase tables
- "predictable-seeds" => \$___PREDICTABLE_SEEDS, # allow (disallow) switch on/off reseeding of random restarts
- "efficient_scorenbest_flag" => \$efficient_scorenbest_flag, # activate a time-efficient scoring of nbest lists
- "activate-features=s" => \$___ACTIVATE_FEATURES, #comma-separated (or blank-separated) list of features to work on (others are fixed to the starting values)
- "prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous)
- "max-runs=i" => \$max_runs, #maximal number of runs
-) or exit(1);
-
-print "Predict $___PREDICTABLE_SEEDS\n";
-
-# the 4 required parameters can be supplied on the command line directly
-# or using the --options
-if (scalar @ARGV == 4) {
- # required parameters: input_file references_basename decoder_executable
- $___DEV_F = shift;
- $___DEV_E = shift;
- $___DECODER = shift;
- $___CONFIG = shift;
-}
-
-print STDERR "After default: $queue_flags\n";
-if ($usage || !defined $___DEV_F || !defined $___DEV_E || !defined $___DECODER || !defined $___CONFIG) {
- print STDERR "usage: mert-moses-new.pl input-text references decoder-executable decoder.ini
-Options:
- --working-dir=mert-dir ... where all the files are created
- --nbest=100 ... how big nbestlist to generate
- --jobs=N ... set this to anything to run moses in parallel
- --mosesparallelcmd=STRING ... use a different script instead of moses-parallel
- --queue-flags=STRING ... anything you with to pass to
- qsub, eg. '-l ws06osssmt=true'
- The default is
- -l mem_free=0.5G -hard
- To reset the parameters, please use \"--queue-flags=' '\" (i.e. a space between
- the quotes).
- --decoder-flags=STRING ... extra parameters for the decoder
- --lambdas=STRING ... default values and ranges for lambdas, a complex string
- such as 'd:1,0.5-1.5 lm:1,0.5-1.5 tm:0.3,0.25-0.75;0.2,0.25-0.75;0.2,0.25-0.75;0.3,0.25-0.75;0,-0.5-0.5 w:0,-0.5-0.5'
- --allow-unknown-lambdas ... keep going even if someone supplies a new lambda
- in the lambdas option (such as 'superbmodel:1,0-1'); optimize it, too
- --continue ... continue from the last achieved state
- --skip-decoder ... skip the decoder run for the first time, assuming that
- we got interrupted during optimization
- --shortest ... Use shortest reference length as effective reference length (mutually exclusive with --average and --closest)
- --average ... Use average reference length as effective reference length (mutually exclusive with --shortest and --closest)
- --closest ... Use closest reference length as effective reference length (mutually exclusive with --shortest and --average)
- --nocase ... Do not preserve case information; i.e. case-insensitive evaluation (default is false)
- --nonorm ... Do not use text normalization (flag is not active, i.e. text is NOT normalized)
- --filtercmd=STRING ... path to filter-model-given-input.pl
- --rootdir=STRING ... where do helpers reside (if not given explicitly)
- --mertdir=STRING ... path to new mert implementation
- --mertargs=STRING ... extra args for mert, eg to specify scorer
- --scorenbestcmd=STRING ... path to score-nbest.py
- --inputtype=[0|1|2] ... Handle different input types (0 for text, 1 for confusion network, 2 for lattices, default is 0)
- --inputweights=N ... For confusion networks and lattices, number of weights to optimize for weight-i
- (must supply -link-param-count N to decoder-flags if N != 1 for decoder to deal with this correctly)
- --no-filter-phrase-table ... disallow filtering of phrase tables
- (useful if binary phrase tables are available)
- --predictable-seeds ... provide predictable seeds to mert so that random restarts are the same on every run
- --efficient_scorenbest_flag ... activate a time-efficient scoring of nbest lists
- (this method is more memory-consumptive)
- --activate-features=STRING ... comma-separated list of features to work on
- (if undef work on all features)
- # (others are fixed to the starting values)
- --prev-aggregate-nbestlist=INT ... number of previous step to consider when loading data (default =-1)
- -1 means all previous, i.e. from iteration 1
- 0 means no previous data, i.e. from actual iteration
- 1 means 1 previous data , i.e. from the actual iteration and from the previous one
- and so on
-
-";
- exit 1;
-}
-
-# update default variables if input is confusion network or lattice
-if ($___INPUTTYPE == 1 || $___INPUTTYPE == 2)
-{
- $ABBR_FULL_MAP = "$ABBR_FULL_MAP I=weight-i";
- %ABBR2FULL = map {split/=/,$_,2} split /\s+/, $ABBR_FULL_MAP;
- %FULL2ABBR = map {my ($a, $b) = split/=/,$_,2; ($b, $a);} split /\s+/, $ABBR_FULL_MAP;
-
- my @my_array;
-
- for(my $i=0 ; $i < $___INPUTWEIGHTS ; $i++)
- {
- push @my_array, [ 1.0, 0.0, 2.0 ];
- }
- push @{$default_triples -> {"I"}}, @my_array;
-
-}
-
-
-# Check validity of input parameters and set defaults if needed
-
-print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
-
-# path of script for filtering phrase tables and running the decoder
-$filtercmd="$SCRIPTS_ROOTDIR/training/filter-model-given-input.pl" if !defined $filtercmd;
-
-$qsubwrapper="$SCRIPTS_ROOTDIR/generic/qsub-wrapper.pl" if !defined $qsubwrapper;
-
-$moses_parallel_cmd = "$SCRIPTS_ROOTDIR/generic/moses-parallel.pl"
- if !defined $moses_parallel_cmd;
-
-
-
-
-die "Error: need to specify the mert directory" if !defined $mertdir;
-
-my $mert_extract_cmd = "$mertdir/extractor";
-my $mert_mert_cmd = "$mertdir/mert";
-
-die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
-die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
-
-$mertargs = "" if !defined $mertargs;
-
-my $scconfig = undef;
-if ($mertargs =~ /\-\-scconfig\s+(.+?)(\s|$)/){
- $scconfig=$1;
- $scconfig =~ s/\,/ /g;
- $mertargs =~ s/\-\-scconfig\s+(.+?)(\s|$)//;
-}
-
-# handling reference lengh strategy
-if (($___CLOSEST + $___AVERAGE + $___SHORTEST) > 1){
- die "You can specify just ONE reference length strategy (closest or shortest or average) not both\n";
-}
-
-if ($___SHORTEST){
- $scconfig .= " reflen:shortest";
-}elsif ($___AVERAGE){
- $scconfig .= " reflen:average";
-}elsif ($___CLOSEST){
- $scconfig .= " reflen:closest";
-}
-
-# handling case-insensitive flag
-if ($___NOCASE) {
- $scconfig .= " case:false";
-}else{
- $scconfig .= " case:true";
-}
-$scconfig =~ s/^\s+//;
-$scconfig =~ s/\s+$//;
-$scconfig =~ s/\s+/,/g;
-
-$scconfig = "--scconfig $scconfig" if ($scconfig);
-
-my $mert_extract_args=$mertargs;
-$mert_extract_args .=" $scconfig";
-
-my $mert_mert_args=$mertargs;
-$mert_mert_args =~ s/\-+(binary|b)\b//;
-$mert_mert_args .=" $scconfig";
-if ($___ACTIVATE_FEATURES){ $mert_mert_args .=" -o \"$___ACTIVATE_FEATURES\""; }
-
-my ($just_cmd_filtercmd,$x) = split(/ /,$filtercmd);
-die "Not executable: $just_cmd_filtercmd" if ! -x $just_cmd_filtercmd;
-die "Not executable: $moses_parallel_cmd" if defined $___JOBS && ! -x $moses_parallel_cmd;
-die "Not executable: $qsubwrapper" if defined $___JOBS && ! -x $qsubwrapper;
-die "Not executable: $___DECODER" if ! -x $___DECODER;
-
-
-my $input_abs = ensure_full_path($___DEV_F);
-die "File not found: $___DEV_F (interpreted as $input_abs)."
- if ! -e $input_abs;
-$___DEV_F = $input_abs;
-
-
-my $decoder_abs = ensure_full_path($___DECODER);
-die "File not found: $___DECODER (interpreted as $decoder_abs)."
- if ! -x $decoder_abs;
-$___DECODER = $decoder_abs;
-
-
-my $ref_abs = ensure_full_path($___DEV_E);
-# check if English dev set (reference translations) exist and store a list of all references
-my @references;
-if (-e $ref_abs) {
- push @references, $ref_abs;
-}
-else {
- # if multiple file, get a full list of the files
- my $part = 0;
- while (-e $ref_abs.$part) {
- push @references, $ref_abs.$part;
- $part++;
- }
- die("Reference translations not found: $___DEV_E (interpreted as $ref_abs)") unless $part;
-}
-
-my $config_abs = ensure_full_path($___CONFIG);
-die "File not found: $___CONFIG (interpreted as $config_abs)."
- if ! -e $config_abs;
-$___CONFIG = $config_abs;
-
-
-
-# check validity of moses.ini and collect number of models and lambdas per model
-# need to make a copy of $extra_lambdas_for_model, scan_config spoils it
-#my %copy_of_extra_lambdas_for_model = %$extra_lambdas_for_model;
-my %used_triples = %{$default_triples};
-my ($models_used) = scan_config($___CONFIG);
-
-# Parse the lambda config string and convert it to a nice structure in the same format as $used_triples
-if (defined $___LAMBDA) {
- my %specified_triples;
- # interpreting lambdas from command line
- foreach (split(/\s+/,$___LAMBDA)) {
- my ($name,$values) = split(/:/);
- die "Malformed setting: '$_', expected name:values\n" if !defined $name || !defined $values;
- foreach my $startminmax (split/;/,$values) {
- if ($startminmax =~ /^(-?[\.\d]+),(-?[\.\d]+)-(-?[\.\d]+)$/) {
- my $start = $1;
- my $min = $2;
- my $max = $3;
- push @{$specified_triples{$name}}, [$start, $min, $max];
- }
- else {
- die "Malformed feature range definition: $name => $startminmax\n";
- }
- }
- }
- # sanity checks for specified lambda triples
- foreach my $name (keys %used_triples) {
- die "No lambdas specified for '$name', but ".($#{$used_triples{$name}}+1)." needed.\n"
- unless defined($specified_triples{$name});
- die "Number of lambdas specified for '$name' (".($#{$specified_triples{$name}}+1).") does not match number needed (".($#{$used_triples{$name}}+1).")\n"
- if (($#{$used_triples{$name}}) != ($#{$specified_triples{$name}}));
- }
- foreach my $name (keys %specified_triples) {
- die "Lambdas specified for '$name' ".(@{$specified_triples{$name}}).", but none needed.\n"
- unless defined($used_triples{$name});
- }
- %used_triples = %specified_triples;
-}
-
-# moses should use our config
-if ($___DECODER_FLAGS =~ /(^|\s)-(config|f) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(ttable-file|t) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(distortion-file) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(generation-file) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(lmodel-file) /
-|| $___DECODER_FLAGS =~ /(^|\s)-(global-lexical-file) /
-) {
- die "It is forbidden to supply any of -config, -ttable-file, -distortion-file, -generation-file or -lmodel-file in the --decoder-flags.\nPlease use only the --config option to give the config file that lists all the supplementary files.";
-}
-
-# as weights are normalized in the next steps (by cmert)
-# normalize initial LAMBDAs, too
-my $need_to_normalize = 1;
-
-
-
-my @order_of_lambdas_from_decoder = ();
-# this will store the labels of scores coming out of the decoder (and hence the order of lambdas coming out of mert)
-# we will use the array to interpret the lambdas
-# the array gets filled with labels only after first nbestlist was generated
-
-
-
-
-#store current directory and create the working directory (if needed)
-my $cwd = `pawd 2>/dev/null`;
-if(!$cwd){$cwd = `pwd`;}
-chomp($cwd);
-
-safesystem("mkdir -p $___WORKING_DIR") or die "Can't mkdir $___WORKING_DIR";
-
-{
-# open local scope
-
-#chdir to the working directory
-chdir($___WORKING_DIR) or die "Can't chdir to $___WORKING_DIR";
-
-# fixed file names
-my $mert_logfile = "mert.log";
-my $weights_in_file = "init.opt";
-my $weights_out_file = "weights.txt";
-
-
-# set start run
-my $start_run = 1;
-my $bestpoint = undef;
-my $devbleu = undef;
-
-my $prev_feature_file = undef;
-my $prev_score_file = undef;
-
-if ($continue) {
- # getting the last finished step
- print STDERR "Trying to continue an interrupted optimization.\n";
- open IN, "finished_step.txt" or die "Failed to find the step number, failed to read finished_step.txt";
- my $step = <IN>;
- chomp $step;
- close IN;
-
- print STDERR "Last finished step is $step\n";
-
- # getting the first needed step
- my $firststep;
- if ($prev_aggregate_nbl_size==-1){
- $firststep=1;
- }
- else{
- $firststep=$step-$prev_aggregate_nbl_size+1;
- $firststep=($firststep>0)?$firststep:1;
- }
-
-#checking if all needed data are available
- if ($firststep<=$step){
- print STDERR "First previous needed data index is $firststep\n";
- print STDERR "Checking whether all needed data (from step $firststep to step $step) are available\n";
-
- for (my $prevstep=$firststep; $prevstep<=$step;$prevstep++){
- print STDERR "Checking whether data of step $prevstep are available\n";
- if (! -e "run$prevstep.features.dat"){
- die "Can't start from step $step, because run$prevstep.features.dat was not found!";
- }else{
- if (defined $prev_feature_file){
- $prev_feature_file = "${prev_feature_file},run$prevstep.features.dat";
- }
- else{
- $prev_feature_file = "run$prevstep.features.dat";
- }
- }
- if (! -e "run$prevstep.scores.dat"){
- die "Can't start from step $step, because run$prevstep.scores.dat was not found!";
- }else{
- if (defined $prev_score_file){
- $prev_score_file = "${prev_score_file},run$prevstep.scores.dat";
- }
- else{
- $prev_score_file = "run$prevstep.scores.dat";
- }
- }
- }
- if (! -e "run$step.weights.txt"){
- die "Can't start from step $step, because run$step.weights.txt was not found!";
- }
- if (! -e "run$step.$mert_logfile"){
- die "Can't start from step $step, because run$step.$mert_logfile was not found!";
- }
- if (! -e "run$step.best$___N_BEST_LIST_SIZE.out.gz"){
- die "Can't start from step $step, because run$step.best$___N_BEST_LIST_SIZE.out.gz was not found!";
- }
- print STDERR "All needed data are available\n";
-
- print STDERR "Loading information from last step ($step)\n";
- open(IN,"run$step.$mert_logfile") or die "Can't open run$step.$mert_logfile";
- while (<IN>) {
- if (/Best point:\s*([\s\d\.\-e]+?)\s*=> ([\-\d\.]+)/) {
- $bestpoint = $1;
- $devbleu = $2;
- last;
- }
- }
- close IN;
- die "Failed to parse mert.log, missed Best point there."
- if !defined $bestpoint || !defined $devbleu;
- print "($step) BEST at $step $bestpoint => $devbleu at ".`date`;
-
- my @newweights = split /\s+/, $bestpoint;
-
-
- print STDERR "Reading last cached lambda values (result from step $step)\n";
- @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -c < run$step.best$___N_BEST_LIST_SIZE.out.gz |");
-
-
- # update my cache of lambda values
- store_new_lambda_values(\%used_triples, \@order_of_lambdas_from_decoder, \@newweights);
-
- }
- else{
- print STDERR "No pevious data are needed\n";
- }
-
- $start_run = $step +1;
-}
-
-if ($___FILTER_PHRASE_TABLE){
- # filter the phrase tables wih respect to input, use --decoder-flags
- print "filtering the phrase tables... ".`date`;
- my $cmd = "$filtercmd ./filtered $___CONFIG $___DEV_F";
- if (defined $___JOBS) {
- safesystem("$qsubwrapper -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=filterphrases.out -stderr=filterphrases.err" )
- or die "Failed to submit filtering of tables to the queue (via $qsubwrapper)";
- } else {
- safesystem($cmd) or die "Failed to filter the tables.";
- }
-
- # make a backup copy of startup ini file
- $___CONFIG_BAK = $___CONFIG;
- # the decoder should now use the filtered model
- $___CONFIG = "filtered/moses.ini";
-}
-else{
- # do not filter phrase tables (useful if binary phrase tables are available)
- # use the original configuration file
- $___CONFIG_BAK = $___CONFIG;
-}
-
-my $PARAMETERS;
-#$PARAMETERS = $___DECODER_FLAGS . " -config $___CONFIG -inputtype $___INPUTTYPE";
-$PARAMETERS = $___DECODER_FLAGS;
-
-my $run=$start_run-1;
-
-my $oldallsorted = undef;
-my $allsorted = undef;
-
-my $cmd;
-# features and scores from the last run.
-my $nbest_file=undef;
-
-while(1) {
- $run++;
- # run beamdecoder with option to output nbestlists
- # the end result should be (1) @NBEST_LIST, a list of lists; (2) @SCORE, a list of lists of lists
-
- print "run $run start at ".`date`;
-
- # In case something dies later, we might wish to have a copy
- create_config($___CONFIG, "./run$run.moses.ini", \%used_triples, $run, (defined$devbleu?$devbleu:"--not-estimated--"));
-
-
- # skip if the user wanted
- if (!$skip_decoder) {
- print "($run) run decoder to produce n-best lists\n";
- $nbest_file = run_decoder(\%used_triples, $PARAMETERS, $run, \@order_of_lambdas_from_decoder, $need_to_normalize);
- $need_to_normalize = 0;
- safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out";
- $nbest_file = $nbest_file.".gz";
- }
- else {
- die "Skipping not yet supported\n";
- #print "skipped decoder run\n";
- #if (0 == scalar @order_of_lambdas_from_decoder) {
- # @order_of_lambdas_from_decoder = get_order_of_scores_from_nbestlist("gunzip -dc run*.best*.out.gz | head -1 |");
- #}
- #$skip_decoder = 0;
- #$need_to_normalize = 0;
- }
-
-
-
- # extract score statistics and features from the nbest lists
- print STDERR "Scoring the nbestlist.\n";
-
- my $base_feature_file = "features.dat";
- my $base_score_file = "scores.dat";
- my $feature_file = "run$run.${base_feature_file}";
- my $score_file = "run$run.${base_score_file}";
-
- $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file --ffile $feature_file -r ".join(",", @references)." -n $nbest_file";
-
- if (defined $___JOBS) {
- safesystem("$qsubwrapper -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=extract.out -stderr=extract.err" )
- or die "Failed to submit extraction to queue (via $qsubwrapper)";
- } else {
- safesystem("$cmd > extract.out 2> extract.err") or die "Failed to do extraction of statistics.";
- }
-
- # Create the initial weights file for mert, in init.opt
- # mert reads in the file init.opt containing the current
- # values of lambda.
-
- # We need to prepare the files and **the order of the lambdas must
- # correspond to the order @order_of_lambdas_from_decoder
-
- # NB: This code is copied from the old version of mert-moses.pl,
- # even though the max,min and name are not yet used in the new
- # version.
-
- my @MIN = (); # lower bounds
- my @MAX = (); # upper bounds
- my @CURR = (); # the starting values
- my @NAME = (); # to which model does the lambda belong
-
- # walk in order of @order_of_lambdas_from_decoder and collect the min,max,val
- my %visited = ();
- foreach my $name (@order_of_lambdas_from_decoder) {
- next if $visited{$name};
- $visited{$name} = 1;
- if (!defined $used_triples{$name})
- {
- die "The decoder produced also some '$name' scores, but we do not know the ranges for them, no way to optimize them\n";
- }
-
- my $count = 0;
- foreach my $feature (@{$used_triples{$name}}) {
- $count++;
- my ($val, $min, $max) = @$feature;
- push @CURR, $val;
- push @MIN, $min;
- push @MAX, $max;
- push @NAME, $name;
- }
- }
-
- open(OUT,"> $weights_in_file") or die "Can't write $weights_in_file (WD now $___WORKING_DIR)";
- print OUT join(" ", @CURR)."\n";
- close(OUT);
-
- # make a backup copy labelled with this run number
- safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
-
- my $DIM = scalar(@CURR); # number of lambdas
-
- # run mert
- $cmd = "$mert_mert_cmd -d $DIM $mert_mert_args -n 20";
- if ($___PREDICTABLE_SEEDS) {
- my $seed = $run * 1000;
- $cmd = $cmd." -r $seed";
- }
-
- if (defined $prev_feature_file) {
- $cmd = $cmd." --ffile $prev_feature_file,$feature_file";
- }
- else{
- $cmd = $cmd." --ffile $feature_file";
- }
- if (defined $prev_score_file) {
- $cmd = $cmd." --scfile $prev_score_file,$score_file";
- }
- else{
- $cmd = $cmd." --scfile $score_file";
- }
-
- $cmd = $cmd." --ifile run$run.$weights_in_file";
-
- if (defined $___JOBS) {
- safesystem("$qsubwrapper -command='$cmd' -stderr=$mert_logfile -queue-parameter=\"$queue_flags\"") or die "Failed to start mert (via qsubwrapper $qsubwrapper)";
- } else {
- safesystem("$cmd 2> $mert_logfile") or die "Failed to run mert";
- }
- die "Optimization failed, file $weights_out_file does not exist or is empty"
- if ! -s $weights_out_file;
-
-
- # backup copies
- safesystem ("\\cp -f extract.err run$run.extract.err") or die;
- safesystem ("\\cp -f extract.out run$run.extract.out") or die;
- safesystem ("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
- safesystem ("touch $mert_logfile run$run.$mert_logfile") or die;
- safesystem ("\\cp -f $weights_out_file run$run.$weights_out_file") or die; # this one is needed for restarts, too
-
- print "run $run end at ".`date`;
-
- $bestpoint = undef;
- $devbleu = undef;
- open(IN,"run$run.$mert_logfile") or die "Can't open run$run.$mert_logfile";
- while (<IN>) {
- if (/Best point:\s*([\s\d\.\-e]+?)\s*=> ([\-\d\.]+)/) {
- $bestpoint = $1;
- $devbleu = $2;
- last;
- }
- }
- close IN;
- die "Failed to parse mert.log, missed Best point there."
- if !defined $bestpoint || !defined $devbleu;
- print "($run) BEST at $run: $bestpoint => $devbleu at ".`date`;
-
- my @newweights = split /\s+/, $bestpoint;
-
- # update my cache of lambda values
- store_new_lambda_values(\%used_triples, \@order_of_lambdas_from_decoder, \@newweights);
-
- ## additional stopping criterion: weights have not changed
- my $shouldstop = 1;
- for(my $i=0; $i<@CURR; $i++) {
- die "Lost weight! mert reported fewer weights (@newweights) than we gave it (@CURR)"
- if !defined $newweights[$i];
- if (abs($CURR[$i] - $newweights[$i]) >= $minimum_required_change_in_weights) {
- $shouldstop = 0;
- last;
- }
- }
-
- open F, "> finished_step.txt" or die "Can't mark finished step";
- print F $run."\n";
- close F;
-
-
- if ($shouldstop) {
- print STDERR "None of the weights changed more than $minimum_required_change_in_weights. Stopping.\n";
- last;
- }
-
- #Next 7 lines added to make it stop after x number of runs
- if ($run==$max_runs){
- $shouldstop = 1;
- }
- if ($shouldstop) {
- print STDERR "Maximal limit of $max_runs attained. Stopping.\n";
- last;
- }
-
- my $firstrun;
- if ($prev_aggregate_nbl_size==-1){
- $firstrun=1;
- }
- else{
- $firstrun=$run-$prev_aggregate_nbl_size+1;
- $firstrun=($firstrun>0)?$firstrun:1;
- }
- print "loading data from $firstrun to $run (prev_aggregate_nbl_size=$prev_aggregate_nbl_size)\n";
- $prev_feature_file = undef;
- $prev_score_file = undef;
- for (my $i=$firstrun;$i<=$run;$i++){
- if (defined $prev_feature_file){
- $prev_feature_file = "${prev_feature_file},run${i}.${base_feature_file}";
- }
- else{
- $prev_feature_file = "run${i}.${base_feature_file}";
- }
- if (defined $prev_score_file){
- $prev_score_file = "${prev_score_file},run${i}.${base_score_file}";
- }
- else{
- $prev_score_file = "run${i}.${base_score_file}";
- }
- }
- print "loading data from $prev_feature_file\n" if defined($prev_feature_file);
- print "loading data from $prev_score_file\n" if defined($prev_score_file);
-}
-print "Training finished at ".`date`;
-
-if (defined $allsorted){ safesystem ("\\rm -f $allsorted") or die; };
-
-safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
-safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
-
-create_config($___CONFIG_BAK, "./moses.ini", \%used_triples, $run, $devbleu);
-
-# just to be sure that we have the really last finished step marked
-open F, "> finished_step.txt" or die "Can't mark finished step";
-print F $run."\n";
-close F;
-
-
-#chdir back to the original directory # useless, just to remind we were not there
-chdir($cwd);
-
-} # end of local scope
-
-sub store_new_lambda_values {
- # given new lambda values (in given order), replace the 'val' element in our triples
- my $triples = shift;
- my $names = shift;
- my $values = shift;
-
- my %idx = ();
- foreach my $i (0..scalar(@$values)-1) {
- my $name = $names->[$i];
- die "Missed name for lambda $values->[$i] (in @$values; names: @$names)"
- if !defined $name;
- if (!defined $idx{$name}) {
- $idx{$name} = 0;
- } else {
- $idx{$name}++;
- }
- die "We did not optimize '$name', but moses returned it back to us"
- if !defined $triples->{$name};
- die "Moses gave us too many lambdas for '$name', we had ".scalar(@{$triples->{$name}})
- ." but we got at least ".$idx{$name}+1
- if !defined $triples->{$name}->[$idx{$name}];
-
- # set the corresponding field in triples
- # print STDERR "Storing $i-th score as $name: $idx{$name}: $values->[$i]\n";
- $triples->{$name}->[$idx{$name}]->[0] = $values->[$i];
- }
-}
-
-sub dump_triples {
- my $triples = shift;
-
- foreach my $name (keys %$triples) {
- foreach my $triple (@{$triples->{$name}}) {
- my ($val, $min, $max) = @$triple;
- print STDERR "Triples: $name\t$val\t$min\t$max ($triple)\n";
- }
- }
-}
-
-
-sub run_decoder {
- my ($triples, $parameters, $run, $output_order_of_lambdas, $need_to_normalize) = @_;
- my $filename_template = "run%d.best$___N_BEST_LIST_SIZE.out";
- my $filename = sprintf($filename_template, $run);
-
- print "params = $parameters\n";
- # prepare the decoder config:
- my $decoder_config = "";
- my @vals = ();
- foreach my $name (keys %$triples) {
- $decoder_config .= "-$name ";
- foreach my $triple (@{$triples->{$name}}) {
- my ($val, $min, $max) = @$triple;
- $decoder_config .= "%.6f ";
- push @vals, $val;
- }
- }
- if ($need_to_normalize) {
- print STDERR "Normalizing lambdas: @vals\n";
- my $totlambda=0;
- grep($totlambda+=abs($_),@vals);
- grep($_/=$totlambda,@vals);
- }
- print STDERR "DECODER_CFG = $decoder_config\n";
- print STDERR " values = @vals\n";
- $decoder_config = sprintf($decoder_config, @vals);
- print "decoder_config = $decoder_config\n";
-
- # run the decoder
- my $nBest_cmd = "-n-best-size $___N_BEST_LIST_SIZE";
- my $decoder_cmd;
-
- if (defined $___JOBS) {
- $decoder_cmd = "$moses_parallel_cmd -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$parameters $decoder_config\" -n-best-file \"$filename\" -n-best-size $___N_BEST_LIST_SIZE -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
- } else {
- $decoder_cmd = "$___DECODER $parameters -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -i $___DEV_F > run$run.out";
- }
-
- safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";
-
- if (0 == scalar @$output_order_of_lambdas) {
- # we have to peek at the nbestlist
- @$output_order_of_lambdas = get_order_of_scores_from_nbestlist($filename);
- }
- # we have checked the nbestlist already, we trust the order of output scores does not change
- return $filename;
-}
-
-sub get_order_of_scores_from_nbestlist {
- # read the first line and interpret the ||| label: num num num label2: num ||| column in nbestlist
- # return the score labels in order
- my $fname_or_source = shift;
- print STDERR "Peeking at the beginning of nbestlist to get order of scores: $fname_or_source\n";
- open IN, $fname_or_source or die "Failed to get order of scores from nbestlist '$fname_or_source'";
- my $line = <IN>;
- close IN;
- die "Line empty in nbestlist '$fname_or_source'" if !defined $line;
- my ($sent, $hypo, $scores, $total) = split /\|\|\|/, $line;
- $scores =~ s/^\s*|\s*$//g;
- die "No scores in line: $line" if $scores eq "";
-
- my @order = ();
- my $label = undef;
- foreach my $tok (split /\s+/, $scores) {
- if ($tok =~ /^([a-z][0-9a-z]*):/i) {
- $label = $1;
- } elsif ($tok =~ /^-?[-0-9.e]+$/) {
- # a score found, remember it
- die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
- if !defined $label;
- push @order, $label;
- } else {
- die "Not a label, not a score '$tok'. Failed to parse the scores string: '$scores' of nbestlist '$fname_or_source'";
- }
- }
- print STDERR "The decoder returns the scores in this order: @order\n";
- return @order;
-}
-
-sub create_config {
- my $infn = shift; # source config
- my $outfn = shift; # where to save the config
- my $triples = shift; # the lambdas we should write
- my $iteration = shift; # just for verbosity
- my $bleu_achieved = shift; # just for verbosity
-
- my %P; # the hash of all parameters we wish to override
-
- # first convert the command line parameters to the hash
- { # ensure local scope of vars
- my $parameter=undef;
- print "Parsing --decoder-flags: |$___DECODER_FLAGS|\n";
- $___DECODER_FLAGS =~ s/^\s*|\s*$//;
- $___DECODER_FLAGS =~ s/\s+/ /;
- foreach (split(/ /,$___DECODER_FLAGS)) {
- if (/^\-([^\d].*)$/) {
- $parameter = $1;
- $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
- }
- else {
- die "Found value with no -paramname before it: $_"
- if !defined $parameter;
- push @{$P{$parameter}},$_;
- }
- }
- }
-
- # Convert weights to elements in P
- foreach my $abbr (keys %$triples) {
- # First delete all weights params from the input, in short or long-named version
- delete($P{$abbr});
- delete($P{$ABBR2FULL{$abbr}});
- # Then feed P with the current values
- foreach my $feature (@{$used_triples{$abbr}}) {
- my ($val, $min, $max) = @$feature;
- my $name = defined $ABBR2FULL{$abbr} ? $ABBR2FULL{$abbr} : $abbr;
- push @{$P{$name}}, $val;
- }
- }
-
- # create new moses.ini decoder config file by cloning and overriding the original one
- open(INI,$infn) or die "Can't read $infn";
- delete($P{"config"}); # never output
- print "Saving new config to: $outfn\n";
- open(OUT,"> $outfn") or die "Can't write $outfn";
- print OUT "# MERT optimized configuration\n";
- print OUT "# decoder $___DECODER\n";
- print OUT "# BLEU $bleu_achieved on dev $___DEV_F\n";
- print OUT "# We were before running iteration $iteration\n";
- print OUT "# finished ".`date`;
- my $line = <INI>;
- while(1) {
- last unless $line;
-
- # skip until hit [parameter]
- if ($line !~ /^\[(.+)\]\s*$/) {
- $line = <INI>;
- print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
- next;
- }
-
- # parameter name
- my $parameter = $1;
- $parameter = $ABBR2FULL{$parameter} if defined($ABBR2FULL{$parameter});
- print OUT "[$parameter]\n";
-
- # change parameter, if new values
- if (defined($P{$parameter})) {
- # write new values
- foreach (@{$P{$parameter}}) {
- print OUT $_."\n";
- }
- delete($P{$parameter});
- # skip until new parameter, only write comments
- while($line = <INI>) {
- print OUT $line if $line =~ /^\#/ || $line =~ /^\s+$/;
- last if $line =~ /^\[/;
- last unless $line;
- }
- next;
- }
-
- # unchanged parameter, write old
- while($line = <INI>) {
- last if $line =~ /^\[/;
- print OUT $line;
- }
- }
-
- # write all additional parameters
- foreach my $parameter (keys %P) {
- print OUT "\n[$parameter]\n";
- foreach (@{$P{$parameter}}) {
- print OUT $_."\n";
- }
- }
-
- close(INI);
- close(OUT);
- print STDERR "Saved: $outfn\n";
-}
-
-sub safesystem {
- print STDERR "Executing: @_\n";
- system(@_);
- if ($? == -1) {
- print STDERR "Failed to execute: @_\n $!\n";
- exit(1);
- }
- elsif ($? & 127) {
- printf STDERR "Execution of: @_\n died with signal %d, %s coredump\n",
- ($? & 127), ($? & 128) ? 'with' : 'without';
- exit(1);
- }
- else {
- my $exitcode = $? >> 8;
- print STDERR "Exit code: $exitcode\n" if $exitcode;
- return ! $exitcode;
- }
-}
-sub ensure_full_path {
- my $PATH = shift;
-$PATH =~ s/\/nfsmnt//;
- return $PATH if $PATH =~ /^\//;
- my $dir = `pawd 2>/dev/null`;
- if(!$dir){$dir = `pwd`;}
- chomp($dir);
- $PATH = $dir."/".$PATH;
- $PATH =~ s/[\r\n]//g;
- $PATH =~ s/\/\.\//\//g;
- $PATH =~ s/\/+/\//g;
- my $sanity = 0;
- while($PATH =~ /\/\.\.\// && $sanity++<10) {
- $PATH =~ s/\/+/\//g;
- $PATH =~ s/\/[^\/]+\/\.\.\//\//g;
- }
- $PATH =~ s/\/[^\/]+\/\.\.$//;
- $PATH =~ s/\/+$//;
-$PATH =~ s/\/nfsmnt//;
- return $PATH;
-}
-
-
-
-
-sub scan_config {
- my $ini = shift;
- my $inishortname = $ini; $inishortname =~ s/^.*\///; # for error reporting
- # we get a pre-filled counts, because some lambdas are always needed (word penalty, for instance)
- # as we walk though the ini file, we record how many extra lambdas do we need
- # and finally, we report it
-
- # in which field (counting from zero) is the filename to check?
- my %where_is_filename = (
- "ttable-file" => 4,
- "generation-file" => 3,
- "lmodel-file" => 3,
- "distortion-file" => 3,
- "global-lexical-file" => 1,
- );
- # by default, each line of each section means one lambda, but some sections
- # explicitly state a custom number of lambdas
- my %where_is_lambda_count = (
- "ttable-file" => 3,
- "generation-file" => 2,
- "distortion-file" => 2,
- );
-
- open INI, $ini or die "Can't read $ini";
- my $section = undef; # name of the section we are reading
- my $shortname = undef; # the corresponding short name
- my $nr = 0;
- my $error = 0;
- my %defined_files;
- my %defined_steps; # check the ini file for compatible mapping steps and actually defined files
- while (<INI>) {
- $nr++;
- next if /^\s*#/; # skip comments
- if (/^\[([^\]]*)\]\s*$/) {
- $section = $1;
- $shortname = $TABLECONFIG2ABBR{$section};
- next;
- }
- if (defined $section && $section eq "mapping") {
- # keep track of mapping steps used
- $defined_steps{$1}++ if /^([TG])/ || /^\d+ ([TG])/;
- }
- if (defined $section && defined $where_is_filename{$section}) {
- print "$section -> $where_is_filename{$section}\n";
- # this ini section is relevant to lambdas
- chomp;
- my @flds = split / +/;
- my $fn = $flds[$where_is_filename{$section}];
- if (defined $fn && $fn !~ /^\s+$/) {
- print "checking weight-count for $section\n";
- # this is a filename! check it
- if ($fn !~ /^\//) {
- $error = 1;
- print STDERR "$inishortname:$nr:Filename not absolute: $fn\n";
- }
- if (! -s $fn && ! -s "$fn.gz" && ! -s "$fn.binphr.idx" && ! -s "$fn.binlexr.idx" ) {
- $error = 1;
- print STDERR "$inishortname:$nr:File does not exist or empty: $fn\n";
- }
- # remember the number of files used, to know how many lambdas do we need
- die "No short name was defined for section $section!"
- if ! defined $shortname;
-
- # how many lambdas does this model need?
- # either specified explicitly, or the default, i.e. one
- my $needlambdas = defined $where_is_lambda_count{$section} ? $flds[$where_is_lambda_count{$section}] : 1;
-
- print STDERR "Config needs $needlambdas lambdas for $section (i.e. $shortname)\n" if $verbose;
- if (!defined $___LAMBDA && (!defined $additional_triples->{$shortname} || scalar(@{$additional_triples->{$shortname}}) < $needlambdas)) {
- print STDERR "$inishortname:$nr:Your model $shortname needs $needlambdas weights but we define the default ranges for only "
- .scalar(@{$additional_triples->{$shortname}})." weights. Cannot use the default, you must supply lambdas by hand.\n";
- $error = 1;
- }
- else {
- # note: table may use less parameters than the maximum number
- # of triples
- for(my $lambda=0;$lambda<$needlambdas;$lambda++) {
- my ($start, $min, $max)
- = @{${$additional_triples->{$shortname}}[$lambda]};
- push @{$used_triples{$shortname}}, [$start, $min, $max];
- }
- }
- $defined_files{$shortname}++;
- }
- }
- }
- die "$inishortname: File was empty!" if !$nr;
- close INI;
- for my $pair (qw/T=tm=translation G=g=generation/) {
- my ($tg, $shortname, $label) = split /=/, $pair;
- $defined_files{$shortname} = 0 if ! defined $defined_files{$shortname};
- $defined_steps{$tg} = 0 if ! defined $defined_steps{$tg};
-
- if ($defined_files{$shortname} != $defined_steps{$tg}) {
- print STDERR "$inishortname: You defined $defined_files{$shortname} files for $label but use $defined_steps{$tg} in [mapping]!\n";
- $error = 1;
- }
- }
-
- #print STDERR "SYNC distortion";
- push @{$used_triples{"d"}}, [1.0, 0.0, 2.0];
-
-
- exit(1) if $error;
- return (\%defined_files);
-}
-
diff --git a/contrib/moses-for-mere-mortals/scripts/modified-scripts/nonbreaking_prefix.pt b/contrib/moses-for-mere-mortals/scripts/modified-scripts/nonbreaking_prefix.pt
deleted file mode 100644
index a50e7245d..000000000
--- a/contrib/moses-for-mere-mortals/scripts/modified-scripts/nonbreaking_prefix.pt
+++ /dev/null
@@ -1,209 +0,0 @@
-#File adapted for PT by H. Leal Fontes from the EN & DE versions published with moses-2009-04-13. Last update: 10.11.2009.
-#Anything in this file, followed by a period (and an upper-case word), does NOT indicate an end-of-sentence marker.
-#Special cases are included for prefixes that ONLY appear before 0-9 numbers.
-
-#any single upper case letter followed by a period is not a sentence ender (excluding I occasionally, but we leave it in)
-#usually upper case letters are initials in a name
-A
-B
-C
-D
-E
-F
-G
-H
-I
-J
-K
-L
-M
-N
-O
-P
-Q
-R
-S
-T
-U
-V
-W
-X
-Y
-Z
-a
-b
-c
-d
-e
-f
-g
-h
-i
-j
-k
-l
-m
-n
-o
-p
-q
-r
-s
-t
-u
-v
-w
-x
-y
-z
-
-
-#Roman Numerals. A dot after one of these is not a sentence break in Portuguese.
-I
-II
-III
-IV
-V
-VI
-VII
-VIII
-IX
-X
-XI
-XII
-XIII
-XIV
-XV
-XVI
-XVII
-XVIII
-XIX
-XX
-i
-ii
-iii
-iv
-v
-vi
-vii
-viii
-ix
-x
-xi
-xii
-xiii
-xiv
-xv
-xvi
-xvii
-xviii
-xix
-xx
-
-#List of titles. These are often followed by upper-case names, but do not indicate sentence breaks
-Adj
-Adm
-Adv
-Art
-Ca
-Capt
-Cmdr
-Col
-Comdr
-Con
-Corp
-Cpl
-DR
-DRA
-Dr
-Dra
-Dras
-Drs
-Eng
-Enga
-Engas
-Engos
-Ex
-Exo
-Exmo
-Fig
-Gen
-Hosp
-Insp
-Lda
-MM
-MR
-MRS
-MS
-Maj
-Mrs
-Ms
-Msgr
-Op
-Ord
-Pfc
-Ph
-Prof
-Pvt
-Rep
-Reps
-Res
-Rev
-Rt
-Sen
-Sens
-Sfc
-Sgt
-Sr
-Sra
-Sras
-Srs
-Sto
-Supt
-Surg
-adj
-adm
-adv
-art
-cit
-col
-con
-corp
-cpl
-dr
-dra
-dras
-drs
-eng
-enga
-engas
-engos
-ex
-exo
-exmo
-fig
-op
-prof
-sr
-sra
-sras
-srs
-sto
-
-#misc - odd period-ending items that NEVER indicate breaks (p.m. does NOT fall into this category - it sometimes ends a sentence)
-v
-vs
-i.e
-rev
-e.g
-
-#Numbers only. These should only induce breaks when followed by a numeric sequence
-# add NUMERIC_ONLY after the word for this function
-#This case is mostly for the english "No." which can either be a sentence of its own, or
-#if followed by a number, a non-breaking prefix
-No #NUMERIC_ONLY#
-Nos
-Art #NUMERIC_ONLY#
-Nr
-p #NUMERIC_ONLY#
-pp #NUMERIC_ONLY#
diff --git a/contrib/moses-for-mere-mortals/scripts/score-0.85 b/contrib/moses-for-mere-mortals/scripts/score-0.85
deleted file mode 100644
index ebe161feb..000000000
--- a/contrib/moses-for-mere-mortals/scripts/score-0.85
+++ /dev/null
@@ -1,509 +0,0 @@
-#!/usr/bin/env bash
-# score-0.85
-# copyright 2010, João L. A. C. Rosas
-# licenced under the GPL licence, version 3
-# date: 02/09/2010
-# Special thanks to Hilário Leal Fontes and Maria José Machado who made research about this script, sent me experimental results, helped to test it and made very helpful suggestions
-
-# ***Purpose***: This script processes all the Moses translation files present in the $mosesdir/translation_files_for_tmx, if you want to prepare a translation to be used with a translation memory, or in the $mosesdir/translation_output directory, if you want to have a plain translation. For each Moses translation present there, it extracts from its name the names of the abbreviations of the source and target languages and of the scorebasename (which must not included the "." sign). With this information, it reconstructs the full name of the source file and reference translation file. For a set of source file, its Moses translation file and its reference (human-made) translation file, this script creates a report presenting, depending on the parameters set by the user, either 1) a score of the whole Moses translation or 2) a score of each segment of the Moses translation. In this latter case, each line of the file consists of the a) BLEU score and b) NIST score of the Moses translation ***of that segment***, c) the number of the segment in the source document, d) the source, e) reference and f) Moses translation segments, in that order. These 6 fields are separated by the "|" character. The lines are sorted by ascending order of BLEU score.
-
-###########################################################################################################################################################
-#THIS SCRIPT ASSUMES THAT A IRSTLM AND RANDLM ENABLED MOSES HAS ALREADY BEEN INSTALLED WITH THE create script IN $mosesdir (BY DEFAULT $HOME/moses-irstlm-randlm), THAT A CORPUS HAS BEEN TRAINED WITH THE train script AND THAT A TRANSLATION HAS ALREADY BEEN MADE WITH THE translate script.
-# IT ALSO ASSUMES THAT THE PACKAGES UPON WHICH IT DEPENDS, INDICATED IN THE create script, HAVE BEEN INSTALLED
-###########################################################################################################################################################
-
-##########################################################################################################################################################
-# The values of the variables that follow should be filled according to your needs: # ##########################################################################################################################################################
-# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# !!! THIS SCRIPT SHOULD NOT BE USED WITH DOCUMENTS TRANSLATED WITH THE translate script WITH ITS $translate_for_tmx PARAMETER SET TO 1 ***UNLESS*** the $othercleanings, $improvesegmentation and $ removeduplicates parameters of that script were all set to 0 and $minseglen was set to -1 (this processing changes the order of the segments and can also make the source document have a number of segments that is different from the number of segments of the reference translation, namely because it can delete some segments and/or add some new ones) !!!
-
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# !!! The names of the source and target reference translation files used for scoring should not include spaces !!!
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# The source file name and the reference translation file MUST observe the following conventions:
-# Source file : <basename>.<abbreviation of source language> (ex: 100.en)
-# Reference translation file: <basename>.<abbreviation of target language>.ref (ex: 100.pt.ref)
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#Base directory of your Moses installation (made with the create script)
-mosesdir=$HOME/moses-irstlm-randlm
-#Scores documents prepared for TMX translation memories. If this parameter is set to 1, the script will look for the documents $s and $m in the $mosesdir/translation_files_for_tmx directory; if not set to 1, it will look for the $s document in the mosesdir/translation_input directory and for the $m document in $mosesdir/translation_output; in both cases, it will look for the $r document in $mosesdir/translation_reference
-scoreTMXdocuments=0
-#This is an arbitrary commentary that you can use if you want to register something (a parameter used, whatever) in the name of the scorefile. Like this, you might not have to open several files before discovering the one you are really looking for (if you do many scores of the same document translated with different parameters); more useful while you are trying to discover the right combination of parameters for your specific situation; !!!Remember, however, that most Linux systems have a maximum file name length of 255 characters; if the name of the document to translate is already long, you might exceed that limit !!! Example of a note:"12-07-2010" (date of the batch score)
-batch_user_note="12-07-2010"
-#Create a report where each segment gets its own score; 0 = score the whole document; 1 = score each segment
-score_line_by_line=0
-#Remove moses translation segments that are equal to reference translation segments and whose BLEU score is zero (!!! Only active if score_line_by_line=1 !!!)
-remove_equal=1
-#Tokenize the source document and the reference and the Moses translation
-tokenize=1
-#Lowercase the source document and the reference and the Moses translation
-lowercase=1
-##########################################################################################################################################################
-# DO NOT CHANGE THE LINES THAT FOLLOW ... unless you know what you are doing! #
-##########################################################################################################################################################
-#Directory where Moses translation tools are located
-toolsdir=$mosesdir/tools
-if [ "$scoreTMXdocuments" = "1" ]; then
- sourcelanguagedir=$mosesdir/translation_files_for_tmx
- mosestranslationdir=$mosesdir/translation_files_for_tmx
-else
- sourcelanguagedir=$mosesdir/translation_input
- mosestranslationdir=$mosesdir/translation_output
-fi
-reftranslationdir=$mosesdir/translation_reference
-
-#Directory where the output of the present script, the translation scoring document, will be created
-scoredir=$mosesdir/translation_scoring
-
-# Create the input directories, if they do not yet exist; later steps will confirm that the input files do not yet exist (this saves time to the user, who will not have to also create these directories)
-if [ ! -d $sourcelanguagedir ] ; then mkdir -p $sourcelanguagedir ; fi
-if [ ! -d $reftranslationdir ] ; then mkdir -p $reftranslationdir ; fi
-if [ ! -d $mosestranslationdir ] ; then mkdir -p $mosestranslationdir ; fi
-if [ ! -d $scoredir ] ; then mkdir -p $scoredir ; fi
-
-# Define functions
-remove_garbage() {
- if [ -f $scoredir/$s ]; then
- rm $scoredir/$s
- fi
- if [ -f $scoredir/$r ]; then
- rm $scoredir/$r
- fi
- if [ -f $scoredir/$m ]; then
- rm $scoredir/$m
- fi
- if [ -f $scoredir/$scorebasename-src.$lang1.sgm ]; then
- rm $scoredir/$scorebasename-src.$lang1.sgm
- fi
- if [ -f $scoredir/$scorebasename-ref.$lang2.sgm ]; then
- rm $scoredir/$scorebasename-ref.$lang2.sgm
- fi
- if [ -f $scoredir/$scorebasename.moses.sgm ]; then
- rm $scoredir/$scorebasename.moses.sgm
- fi
-}
-log_wrong_file() {
- if [ ! -f $scoredir/$tmp ]; then
- echo "LIST OF NOT SCORED FILES (in the $mosestranslationdir directory):" > $scoredir/$tmp
- echo "==============================================================================================" >> $scoredir/$tmp
- echo "" >> $scoredir/$tmp
- echo "==============================================================================================" >> $scoredir/$tmp
- fi
- echo -e "***$filename*** file:" >> $scoredir/$tmp
- echo "----------------------------------------------------------------------------------------------" >> $scoredir/$tmp
- echo -e "\t$error_msg" >> $scoredir/$tmp
- echo "==============================================================================================" >> $scoredir/$tmp
-}
-#-----------------------------------------------------------------------------------------------------------------------------------------
-SAVEIFS=$IFS
-IFS=$(echo -en "\n\b")
-tmp="!!!SCORES-NOT-DONE!!!"
-if [ -f $scoredir/$tmp ]; then
- rm $scoredir/$tmp
-fi
-
-i=0
-for filetoscore in $mosestranslationdir/*; do
- if [ ! -d $filetoscore ]; then
- error_msg=""
- filename=${filetoscore##*/}
- tempbasename=${filename%.*}
- tempbasename1=${tempbasename%.*}
- scorebasename=${tempbasename1%.*}
- temp=${filename%.*}
- temp1=${temp%.*}
- lang1=${temp1##*.}
- lang2=${temp##*.}
- s=$scorebasename.$lang1
- m=$filename
- r=$scorebasename.$lang2.ref
- #-----------------------------------------------------------------------------------------------------------------------------------------
- #Define report name
- if [ "$lang1" = "$filename" -a "$lang2" = "$filename" ]; then
- lang1t=""
- lang2t=""
- else
- lang1t=$lang1
- lang2t=$lang2
- fi
- if [ "$score_line_by_line" = "1" ]; then
- scorefile=$scorebasename.$batch_user_note.$lang1t-$lang2t.F-$scoreTMXdocuments-R-$remove_equal-T-$tokenize.L-$lowercase.line-by-line
- else
- scorefile=$scorebasename-$batch_user_note-$lang1t-$lang2t.F-$scoreTMXdocuments-R-$remove_equal-T-$tokenize.L-$lowercase.whole-doc
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- scorefile_name_len=${#scorefile}
- if [ "${filetoscore##*.}" = "moses" ]; then
- echo "--------------------------------------------------------------------"
- echo "MOSES TRANSLATION: $filename (in the $mosestranslationdir directory)"
- let i=$i+1
- if [ "$scorefile_name_len" -gt "229" -a "$score_line_by_line" != "1" ]; then
- echo "==============================================================================================" >> $scoredir/$tmp
- error_msg="The translated file name and/or the \$batch_user_note parameter would result in a scorefile name that exceeds the maximal limit of 255 characters. Please try to use translation files and user notes that do not lead to files names exceeding the maximal allowable length."
- echo -e "$error_msg Analysing now next Moses translation."
- log_wrong_file
- scorefile=$(echo $scorefile | cut -c1-229)
- continue
- fi
- if [ "$scorefile_name_len" -gt "242" -a "$score_line_by_line" = "1" ]; then
- error_msg="The translated file name and/or the \$batch_user_note parameter would result in a scorefile name that exceeds the maximal limit of 255 characters. Please try to use translation files and user notes that do not lead to files with names exceeding their maximal allowable length."
- echo -e "$error_msg Analysing now next Moses translation."
- log_wrong_file
- scorefile=$(echo $scorefile | cut -c1-242)
- continue
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- if [ "$lang1" = "$lang2" ]; then
- error_msg="You did not respect the Moses for Mere Mortals conventions for naming the source and or the reference files.\n\tSource file\t\t\t: <scorebasename>.<source language abbreviation> (ex: 100.pt)\n\tReference translation file\t: <scorebasename>.<target language abbreviation> (ex: 100.en.ref)\nPlease correct the name of the files and then run this script again."
- echo -e "$error_msg Analysing now next Moses translation."
- log_wrong_file
- continue
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- #Get number of segments for each input file (source, reference and Moses translation)
- #avoid wc error messages when the file does not exist
- exec 3> /dev/stderr 2> /dev/null
- lines_s=`wc -l "$sourcelanguagedir/$s" | awk '{print $1'}`
- if [ "$lines_s" ]; then
- echo "Source file : $lines_s lines"
- else
- echo "Source file : doesn't exist"
- fi
- lines=`wc -l "$mosestranslationdir/$m" | awk '{print $1'}`
- if [ "$lines" ]; then
- echo "Moses translation: $lines lines"
- else
- echo "Moses translation: doesn't exist"
- fi
- lines_r=`wc -l "$reftranslationdir/$r" | awk '{print $1'}`
- if [ "$lines_r" ]; then
- echo "Reference file : $lines_r lines"
- else
- echo "Reference file : doesn't exist"
- fi
- exec 2>&3
-
- #Check that source, reference and Moses translation files have the same number of segments
- if [ "$lines_s" != "$lines_r" ]; then
- if [ "$lines_s" = "" ]; then
- lines_s=0
- fi
- if [ "$lines_r" = "" ]; then
- lines_r=0
- fi
- error_msg="Source and reference files do not have the same number of lines (source = $lines_s and reference = $lines_r lines) or one or both of them might not exist. If you verify manually that they do have the same number of segments, then wc (a Linux command) is interpreting at least one of the characters of one of the files as something it isn't. If that is the case, you will have to isolate the line(s) that is (are) causing problems and to substitute the character in question by some other character."
- echo "$error_msg Analysing now next Moses translation."
- log_wrong_file
- remove_garbage
- continue
- fi
- if [ "$lines" != "$lines_r" ]; then
- if [ "$lines" = "" ]; then
- lines=0
- fi
- if [ "$lines_r" = "" ]; then
- lines_r=0
- fi
- error_msg="Reference and moses translation files do not have the same number of lines (reference = $lines_r lines and moses translation = $lines) or one or both of them might not exist. If you verify manually that they do have the same number of segments, then wc (a Linux command) is interpreting at least one of the characters of one of the files as something it isn't. If that is the case, you will have to isolate the line(s) that is (are) causing problems and to substitute the character in question by some other character."
- echo "$error_msg Analysing now next Moses translation."
- log_wrong_file
- remove_garbage
- continue
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- #Check that $s, $r and $m exist
- if [ ! -f $sourcelanguagedir/$s ] ; then
- error_msg="The expected source language file ($sourcelanguagedir/$s) needed for scoring the Moses translation ($mosestranslationdir/$m) does not exist. Did you respect the file naming conventions described at the top of the score-0.85 script or did you use the wrong language pair for translating?"
- echo "$error_msg Analysing now next Moses translation."
- log_wrong_file
- continue
- else
- cp $sourcelanguagedir/$s $scoredir
- if [ "$tokenize" = "1" -a "$lowercase" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $scoredir/$s > $scoredir/$s.tok
- $toolsdir/scripts/lowercase.perl < $scoredir/$s.tok > $scoredir/$s
- rm -f $scoredir/$s.tok
- elif [ "$tokenize" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $scoredir/$s > $scoredir/$s.tok
- mv -f $scoredir/$s.tok $scoredir/$s
- elif [ "$lowercase" = "1" ]; then
- $toolsdir/scripts/lowercase.perl < $scoredir/$s > $scoredir/$s.lower
- mv -f $scoredir/$s.lower $scoredir/$s
- fi
- sed 's/\\$/\\ /g' < $scoredir/$s > $scoredir/$s.clean
- mv -f $scoredir/$s.clean $scoredir/$s
- fi
- if [ ! -f $reftranslationdir/$r ] ; then
- error_msg="The expected reference (human-made) file ($reftranslationdir/$r) needed for scoring the Moses translation ($mosestranslationdir/$m) does not exist."
- echo "$error_msg Analysing now next Moses translation. Did you respect the file naming conventions described at the top of the score-0.21 script or did you use the wrong language pair for translating?"
- log_wrong_file
- continue
- else
- cp $reftranslationdir/$r $scoredir
- if [ "$tokenize" = "1" -a "$lowercase" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $scoredir/$r > $scoredir/$r.tok
- $toolsdir/scripts/lowercase.perl < $scoredir/$r.tok > $scoredir/$r
- rm -f $scoredir/$r.tok
- elif [ "$tokenize" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $scoredir/$r > $scoredir/$r.tok
- mv -f $scoredir/$r.tok $scoredir/$r
- elif [ "$lowercase" = "1" ]; then
- $toolsdir/scripts/lowercase.perl < $scoredir/$r > $scoredir/$r.lower
- mv -f $scoredir/$r.lower $scoredir/$r
- fi
- sed 's/\\$/\\ /g' < $scoredir/$r > $scoredir/$r.clean
- mv -f $scoredir/$r.clean $scoredir/$r
- fi
- if [ ! -f $mosestranslationdir/$m ] ; then
- error_msg="The Moses translation file ($mosestranslationdir/$m) file does not exist. Did you respect the file naming conventions described at the top of the score-0.80 script?"
- echo "$error_msg Analysing now next Moses translation."
- log_wrong_file
- continue
- else
- cp $mosestranslationdir/$m $scoredir
- if [ "$tokenize" = "1" -a "$lowercase" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $scoredir/$m > $scoredir/$m.tok
- $toolsdir/scripts/lowercase.perl < $scoredir/$m.tok > $scoredir/$m
- rm -f $scoredir/$m.tok
- elif [ "$tokenize" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $scoredir/$m > $scoredir/$m.tok
- mv -f $scoredir/$m.tok $scoredir/$m
- elif [ "$lowercase" = "1" ]; then
- $toolsdir/scripts/lowercase.perl < $scoredir/$m > $scoredir/$m.lower
- mv -f $scoredir/$m.lower $scoredir/$m
- fi
- sed 's/\\$/\\ /g' < $scoredir/$m > $scoredir/$m.clean
- mv -f $scoredir/$m.clean $scoredir/$m
- fi
-
- echo "===================================================================================" > $scoredir/temp
- echo "*** Script version ***: score-0.85" >> $scoredir/temp
- echo "===================================================================================" >> $scoredir/temp
- echo "===================================================================================" >> $scoredir/temp
- echo "Extracted file names and other data (extracted automatically; errors are possible):" >> $scoredir/temp
- echo "===================================================================================" >> $scoredir/temp
- echo "source language : $lang1" >> $scoredir/temp
- echo "target language : $lang2" >> $scoredir/temp
- echo "-----------------------------------------------------------------------------------" >> $scoredir/temp
- echo "source file : $sourcelanguagedir/$s" >> $scoredir/temp
- echo "moses translation : $mosestranslationdir/$m" >> $scoredir/temp
- echo "reference file : $reftranslationdir/$r" >> $scoredir/temp
- echo "-----------------------------------------------------------------------------------" >> $scoredir/temp
- echo "batch_user_note : $batch_user_note" >> $scoredir/temp
- echo "===================================================================================" >> $scoredir/temp
- echo "score_line_by_line : $score_line_by_line" >> $scoredir/temp
- if [ "$score_line_by_line" = "1" ]; then
- echo "tokenize : $tokenize" >> $scoredir/temp
- echo "lowercase : $lowercase" >> $scoredir/temp
- echo "remove_equal : $remove_equal" >> $scoredir/temp
- fi
- echo "===================================================================================" >> $scoredir/temp
- #=========================================================================================================================================================
- #1. SCORE LINE BY LINE
- #=========================================================================================================================================================
- if [ "$score_line_by_line" = "1" ]; then
- if [ -f $scoredir/$scorefile ]; then
- rm -f $scoredir/$scorefile
- fi
- echo "************************** Score line by line"
- counter=0
- echo "BLEU|NIST|<segnum>|source seg|ref seg|Moses seg" >> $scoredir/temp
- echo "" >> $scoredir/temp
-
- sed -e 's#\& #\&amp\; #g' -e 's#<#\&lt\;#g' $scoredir/$s > $scoredir/$s.tmp
- mv $scoredir/$s.tmp $scoredir/$s
- sed -e 's#\& #\&amp\; #g' -e 's#<#\&lt\;#g' $scoredir/$r > $scoredir/$r.tmp
- mv $scoredir/$r.tmp $scoredir/$r
- sed -e 's#\& #\&amp\; #g' -e 's#<#\&lt\;#g' $scoredir/$m > $scoredir/$m.tmp
- mv $scoredir/$m.tmp $scoredir/$m
- echo "***** Score each segment:"
- while [ "$counter" -lt "$lines" ]; do
- let "counter += 1"
- echo "Segment $counter"
- source_sentence=`awk "NR==$counter{print;exit}" $scoredir/$s`
- ref_sentence=`awk "NR==$counter{print;exit}" $scoredir/$r`
- moses_sentence=`awk "NR==$counter{print;exit}" $scoredir/$m`
- #-----------------------------------------------------------------------------------------------------------------------------------------
- # ******** wrap source file
- if [ "$source_sentence" != "" ]; then
- echo '<srcset setid="'$scorebasename'" srclang="'$lang1'">' > $scoredir/$scorebasename-src.$lang1.sgm
- echo '<DOC docid="'$scorebasename'">' >> $scoredir/$scorebasename-src.$lang1.sgm
- echo "<seg id=$counter>"$source_sentence"</seg>" >> $scoredir/$scorebasename-src.$lang1.sgm
- echo "</DOC>" >> $scoredir/$scorebasename-src.$lang1.sgm
- echo "</srcset>" >> $scoredir/$scorebasename-src.$lang1.sgm
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- # ******** wrap reference (human-made) translation
- if [ "$ref_sentence" != "" ]; then
- echo '<refset setid="'$scorebasename'" srclang="'$lang1'" trglang="'$lang2'">' > $scoredir/$scorebasename-ref.$lang2.sgm
- echo '<DOC docid="'$scorebasename'" sysid="ref">' >> $scoredir/$scorebasename-ref.$lang2.sgm
- echo "<seg id=$counter>"$ref_sentence"</seg>" >> $scoredir/$scorebasename-ref.$lang2.sgm
- echo "</DOC>" >> $scoredir/$scorebasename-ref.$lang2.sgm
- echo "</refset>" >> $scoredir/$scorebasename-ref.$lang2.sgm
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- # ******** wrap Moses translation
- if [ "$moses_sentence" != "" ]; then
- echo '<tstset setid="'$scorebasename'" srclang="'$lang1'" trglang="'$lang2'">' > $scoredir/$scorebasename.moses.sgm
- echo '<DOC docid="'$scorebasename'" sysid="moses">' >> $scoredir/$scorebasename.moses.sgm
- echo "<seg id=$counter>"$moses_sentence"</seg>" >> $scoredir/$scorebasename.moses.sgm
- echo "</DOC>" >> $scoredir/$scorebasename.moses.sgm
- echo "</tstset>" >> $scoredir/$scorebasename.moses.sgm
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- sed -e 's/\x1E/\-/g' $scoredir/$scorebasename-src.$lang1.sgm > $scoredir/temp2
- mv $scoredir/temp2 $scoredir/$scorebasename-src.$lang1.sgm
- sed -e 's/\x1E/\-/g' $scoredir/$scorebasename-ref.$lang2.sgm > $scoredir/temp2
- mv $scoredir/temp2 $scoredir/$scorebasename-ref.$lang2.sgm
- sed -e 's/\x1E/\-/g' $scoredir/$scorebasename.moses.sgm > $scoredir/temp2
- mv $scoredir/temp2 $scoredir/$scorebasename.moses.sgm
-
- # ******** get segment score"
- #in our experience, the mteval-v13a and the mteval-v12 (more recent scorers) stopped with errors (and no score) with strings like " & " and U+001E
- score=`$toolsdir/mteval-v11b.pl -s $scoredir/$scorebasename-src.$lang1.sgm -r $scoredir/$scorebasename-ref.$lang2.sgm -t $scoredir/$scorebasename.moses.sgm -c`
- scoretemp=${score%% for system *}
- scoretemp1=${scoretemp#*NIST score = }
- NIST=${scoretemp1%% *}
- BLEUtemp=${scoretemp1#*BLEU score = }
- BLEU=${BLEUtemp%% *}
- set -f
- BLEUcorr=$(echo "scale=0; $BLEU*10000" | bc)
- set +f
- if [ "$remove_equal" = "1" ]; then
- if [ "$ref_sentence" != "$moses_sentence" ]; then
- echo "$BLEU|$NIST|<$counter>|<seg>$source_sentence</seg>|<seg>$ref_sentence</seg>|<seg>$moses_sentence</seg>" >> $scoredir/$scorefile
- elif [ "$BLEUcorr" = "0" ]; then
- : #do nothing
- else
- echo "$BLEU|$NIST|<$counter>|<seg>$source_sentence</seg>|<seg>$ref_sentence</seg>|<seg>$moses_sentence</seg>" >> $scoredir/$scorefile
- fi
- else
- echo "$BLEU|$NIST|<$counter>|<seg>$source_sentence</seg>|<seg>$ref_sentence</seg>|<seg>$moses_sentence</seg>" >> $scoredir/$scorefile
- fi
- done
- #-----------------------------------------------------------------------------------------------------------------------------------------
- #Sort the output file by score
- sort -g $scoredir/$scorefile -o $scoredir/$scorefile
- echo "===========================================================================" >> $scoredir/temp
- cat $scoredir/$scorefile >> $scoredir/temp
- mv $scoredir/temp $scoredir/$scorefile
- remove_garbage
- else
- #=========================================================================================================================================================
- #2. SCORE WHOLE DOCUMENT
- #=========================================================================================================================================================
- if [ -f $scoredir/$scorefile ]; then
- rm -f $scoredir/$scorefile
- fi
- echo "************************** Score whole document"
- sed -e 's#\& #\&amp\; #g' -e 's#<#\&lt\;#g' $scoredir/$s > $scoredir/$s.tmp
- mv $scoredir/$s.tmp $scoredir/$s
- sed -e 's#\& #\&amp\; #g' -e 's#<#\&lt\;#g' $scoredir/$r > $scoredir/$r.tmp
- mv $scoredir/$r.tmp $scoredir/$r
- sed -e 's#\& #\&amp\; #g' -e 's#<#\&lt\;#g' $scoredir/$m > $scoredir/$m.tmp
- mv $scoredir/$m.tmp $scoredir/$m
- echo "***************** wrap test result in SGM"
- echo "******** wrap source file"
- exec<$scoredir/$s
- echo '<srcset setid="'$scorebasename'" srclang="'$lang1'">' > $scoredir/$scorebasename-src.$lang1.sgm
- echo '<DOC docid="'$scorebasename'">' >> $scoredir/$scorebasename-src.$lang1.sgm
- numseg=0
- while read line
- do
- numseg=$(($numseg+1))
- echo "<seg id=$numseg>"$line"</seg>" >> $scoredir/$scorebasename-src.$lang1.sgm
- done
- echo "</DOC>" >> $scoredir/$scorebasename-src.$lang1.sgm
- echo "</srcset>" >> $scoredir/$scorebasename-src.$lang1.sgm
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "******** wrap reference (human-made) translation"
- exec<$scoredir/$r
- echo '<refset setid="'$scorebasename'" srclang="'$lang1'" trglang="'$lang2'">' > $scoredir/$scorebasename-ref.$lang2.sgm
- echo '<DOC docid="'$scorebasename'" sysid="ref">' >> $scoredir/$scorebasename-ref.$lang2.sgm
- numseg=0
- while read line
- do
- numseg=$(($numseg+1))
- echo "<seg id=$numseg>"$line"</seg>" >> $scoredir/$scorebasename-ref.$lang2.sgm
- done
- echo "</DOC>" >> $scoredir/$scorebasename-ref.$lang2.sgm
- echo "</refset>" >> $scoredir/$scorebasename-ref.$lang2.sgm
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "******** wrap Moses translation"
- exec<$scoredir/$m
- echo '<tstset setid="'$scorebasename'" srclang="'$lang1'" trglang="'$lang2'">' > $scoredir/$scorebasename.moses.sgm
- echo '<DOC docid="'$scorebasename'" sysid="moses">' >> $scoredir/$scorebasename.moses.sgm
- numseg=0
- while read line
- do
- numseg=$(($numseg+1))
- echo "<seg id=$numseg>"$line"</seg>" >> $scoredir/$scorebasename.moses.sgm
- done
- echo "</DOC>" >> $scoredir/$scorebasename.moses.sgm
- echo "</tstset>" >> $scoredir/$scorebasename.moses.sgm
-
- sed -e 's/\x1E/\-/g' $scoredir/$scorebasename-src.$lang1.sgm > $scoredir/temp2
- mv $scoredir/temp2 $scoredir/$scorebasename-src.$lang1.sgm
- sed -e 's/\x1E/\-/g' $scoredir/$scorebasename-ref.$lang2.sgm > $scoredir/temp2
- mv $scoredir/temp2 $scoredir/$scorebasename-ref.$lang2.sgm
- sed -e 's/\x1E/\-/g' $scoredir/$scorebasename.moses.sgm > $scoredir/temp2
- mv $scoredir/temp2 $scoredir/$scorebasename.moses.sgm
-
- if [ ! -f $scoredir/$scorebasename-src.$lang1.sgm -o ! -f $scoredir/$scorebasename-ref.$lang2.sgm -o ! -f $scoredir/$scorebasename.moses.sgm ]; then
- echo "There was a problem creating the files used by the scorer. Exiting..."
- IFS=$SAVEIFS
- exit 0
- else
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "***************** scoring"
- startscoringdate=`date +day:%d/%m/%y-time:%H:%M:%S`
- #in our experience, the mteval-v13a and the mteval-v12 (more recent scorers) stopped with errors (and no score) with strings like " & " and U+001E
- score=`$toolsdir/mteval-v11b.pl -s $scoredir/$scorebasename-src.$lang1.sgm -r $scoredir/$scorebasename-ref.$lang2.sgm -t $scoredir/$scorebasename.moses.sgm -c`
- scoretemp=${score%% for system *}
- scoretemp1=${scoretemp#*NIST score = }
- NIST=${scoretemp1%% *}
- BLEUtemp=${scoretemp1#*BLEU score = }
- BLEU=${BLEUtemp%% *}
- echo $score
- scoretemp2=${score#*NIST score =}
- echo "NIST score = $scoretemp2" > $scoredir/$scorefile
- newscorefile=$scorebasename-BLEU-$BLEU-NIST-$NIST-$batch_user_note-$lang1-$lang2.F-$scoreTMXdocuments-R-$remove_equal-T-$tokenize.L-$lowercase.whole-doc
- echo "===================================================================================" >> $scoredir/$scorefile
- mv -f $scoredir/$scorefile $scoredir/$newscorefile
- #-----------------------------------------------------------------------------------------------------------------------------------------
- fi
- cat $scoredir/$newscorefile >> $scoredir/temp
- mv $scoredir/temp $scoredir/$newscorefile
- remove_garbage
- fi
- else
- filename=${filetoscore##*/}
- if [ "$filename" != "*" ]; then
- let i=$i+1
- echo "--------------------------------------------------------------------"
- echo -e "$filename file (in the $mosestranslationdir directory):\n\tName of moses translation file is illegal (doesn't end in '.moses' or includes spaces)."
- error_msg="Name of moses translation file is illegal (doesn't end in '.moses' or includes spaces)."
- log_wrong_file
- continue
- fi
- fi
- fi
-done
-IFS=$SAVEIFS
-
-echo "--------------------------------------------------------------------"
-echo -e "Score finished.\n$i files treated.\nResults directory:\n\t$scoredir"
-#=================================================================================================================================================
-# Changes in version 0.85
-#=================================================================================================================================================
-# Allows batch processing of the whole $mosesdir/$translation_output directory
-# Extracts automatically the source language and target language, the names of the source file, moses translation file and reference translation file and the batch_user_note
-# Checks for more file naming errors and informs about them
-# More informative report, even in case of error
-# Creation of a new file that lists the translations that could not be scored and the reason why
-# Corrects a bug that made it fail when the scorer files included the word "for" in their name
-# Maintains SGM scorer because newer scorers have caused us more problems with characters that crashed them (ex: " & " and U+001E)
-#=================================================================================================================================================
-
diff --git a/contrib/moses-for-mere-mortals/scripts/train-1.11 b/contrib/moses-for-mere-mortals/scripts/train-1.11
deleted file mode 100644
index dc65cf5d6..000000000
--- a/contrib/moses-for-mere-mortals/scripts/train-1.11
+++ /dev/null
@@ -1,1538 +0,0 @@
-#!/usr/bin/env bash
-# train-1.11
-# copyright 2009,2010, João L. A. C. Rosas
-# licenced under the GPL licence, version 3
-# the Mosesdecoder (http://sourceforge.net/projects/mosesdecoder/), is a tool upon which this script depends that is licenced under the GNU Library or Lesser General Public License (LGPL)
-# date: 25/08/2010
-# Special thanks to Hilário Leal Fontes and Maria José Machado, who helped to test the script and made very helpful suggestions
-# This script is based on instructions from several sources, especially the http://www.dlsi.ua.es/~mlf/fosmt-moses.html and the http://www.statmt.org/moses_steps.html web pages and the Moses, IRSTLM, RandLM, giza-pp and MGIZA documentation, as well as on research on the available literature on Moses, namely the Moses mailing list (http://news.gmane.org/gmane.comp.nlp.moses.user). The comments transcribe parts of the manuals of all the tools used.
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#THIS SCRIPT ASSUMES THAT A IRSTLM AND RANDLM ENABLED MOSES HAS ALREADY BEEN INSTALLED WITH THE create script IN $mosesdir (BY DEFAULT $HOME/moses-irstlm-randlm); CHANGE THIS VARIABLE ACCORDING TO YOUR NEEDS
-# IT ALSO ASSUMES THAT THE PACKAGES UPON WHICH IT DEPENDS, INDICATED IN THE create script, HAVE BEEN INSTALLED
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-# ***Purpose***: given a Moses installation made with the create script, this script trains a bilingual corpus consisting of at least 1 file with segments in the source language and 1 file perfectly aligned with it with segments in the target language; it also uses 1 file in the target language to train a language model and another file in the target language for training recasing, and optionally 2 files (one in the source and one in the target language) for tuning and for testing the trained corpus (though not recommended, the corpus files can also be used for all these purposes); the trained corpus can then be used by the translate script in order to get actual translations of real texts; this script allows you to configure (see below) many of the corpus training parameters.
-
-##########################################################################################################################################################
-# The values of the variables that follow should be filled according to your needs: # ##########################################################################################################################################################
-
-#Full path of the base directory location of your Moses system
-mosesdir=$HOME/moses-irstlm-randlm
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#NOTE 1: The corpus that you want to train, together with the respective tuning files (if different), the testing files (if different), the file used for recasing, and the file used to build the language model (if different) should be placed in $mosesdir/corpora_for_training !!!
-#NOTE 2: After the script is executed, you will find a summary of what has been done (the corpus summary file) in $mosesdir/logs
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#=========================================================== 1. LANGUAGES ===============================================================================
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# !!! The names of the languages should not include spaces, as well as special characters, like asterisks, backslashes or question marks. Try to stick with letters, numbers, and the underscore, dash and dot if you want to avoid surprises. Avoid using a dash and the dot as the first character of the name. A 2 letter abbreviation is probably the ideal setting !!!
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#Abbreviation of language 1 (source language)
-lang1=pt
-#Abbreviation of language 2 (target language)
-lang2=en
-#=========================================================== 2. FILES ===================================================================================
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# !!! The names of the files should not include spaces, as well as special characters, like asterisks, backslashes or question marks. Try to stick with letters, numbers, and the dash, dot, and underscore if you want to avoid Bash surprises. Avoid using a dash as the first character of a file name, because most Linux commands will treat it as a switch. If your files start with a dot, they'll become hidden files.!!! The $corpusbasename, $lmbasename and $recaserbasename parameters that follow MUST be filled in!!! The $tuningbasename and the $testbasename only need to be filled in if you want to do a tuning or a test of the trained corpus, respectively.
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#Basename of the corpus placed in $mosesdir/corpora_for_training (the example that follows refers to the 2 files 200000.for_train.en and 200000.for_train.pt, whose basename is 200000.for_train)
-corpusbasename=200000.for_train
-#Basename of the file used to build the language model (LM), placed in $mosesdir/corpora_for_training (!!! this is a file in the target language !!!)
-lmbasename=300000
-#Basename of the recaser training file, placed in $mosesdir/corpora_for_training
-recaserbasename=300000
-#Basename of the tuning corpus, placed in $mosesdir/corpora_for_training
-tuningbasename=800
-#Basename of the test set files (used for testing the trained corpus), placed in $mosesdir/corpora_for_training
-testbasename=200000.for_test
-#======================================================= 3. TRAINING STEPS ==============================================================================
-#--------------------------------------------------------------------------------------------------------------------------------------------------------
-#Reuse all relevant files that have already been created in previous trainings: 1= Do ; Any other value=Don't
-reuse=1
-#--------------------------------------------------------------------------------------------------------------------------------------------------------
-
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#NOTE 1: If in doubt, leave the settings that follow as they are; you will do a full training with memory mapping, tuning, a training test and scoring of the training test of the demo corpus; the results will appear in $mosesdir/corpora_trained and a log file will be available in $mosesdir/logs.
-
-#NOTE 2: You can also proceed step by step (e.g., first doing just LM building and corpus training and then testing), so as to better control the whole process.
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-#Do parallel corpus training: 1= Do ; Any other value=Don't !!!
-paralleltraining=1
-#Number of the first training step (possible values: 1-9); choose 1 for a completely new corpus
-firsttrainingstep=1
-#Number of the last training step (possible values: 1-9); choose 9 for a completely new corpus
-lasttrainingstep=9
-#Do memory mapping: 1 = Do ; Any other value = Don't
-memmapping=1
-#Do tuning: 1= Do ; Any other value=Don't; can lead, but does not always lead, to better results; takes much more time
-tuning=1
-#Do a test (with scoring) of the training: 1 = Do ; Any other value = Don't
-runtrainingtest=1
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# If you are new to Moses, stop here for the time being
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#======================================================= 4. LANGUAGE MODEL PARAMETERS ==================================================================
-# Use IRSTLM (1) or RandLM (5)
-lngmdl=1
-#Order of ngrams - the higher the better, but more memory required (choose between 3 and 9; good value: 5)
-Gram=5
-#----------------------------------------------------*** 4.1. IRSTLM PARAMETERS ***----------------------------------------------------------------------
-# Distributed language model: 1= Yes; Any other value = No (splits the file used to build the language model into parts, processes each part separately and finally merges the parts)
-distributed=1
-# Number of parts to split dictionary into balanced n-gram prefix lists (in the creation of a distributed language model); default: 5; !!! Only used if distributed = 1 !!!
-dictnumparts=20
-# Smoothing possible values: witten-bell (default); kneser-ney, improved-kneser-ney
-s='witten-bell'
-# Quantize LM (Reduces memory comsumption at the cost of some loss of performance); 1 = Do ; Any other value = Don't. May induce some accuracy loss. Reduces the size of the LM.
-quantize=0
-# Memory-mapping of the LM. 1 = Do; Any other value = Don't. Avoids the creation of the binary LM directly in RAM (allows bigger LM at the cost of lower speed; often necessary when LM file is very big) !!!
-lmmemmapping=1
-#-----------------------------------------------------*** 4.2. RandLM PARAMETERS ***---------------------------------------------------------------------
-# The format of the input data. The following formats are supported: for a CountRandLM, "corpus" (tokenised text corpora, one sentence per line); for a BackoffRandLM, 'arpa' (an ARPA backoff language model)
-inputtype=corpus
-# The false positive rate of the randomised data structure on an inverse log scale so '-falsepos 8' produces a false positive rate of 1/2^8
-falsepos=8
-# The quantisation range used by the model. For a CountRandLM, quantisation is performed by taking a logarithm. The base of the logarithm is set as 2^{1/'values'}. For a BackoffRandLM, a binning quantisation algorithm is used. The size of the codebook is set as 2^{'values'}
-values=8
-#======================================================= 5. TRAINING PARAMETERS ========================================================================
-#----------------------------------------------------*** 5.1. TRAINING STEP 1 ***----------------------------------------------------------------------
-#********** mkcls options
-#Number of mkcls interations (default:2)
-nummkclsiterations=2
-#Number of word classes
-numclasses=50
-#----------------------------------------------------*** 5.2. TRAINING STEP 2 ***----------------------------------------------------------------------
-#....................................................... 5.2.1. MGIZA parameters .......................................................................
-#Number of processors of your computer that will be used by MGIZA (if you use all the processors available, the training will be considerably speeded)
-mgizanumprocessors=1
-#....................................................... 5.2.2. GIZA parameters .......................................................................
-#maximum sentence length; !!! never exceed 101 !!!
-ml=101
-#No. of iterations:
-#-------------------
-#number of iterations for Model 1
-model1iterations=5
-#number of iterations for Model 2
-model2iterations=0
-#number of iterations for HMM (substitutes model 2)
-hmmiterations=5
-#number of iterations for Model 3
-model3iterations=3
-#number of iterations for Model 4
-model4iterations=3
-#number of iterations for Model 5
-model5iterations=0
-#number of iterations for Model 6
-model6iterations=0
-#
-#parameters for various heuristics in GIZA++ for efficient training:
-#------------------------------------------------------------------
-#Counts increment cutoff threshold
-countincreasecutoff=1e-06
-#Counts increment cutoff threshold for alignments in training of fertility models
-countincreasecutoffal=1e-05
-#minimal count increase
-mincountincrease=1e-07
-#relative cutoff probability for alignment-centers in pegging
-peggedcutoff=0.03
-#Probability cutoff threshold for lexicon probabilities
-probcutoff=1e-07
-#probability smoothing (floor) value
-probsmooth=1e-07
-#
-#parameters for describing the type and amount of output:
-#-----------------------------------------------------------
-#0: detailled alignment format, 1: compact alignment format
-compactalignmentformat=0
-#dump frequency of Model 1
-model1dumpfrequency=0
-#dump frequency of Model 2
-model2dumpfrequency=0
-#dump frequency of HMM
-hmmdumpfrequency=0
-#output: dump of transfer from Model 2 to 3
-transferdumpfrequency=0
-#dump frequency of Model 3/4/5
-model345dumpfrequency=0
-#for printing the n best alignments
-nbestalignments=0
-#1: do not write any files
-nodumps=1
-#1: write alignment files only
-onlyaldumps=1
-#0: not verbose; 1: verbose
-verbose=0
-#number of sentence for which a lot of information should be printed (negative: no output)
-verbosesentence=-10
-#
-#smoothing parameters:
-#---------------------
-#f-b-trn: smoothing factor for HMM alignment model #can be ignored by -emSmoothHMM
-emalsmooth=0.2
-#smoothing parameter for IBM-2/3 (interpolation with constant))
-model23smoothfactor=0
-#smooting parameter for alignment probabilities in Model 4)
-model4smoothfactor=0.4
-#smooting parameter for distortion probabilities in Model 5 (linear interpolation with constant
-model5smoothfactor=0.1
-#smoothing for fertility parameters (good value: 64): weight for wordlength-dependent fertility parameters
-nsmooth=4
-#smoothing for fertility parameters (default: 0): weight for word-independent fertility parameters
-nsmoothgeneral=0
-#
-#parameters modifying the models:
-#--------------------------------
-#0 = IBM-3/IBM-4 as described in (Brown et al. 1993); 1: distortion model of empty word is deficient; 2: distoriton model of empty word is deficient (differently); setting this parameter also helps to avoid that during IBM-3 and IBM-4 training too many words are aligned with the empty word); 1 = only 3-dimensional alignment table for IBM-2 and IBM-3
-compactadtable=1
-deficientdistortionforemptyword=0
-#d_{=1}: &1:l, &2:m, &4:F, &8:E, d_{>1}&16:l, &32:m, &64:F, &128:E)
-depm4=76
-#d_{=1}: &1:l, &2:m, &4:F, &8:E, d_{>1}&16:l, &32:m, &64:F, &128:E)
-depm5=68
-#lextrain: dependencies in the HMM alignment model. &1: sentence length; &2: previous class; &4: previous position; &8: French position; &16: French class)
-emalignmentdependencies=2
-#f-b-trn: probability for empty word
-emprobforempty=0.4
-#
-#parameters modifying the EM-algorithm:
-#--------------------------------------
-#fixed value for parameter p_0 in IBM-5 (if negative then it is determined in training)
-m5p0=-1
-manlexfactor1=0
-manlexfactor2=0
-manlexmaxmultiplicity=20
-#maximum fertility for fertility models
-maxfertility=10
-#fixed value for parameter p_0 in IBM-3/4 (if negative then it is determined in training)
-p0=0.999
-#0: no pegging; 1: do pegging
-pegging=0
-#-----------------------------------------------------*** 5.3. TRAINING SCRIPT PARAMETERS ***------------------------------------------------------------
-#Heuristic used for word alignment; possible values: intersect (intersection seems to be a synonym), union, grow, grow-final, grow-diag, grow-diag-final-and (default value), srctotgt, tgttosrc
-alignment=grow-diag-final-and
-#Reordering model; possible values: msd-bidirectional-fe (default), msd-bidirectional-f, msd-fe, msd-f, monotonicity-bidirectional-fe, monotonicity-bidirectional-f, monotonicity-fe, monotonicity-f
-reordering=msd-bidirectional-fe
-#Minimum length of the sentences (used by clean)
-MinLen=1
-#Maximum length of the sentences (used by clean)
-MaxLen=60
-#Maximum length of phrases entered into phrase table (max: 7; choose a lower value if phrase size length is an issue)
-MaxPhraseLength=7
-#-----------------------------------------------------*** 5.4. DECODER PARAMETERS ***--------------------------------------------------------------------
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# !!! Only used in the training evaluation, and only if tuning = 0 !!!
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#***** QUALITY TUNING:
-# Weights for phrase translation table (good values: 0.1-1; default: 1); ensures that the phrases are good translations of each other
-weight_t=1
-# Weights for language model (good values: 0.1-1; default: 1); ensures that output is fluent in target language
-weight_l=1
-# Weights for reordering model (good values: 0.1-1; default: 1); allows reordering of the input sentence
-weight_d=1
-# Weights for word penalty (good values: -3 to 3; default: 0; negative values favor large output; positive values favour short output); ensures translations do not get too long or too short
-weight_w=0
-#------------------------------------------
-# Use Minumum Bayes Risk (MBR) decoding (1 = Do; Any other value = do not); instead of outputting the translation with the highest probability, MBR decoding outputs the translation that is most similar to the most likely translations.
-mbr=0
-# Number of translation candidates consider. MBR decoding uses by default the top 200 distinct candidate translations to find the translation with minimum Bayes risk
-mbrsize=200
-# Scaling factor used to adjust the translation scores (default = 1.0)
-mbrscale=1.0
-# Adds walls around punctuation ,.!?:;". 1= Do; Any other value = do not. Specifying reordering constraints around punctuation is often a good idea. TODO not sure it does not require annotation of the corpus to be trained
-monotoneatpunctuation=0
-#***** SPEED TUNING:
-# Fixed limit for how many translation options are retrieved for each input phrase (0 = no limit; positive value = number of translation options per phrase)
-ttablelimit=20
-# Use the relative scores of hypothesis for pruning, instead of a fixed limit (0= no pruning; decimal value = more pruning)
-beamthreshold=0
-# Threshold for constructing hypotheses based on estimate cost (default: 0 = not used).During the beam search, many hypotheses are created that are too bad to be even entered on a stack. For many of them, it is even clear before the construction of the hypothesis that it would be not useful. Early discarding of such hypotheses hazards a guess about their viability. This is based on correct score except for the actual language model costs which are very expensive to compute. Hypotheses that, according to this estimate, are worse than the worst hypothesis of the target stack, even given an additional specified threshold as cushion, are not constructed at all. This often speeds up decoding significantly. Try threshold factors between 0.5 and 1
-earlydiscardingthreshold=0
-
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#To get faster performance than the default Moses setting at roughly the same performance, use the parameter settings $searchalgorithm=1, $cubepruningpoplimit=2000 and $stack=2000. With cube pruning, the size of the stack has little impact on performance, so it should be set rather high. The speed/quality trade-off is mostly regulated by the -cube-pruning-pop-limit, i.e. the number of hypotheses added to each stack
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-# Search algorithm; cube pruning is faster than the traditional search at comparable levels of search errors; 0 = default; 1 = turns on cube pruning
-searchalgorithm=0
-# Number of hypotheses added to each stack; only a fixed number of hypotheses are generated for each span; default is 1000, higher numbers slow down the decoder, may result in better quality
-cubepruningpoplimit=1000
-# Reduce size of hypothesis stack, that keeps the best partial translations (=beam); default: 100
-stack=100
-# Maximum phrase length (default: 20)
-maxphraselen=20
-# ****** SPEED AND QUALITY TUNING
-# Minimum number of hypotheses from each coverage pattern; you may also require that a minimum number of hypotheses is added for each word coverage (they may be still pruned out, however). This is done using the switch -cube-pruning-diversity, which sets the minimum. The default is 0
-cubepruningdiversity=0
-# Distortion (reordering) limit in maximum number of words (0 = monotone; -1 = unlimited ; any other positive value = maximal number of words; default:6)); limiting distortion often increases speed and quality
-distortionlimit=6
-#======================================================= 6. TUNING PARAMETERS ===========================================================================
-# Maximum number of runs of tuning ( -1 = no limit; Any positive number = maximum number of runs)
-maxruns=10
-##########################################################################################################################################################
-# DO NOT CHANGE THE LINES THAT FOLLOW ... unless you know what you are doing! #
-##########################################################################################################################################################
-
-#=========================================================================================================================================================
-# 1. Do some preparatory work
-#=========================================================================================================================================================
-# Register start date and time of corpus training
-startdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-
-echo "********************** DO PREPARATORY WORK:"
-#to avoid *** glibc detected *** errors with moses compiler
-export MALLOC_CHECK_=0
-
-echo "****** build names of parameters that will dictate the directory structure of the trained corpus files"
-if [ "$lngmdl" = "1" ]; then
- lngmdlparameters="LM-$lmbasename-IRSTLM-$Gram-$distributed-$s-$quantize-$lmmemmapping"
-elif [ "$lngmdl" = "5" ]; then
- lngmdlparameters="LM-$lmbasename-RandLM-$Gram-$inputtype-$falsepos-$values"
-fi
-
-#Use numeric codes in order to avoid file name length to exceed the limit
-case "$alignment" in
-'intersect')
-alignmentcode="1";
-;;
-'intersection')
-alignmentcode="9";
-;;
-'union')
-alignmentcode="2";
-;;
-'grow')
-alignmentcode="3";
-;;
-'grow-final')
-alignmentcode="4";
-;;
-'grow-diag')
-alignmentcode="5";
-;;
-'grow-diag-final-and')
-alignmentcode="6";
-;;
-'srctotgt')
-alignmentcode="7";
-;;
-'tgttosrc')
-alignmentcode="8";
-;;
-*)
-echo "The Moses training script parameter \$alignment has an illegal value. Exiting ...";
-exit 0;
-;;
-esac
-
-#Reordering model; possible values: msd-bidirectional-fe (default), msd-bidirectional-f, msd-fe, msd-f, monotonicity-bidirectional-fe, monotonicity-bidirectional-f, monotonicity-fe, monotonicity-f
-#Use numeric codes in order to avoid file name length to exceed the limit
-case "$reordering" in
-'msd-bidirectional-fe')
-reorderingcode="1";
-param=wbe-$reordering;
-;;
-'msd-bidirectional-f')
-reorderingcode="2";
-param=wbe-$reordering;
-;;
-'msd-fe')
-reorderingcode="3";
-param=wbe-msd-backward-fe;
-;;
-'msd-f')
-reorderingcode="4";
-param=wbe-msd-backward-f;
-;;
-'monotonicity-bidirectional-fe')
-reorderingcode="5";
-param=wbe-$reordering;
-;;
-'monotonicity-bidirectional-f')
-reorderingcode="6";
-param=wbe-$reordering;
-;;
-'monotonicity-fe')
-reorderingcode="7";
-param=wbe-monotonicity-backward-fe;
-;;
-'monotonicity-f')
-reorderingcode="8";
-param=wbe-monotonicity-backward-f;
-;;
-*)
-echo "The Moses training script parameter \$reordering has an illegal value. Exiting ...";
-exit 0;
-;;
-esac
-
-trainingparameters="T-$paralleltraining-$firsttrainingstep-$lasttrainingstep-MKCLS-$nummkclsiterations-$numclasses-MGIZA-$mgizanumprocessors-GIZA-$ml-$model1iterations-$model2iterations-$hmmiterations-$model3iterations-$model4iterations-$model5iterations-$model6iterations-$countincreasecutoff-$countincreasecutoffal-$mincountincrease-$peggedcutoff-$probcutoff-$probsmooth-$compactalignmentformat-$model1dumpfrequency-$model2dumpfrequency-$hmmdumpfrequency-$transferdumpfrequency-$model345dumpfrequency-$nbestalignments-$nodumps-$onlyaldumps-$verbose-$verbosesentence-$emalsmooth-$model23smoothfactor-$model4smoothfactor-$model5smoothfactor-$nsmooth-$nsmoothgeneral-$compactadtable-$deficientdistortionforemptyword-$depm4-$depm5-$emalignmentdependencies-$emprobforempty-$m5p0-$manlexfactor1-$manlexfactor2-$manlexmaxmultiplicity-$maxfertility-$p0-$pegging-MOSES-$alignmentcode-$reorderingcode-$MinLen-$MaxLen-$MaxPhraseLength-$Gram-$weight_t-$weight_l-$weight_d-$weight_w-$mbr-$mbrsize-$mbrscale-$monotoneatpunctuation-$ttablelimit-$beamthreshold-$earlydiscardingthreshold-$searchalgorithm-$cubepruningpoplimit-$stack-$maxphraselen-$cubepruningdiversity-$distortionlimit"
-if [ "$memmapping" = "1" ]; then
- mmparameters="M-1"
-else
- mmparameters="M-0"
-fi
-if [ "$tuning" = "1" ]; then
- tuningparameters="Tu-$tuningbasename-$maxruns"
-else
- tuningparameters="Tu-0"
-fi
-if [ "$runtrainingtest" = "1" ]; then
- evaluationparameters="E-$testbasename-$recaserbasename"
-else
- evaluationparameters="E-0"
-fi
-
-echo "****** build name of directories where corpus trained files will be located"
-#Full path of the tools directory (giza, irstlm, moses, scripts, ...)
-toolsdir="$mosesdir/tools"
-#Full path of the tools subdirectory where modified scripts are located
-modifiedscriptsdir="$toolsdir/modified-scripts"
-#Full path of the files used for training (corpus, language model, recaser, tuning, evaluation)
-datadir="$mosesdir/corpora_for_training"
-#Full path of the training logs
-logdir="$mosesdir/logs"
-#Full path of the base directory where your corpus will be processed (corpus, model, lm, evaluation, recaser)
-workdir="$mosesdir/corpora_trained"
-#Full path of the language model directory
-lmdir="$workdir/lm/$lang2/$lngmdlparameters"
-#Full path of the tokenized files directory
-tokdir="$workdir/tok"
-#Full path of the cleaned files directory
-cleandir="$workdir/clean/MinLen-$MinLen.MaxLen-$MaxLen"
-#Full path of the lowercased (after cleaning) files directory
-lc_clean_dir="$workdir/lc_clean/MinLen-$MinLen.MaxLen-$MaxLen"
-#Full path of the lowercased (and not cleaned) files directory
-lc_no_clean_dir="$workdir/lc_no_clean"
-#Full path of the recaser files directory
-recaserdir="$workdir/recaser/$lang2/$recaserbasename-IRSTLM"
-#Full path of the trained corpus files directory
-modeldir="$workdir/model/$lang1-$lang2-$corpusbasename.$lngmdlparameters/$trainingparameters"
-#Root-dir parameter of Moses
-rootdir=$modeldir
-#Full path of the memory-mapped files directory
-memmapsdir="$workdir/memmaps/$lang1-$lang2-$corpusbasename.$lngmdlparameters/$trainingparameters"
-#Full path of the tuning files directory
-tuningdir="$workdir/tuning/$lang1-$lang2-$corpusbasename.$lngmdlparameters.$mmparameters.$tuningparameters/$trainingparameters"
-#Full path of the training test files directory
-testdir="$workdir/evaluation/$lang1-$lang2-$corpusbasename.$lngmdlparameters.$mmparameters.$tuningparameters.$evaluationparameters/$trainingparameters"
-#Full path of the detokenized files directory
-detokdir="$workdir/detok/$lang2/$testbasename"
-#Full path of the detokenized files directory
-mgizanewdir="mgiza"
-
-#Avoid a nasty mistake that does not lead to an error message
-if [ ! -f $datadir/$lmbasename.$lang2 ]; then
- echo "A corpus training has to specify a valid language model file (parameter \$lmbasename, whose value is set to $lmbasename). If the LM has already been built, then it will not be redone. For example, if you want to use the 1000.pt file, set this parameter to 1000 and that file should be placed in $datadir. Exiting ..."
- exit 0
-fi
-
-if [ "$lngmdl" != "1" -a "$lngmdl" != "5" ]; then
- echo "The language model builder parameter (\$lngmdl, whose value is set to $lngmdl) can only have the following values: 1 <-- IRSTLM or 5 <-- RandLM. Exiting ..."
- exit 0
-fi
-
-if [ ! -f $datadir/$corpusbasename.$lang1 -o ! -f $datadir/$corpusbasename.$lang2 ]; then
- echo "$datadir/$corpusbasename.$lang1"
- echo "A corpus training has to specify a valid corpus file (parameter \$corpusbasename, whose value is set to $corpusbasename). For instance, if you want to use the files 1000.en and 1000.pt as the corpus files, this parameter should be set to 1000 and those files should be placed in $datadir. Exiting ..."
- exit 0
-fi
-
-echo "****** create directories where training and translation files will be located"
-#create the directory where you will put the documents to be translated
-if [ ! -d $mosesdir/translation_input ] ; then mkdir -p $mosesdir/translation_input ; fi
-
-#create the directory where you will put the documents that have been translated
-if [ ! -d $mosesdir/translation_output ] ; then mkdir -p $mosesdir/translation_output ; fi
-
-#create the directory where you will put the human translations that will be used for scoring the documents that have been translated
-if [ ! -d $mosesdir/translation_reference ] ; then mkdir -p $mosesdir/translation_reference ; fi
-
-#Create logs directory (where will be stored info about the training done)
-if [ ! -d $mosesdir/logs ] ; then mkdir -p $mosesdir/logs ; fi
-
-#Create, if it does not exist, the modified-scripts subdirectory of $toolsdir
-if [ ! -d $modifiedscriptsdir ]; then mkdir -p $modifiedscriptsdir; fi
-
-#Create work directory (where the training files will be located) if it does not exist
-if [ ! -d $workdir ]; then mkdir -p $workdir; fi
-
-#Create base language model directory if it does not exist ("base" means for all trained corpora;
-#"current" means for the presently trained corpus; "current" is a subdirectory of "base")
-if [ ! -d $workdir/lm ]; then mkdir -p $workdir/lm; fi
-#Create current language model directory if it does not exist
-if [ ! -d $lmdir ]; then mkdir -p $lmdir; fi
-
-#Create tokenized files directory if it does not exist
-if [ ! -d $tokdir ]; then mkdir -p $tokdir; fi
-
-#Create base cleaned files directory if it does not exist
-if [ ! -d $cleandir ]; then mkdir -p $cleandir; fi
-
-#Create current lowercased (after cleaning) files directory if it does not exist
-if [ ! -d $lc_clean_dir ]; then mkdir -p $lc_clean_dir; fi
-
-#Create current lowercased (and not cleaned) files directory if it does not exist
-if [ ! -d $lc_no_clean_dir ]; then mkdir -p $lc_no_clean_dir; fi
-
-#Create base trained corpus files directory if it does not exist
-if [ ! -d $workdir/model ]; then mkdir -p $workdir/model; fi
-#Create current trained corpus files directory if it does not exist
-if [ ! -d $modeldir ]; then mkdir -p $modeldir; fi
-
-if [ "$memmapping" = "1" ]; then
- #Create base memory-mapping files directory if it does not exist
- if [ ! -d $workdir/memmaps ]; then mkdir -p $workdir/memmaps; fi
- #Create current memory-mapping files directory if it does not exist
- if [ ! -d $memmapsdir ]; then mkdir -p $memmapsdir; fi
-fi
-
-if [ "$tuning" = "1" ]; then
- #Create base tuning files directory if it does not exist
- if [ ! -d $workdir/tuning ]; then mkdir -p $workdir/tuning; fi
- #Create current tuning files directory if it does not exist
- if [ ! -d $tuningdir ]; then mkdir -p $tuningdir; fi
-fi
-
-if [ "$runtrainingtest" = "1" ]; then
- #Create base evaluation files directory if it does not exist
- if [ ! -d $workdir/evaluation ]; then mkdir -p $workdir/evaluation; fi
- #Create current evaluation files directory if it does not exist
- if [ ! -d $testdir ]; then mkdir -p $testdir; fi
-
- #Create base recaser files directory if it does not exist
- if [ ! -d $workdir/recaser ]; then mkdir -p $workdir/recaser; fi
- #Create current recaser files directory if it does not exist
- if [ ! -d $recaserdir ]; then mkdir -p $recaserdir; fi
-
- #Create base detokenized files directory if it does not exist
- if [ ! -d $workdir/detok ]; then mkdir -p $workdir/detok; fi
- #Create base detokenized files directory if it does not exist
- if [ ! -d $detokdir ]; then mkdir -p $detokdir; fi
-fi
-
-#define name of the logfile
-logfile="$lang1-$lang2.C-$corpusbasename-$MaxLen-$MinLen.LM-$lmbasename.MM-$memmapping.`date +day-%d-%m-%y-time-%H-%M-%S`.txt"
-log=$logdir/$logfile
-#Create corpus training log file
-echo "" > $log
-
-echo "****** create some auxiliary functions"
-#function that checks whether a trained corpus exists already
-checktrainedcorpusexists() {
- if [ ! -f $modeldir/moses.ini ]; then
- echo -n "A previously trained corpus does not exist. You have to train a corpus first. Exiting..."
- exit 0
- fi
-}
-
-makeTrainingSummary() {
- dontuse=0
- echo "***************** Writing training summary"
-
- echo "*** Script version ***: train-1.11" > $log
- if [ ! -f $modeldir/moses.ini ]; then
- echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" >> $log
- echo "@ !!! THIS IS NOT A VALIDLY TRAINED CORPUS !!! DO NOT USE IT FOR TRANSLATION !!! @" >> $log
- echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" >> $log
- dontuse=1
- fi
- echo "========================================================================" >> $log
- echo "*** Duration ***: " >> $log
- echo "========================================================================" >> $log
- echo "Start time: $startdate" >> $log
- echo "Start language model building: $startLMdate" >> $log
- echo "Start recaser training: $startrecasertrainingdate" >> $log
- echo "Start corpus training: $starttrainingdate" >> $log
- echo "Start memory-mapping: $startmmpdate" >> $log
- echo "Start tuning: $starttuningdate" >> $log
- echo "Start test: $starttestdate" >> $log
- echo "Start scoring: $startscoringdate" >> $log
- echo "End time: `date +day:%d/%m/%y-time:%H:%M:%S`" >> $log
- echo "========================================================================" >> $log
- echo "*** Languages*** :" >> $log
- echo "========================================================================" >> $log
- echo "Source language: $lang1" >> $log
- echo "Target language: $lang2" >> $log
- echo "========================================================================" >> $log
- echo "*** Training steps in fact executed *** :" >> $log
- echo "========================================================================" >> $log
- if [ -f $lmdir/$lang2.$lngmdlparameters.blm.mm -o -f $lmdir/$lang2.$lngmdlparameters.BloomMap ]; then
- echo "Language model building executed=yes" >> $log
- else
- echo "Language model building executed=no. !!! THIS CORPUS CANNOT BE USED FOR TRANSLATION !!! Retrain it." >> $log
- dontuse=1
- fi
- if [ -f $recaserdir/moses.ini ]; then
- echo "Recaser training executed=yes" >> $log
- else
- echo "Recaser training executed=no. !!! THIS CORPUS CANNOT BE USED FOR TRANSLATION !!! Retrain it." >> $log
- dontuse=1
- fi
- if [ -f $modeldir/moses.ini ]; then
- echo "Corpus training executed=yes" >> $log
- else
- echo "Corpus training executed=no. !!! THIS CORPUS CANNOT BE USED FOR TRANSLATION !!! Retrain it." >> $log
- dontuse=1
- fi
- if [ "$paralleltraining" = "1" -a -f $modeldir/moses.ini ]; then
- echo "Parallel training executed=yes" >> $log
- else
- echo "Parallel training executed=no" >> $log
- fi
- echo "First training step=$frsttrainingstep" >> $log
- echo "Last training step=$lasttrainingstep" >> $log
- if [ -f $memmapsdir/reordering-table.$corpusbasename.$lang1-$lang2.$param.binlexr.srctree ]; then
- echo "Corpus memmapping executed=yes" >> $log
- else
- echo "Corpus memmapping executed=no" >> $log
- if [ "$memmapping" = "1" ]; then
- echo "Memory-mapping was not successfully finished. Erase the $memmapsdir and retrain the corpus." >> $log
- dontuse=1
- fi
- fi
- if [ -f $tuningdir/moses.ini ]; then
- echo "Tuning executed=yes" >> $log
- else
- echo "Tuning executed=no" >> $log
- fi
- if [ -f $testdir/$testbasename-src.$lang1.sgm ]; then
- echo "Training test executed=yes" >> $log
- else
- echo "Training test executed=no" >> $log
- fi
- if [ "$score" != "" ]; then
- echo "Scoring executed=yes" >> $log
- else
- echo "Scoring executed=no" >> $log
- fi
- if [ "$score" != "" ]; then
- echo "========================================================================" >> $log
- echo "*** Score ***:" >> $log
- echo "========================================================================" >> $log
- echo "$score" >> $log
- fi
- echo "========================================================================" >> $log
- echo "*** Files and directories used:" >> $log
- echo "========================================================================" >> $log
- echo "*** Moses base directory ***:" >> $log
- echo "$mosesdir" >> $log
- echo "------------------------------------------------------------------------" >> $log
- if [ -f $lmdir/$lang2.$lngmdlparameters.blm.mm -o -f $lmdir/$lang2.$lngmdlparameters.BloomMap ]; then
- echo "*** File used to build language model ***: " >> $log
- echo "------------------------------------------------------------------------" >> $log
- echo "$lmdir/$lmbasename.$lang2" >> $log
- fi
- if [ -f $recaserdir/moses.ini ]; then
- echo "------------------------------------------------------------------------" >> $log
- echo "*** File used to build recasing model ***:" >> $log
- echo "$recaserdir/$lang2.$recaserbasename/$lang2.$recaserbasename" >> $log
- fi
- if [ -f $modeldir/moses.ini ]; then
- echo "------------------------------------------------------------------------" >> $log
- echo "*** File used for corpus training ***: " >> $log
- echo "$modeldir/$corpusbasename.$lang1" >> $log
- echo "$modeldir/$corpusbasename.$lang2" >> $log
- fi
- if [ "$tuning" = "1" ]; then
- if [ -f $tuningdir/moses.ini ]; then
- echo "------------------------------------------------------------------------" >> $log
- echo "*** Files used for tuning ***:" >> $log
- echo "$workdir/tuning/$tuningbasename.$lang1" >> $log
- echo "$workdir/tuning/$tuningbasename.$lang2" >> $log
- fi
- fi
- if [ "$runtrainingtest" = "1" ]; then
- echo "*** Files used for testing training ***:" >> $log
- if [ -f $testdir/$testbasename-src.$lang1.xml ]; then
- echo "------------------------------------------------------------------------" >> $log
- echo "$testdir/$testbasename.$lang1" >> $log
- echo "$testdir/$testbasename.$lang2" >> $log
- fi
- fi
- echo "========================================================================" >> $log
- echo "*** Specific settings ***:" >> $log
- echo "========================================================================" >> $log
- if [ "$reuse" = "1" ]; then
- echo "Reuse relevant files created in previous trainings=yes" >> $log
- else
- echo "Reuse relevant files created in previous trainings=no" >> $log
- fi
- echo "------------------------------------------------------------------------" >> $log
- echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" >> $log
- echo "+ Language model (LM) parameters:" >> $log
- echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" >> $log
- echo "------------------------ General parameters ----------------------------" >> $log
- echo "Language model builder=$lngmdl (0 = SRILM, 1 = IRSTLM; 5 = RandLM)" >> $log
- echo "Gram=$Gram" >> $log
- if [ "$lngmdl" = "1" ]; then
- echo "--------------------- IRSTLM parameters ------------------------" >> $log
- echo "distributed=$distributed" >> $log
- if [ "$distributed" = "1" ]; then
- echo "dictnumparts=$dictnumparts" >> $log
- fi
- echo "smoothing=$s" >> $log
- echo "quantized=$quantize" >> $log
- echo "memory-mmapped=$lmmemmapping" >> $log
- elif [ "$lngmdl" = "5" ]; then
- echo "--------------------- RandLM parameters ------------------------" >> $log
- echo "inputtype=$inputtype" >> $log
- echo "false positives=$falsepos" >> $log
- echo "values=$values" >> $log
- fi
- echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" >> $log
- echo "+ Training Settings ***:" >> $log
- echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" >> $log
- echo "*************** mkcls options *********************************" >> $log
- echo "nummkclsiterations=$nummkclsiterations" >> $log
- echo "numclasses=$numclasses" >> $log
- echo "*************** MGIZA option **********************************" >> $log
- echo "mgizanumprocessors=$mgizanumprocessors" >> $log
- echo "*************** GIZA options **********************************" >> $log
- echo "maximum sentence length=$ml" >> $log
- echo "No. of iterations:" >> $log
- echo "m1=$model1iterations" >> $log
- echo "m2=$model2iterations" >> $log
- echo "mh=$hmmiterations" >> $log
- echo "m3=$model3iterations" >> $log
- echo "m4=$model4iterations" >> $log
- echo "m5=$model5iterations" >> $log
- echo "m6=$model6iterations" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "Parameters for various heuristics in GIZA++ for efficient training:" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "countincreasecutoff=$countincreasecutoff" >> $log
- echo "countincreasecutoffal=$countincreasecutoffal" >> $log
- echo "mincountincrease=$mincountincrease" >> $log
- echo "peggedcutoff=$peggedcutoff" >> $log
- echo "probcutoff=$probcutoff" >> $log
- echo "probsmooth=$probsmooth" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "Parameters describing the type and amount of output:" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "compactalignmentformat=$compactalignmentformat" >> $log
- echo "t1=$model1dumpfrequency" >> $log
- echo "t2=$model2dumpfrequency" >> $log
- echo "th=$hmmdumpfrequency" >> $log
- echo "t2to3=$transferdumpfrequency" >> $log
- echo "t345=$model345dumpfrequency" >> $log
- echo "nbestalignments=$nbestalignments" >> $log
- echo "nodumps=$nodumps" >> $log
- echo "onlyaldumps=$onlyaldumps" >> $log
- echo "verbose=$verbose" >> $log
- echo "verbosesentence=$verbosesentence" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "Smoothing parameters:" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "emalsmooth=$emalsmooth" >> $log
- echo "model23smoothfactor=$model23smoothfactor" >> $log
- echo "model4smoothfactor=$model4smoothfactor" >> $log
- echo "model5smoothfactor=$model5smoothfactor" >> $log
- echo "nsmooth=$nsmooth" >> $log
- echo "nsmoothgeneral=$nsmoothgeneral" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "Parameters modifying the models:" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "compactadtable=$compactadtable" >> $log
- echo "deficientdistortionforemptyword=$deficientdistortionforemptyword" >> $log
- echo "depm4=$depm4" >> $log
- echo "depm5=$depm5" >> $log
- echo "emalignmentdependencies=$emalignmentdependencies" >> $log
- echo "emprobforempty=$emprobforempty" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "Parameters modifying the EM-algorithm:" >> $log
- echo "---------------------------------------------------------------" >> $log
- echo "m5p0=$m5p0" >> $log
- echo "manlexfactor1=$manlexfactor1" >> $log
- echo "manlexfactor2=$manlexfactor2" >> $log
- echo "manlexmaxmultiplicity=$manlexmaxmultiplicity" >> $log
- echo "maxfertility=$maxfertility" >> $log
- echo "p0=$p0" >> $log
- echo "pegging=$pegging" >> $log
- echo "********************* Training script parameters **************" >> $log
- echo "alignment=$alignment" >> $log
- echo "reordering=$reordering" >> $log
- echo "MinLen=$MinLen" >> $log
- echo "MaxLen=$MaxLen" >> $log
- echo "MaxPhraseLength=$MaxPhraseLength" >> $log
- echo "********************* Moses decoder parameters **************" >> $log
- echo "NOTE: only used in testing if \$tuning = 0" >> $log
- echo "********** Quality parameters **************" >> $log
- echo "weight-t=$weight_t" >> $log
- echo "weight-l=$weight_l" >> $log
- echo "weight-d=$weight_d" >> $log
- echo "weight-w=$weight_w" >> $log
- echo "mbr=$mbr" >> $log
- echo "mbr-size=$mbrsize" >> $log
- echo "mbr-scale=$mbrscale" >> $log
- echo "monotone-at-punctuation=$monotoneatpunctuation" >> $log
- echo "********** Speed parameters ****************" >> $log
- echo "ttable-limit=$ttablelimit" >> $log
- echo "beam-threshold=$beamthreshold" >> $log
- echo "stack=$stack" >> $log
- echo "early-discarding-threshold=$earlydiscardingthreshold" >> $log
- echo "search-algorithm=$searchalgorithm" >> $log
- echo "cube-pruning-pop-limit=$cubepruningpoplimit" >> $log
- echo "max-phrase-length=$maxphraselen" >> $log
- echo "********** Quality and speed parameters ****" >> $log
- echo "cube-pruning-diversity=$cubepruningdiversity" >> $log
- echo "distortion-limit=$distortionlimit" >> $log
- echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" >> $log
- echo "+ Tuning Settings ***:" >> $log
- echo "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++" >> $log
- echo "Maximum number of tunning runs=$maxruns" >> $log
- echo "###########################################################################################" >> $log
- echo "*** Parameters that will be used by other scripts ***:" >> $log
- echo "###########################################################################################" >> $log
- echo "In order to use this trained corpus for translation, please set the value of the \$logfile " >> $log
- echo "parameter of translate script as follows:" >> $log
- echo "logfile=$logfile" >> $log
- echo "The next parameters will be automatically filled in if you choose the right \$logfile name:" >> $log
- echo "lang1=$lang1" >> $log
- echo "lang2=$lang2" >> $log
- echo "corpusbasename=$corpusbasename" >> $log
- echo "language-model-parameters=$lngmdlparameters" >> $log
- echo "training-parameters=$trainingparameters" >> $log
- echo "memory-mapping-parameters=$memmapping" >> $log
- echo "memory-mapping-extra-parameters=$param" >> $log
- echo "tuning-parameters=$tuningparameters" >> $log
- echo "evaluation-parameters=$evaluationparameters" >> $log
- echo "minlen=$MinLen" >> $log
- echo "maxlen=$MaxLen" >> $log
- echo "recaserbasename=$recaserbasename" >> $log
- echo "###########################################################################################" >> $log
- echo "========================================================================" >> $log
- echo "*** List of files created by the training ***:" >> $log
- echo "========================================================================" >> $log
- sort $logdir/corpus-files.txt | uniq > $logdir/corpus-files-sorted.txt
- cat $logdir/corpus-files-sorted.txt >> $log
- if [ "$dontuse" = "1" ]; then
- mv -f $log $logdir/!!!INVALID!!!$logfile
- fi
- rm $logdir/corpus-files.txt
- rm $logdir/corpus-files-sorted.txt
-}
-
-#function that avoids some unwanted effects of interrupting training
-control_c() {
- makeTrainingSummary
- echo "****** Script interrupted by CTRL + C."
- exit 0
-}
-
-trap control_c SIGINT
-#--------------------------------------------------------------------------------------------------------------------------
-echo "****** export several variables"
-#full path to your moses scripts directory
-export SCRIPTS_ROOTDIR=$toolsdir/moses/scripts*
-export IRSTLM=$toolsdir/irstlm
-export PATH=$toolsdir/irstlm/bin/i686:$toolsdir/irstlm/bin:$PATH
-export RANDLM=$toolsdir/randlm
-export PATH=$toolsdir/randlm/bin:$PATH
-export PATH=$toolsdir/mgiza:$PATH
-export QMT_HOME=$toolsdir/mgiza
-export corpusbasename
-export lmbasename
-export lang1
-export lang2
-
-#=========================================================================================================================================================
-#2. DO LANGUAGE MODEL
-#=========================================================================================================================================================
-startLMdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-echo "********************** BUILD LANGUAGE MODEL (LM):"
-
-if [ -f $datadir/$lmbasename.$lang2 ]; then
- echo "****** substitute problematic characters in LM file"
- if [ "$reuse" != "1" -o ! -f $tokdir/$lmbasename.$lang2.ctrl ]; then
- tr '\a\b\f\r\v|' ' /' < $datadir/$lmbasename.$lang2 > $tokdir/$lmbasename.$lang2.ctrl
- else
- echo "Substituting problematic characters in the $datadir/$lmbasename.$lang2 file already done. Reusing it."
- fi
- echo "****** tokenize LM file"
- if [ "$reuse" != "1" -o ! -f $tokdir/$lmbasename.tok.$lang2 ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $tokdir/$lmbasename.$lang2.ctrl > $tokdir/$lmbasename.tok.$lang2
- else
- echo "Tokenizing of the $tokdir/$lmbasename.$lang2.ctrl file already done. Reusing it."
- fi
-else
- echo "The $datadir/$lmbasename.$lang2 file, used for the language model (LM) building, does not exist. Please review the \$lmbasename and/or the \$lang2 settings of this script. LM building is done with a target language file. Exiting ..."
- exit 0
-fi
-echo "$tokdir/$lmbasename.$lang2.ctrl" >> $logdir/corpus-files.txt
-echo "$tokdir/$lmbasename.tok.$lang2" >> $logdir/corpus-files.txt
-
-echo "****** lowercase LM file"
-if [ "$reuse" != "1" -o ! -f $lc_no_clean_dir/$lmbasename.lowercase.$lang2 ]; then
- $toolsdir/scripts/lowercase.perl < $tokdir/$lmbasename.tok.$lang2 > $lc_no_clean_dir/$lmbasename.lowercase.$lang2
-else
- echo "Lowercasing of the $tokdir/$lmbasename.tok.$lang2 file already done. Reusing it."
-fi
-echo "$lc_no_clean_dir/$lmbasename.lowercase.$lang2" >> $logdir/corpus-files.txt
-
-echo "****** building LM"
-# If LM built with IRSTLM ...
-if [ "$lngmdl" = "1" ]; then
- if [ "$reuse" != "1" -o ! -f $lmdir/$lang2.$lngmdlparameters.blm.mm ]; then
- #if this operation was previously unsuccessful
- if [ -f $lmdir/$lang2.$lngmdlparameters.lm.gz ]; then
- rm -rf $lmdir
- mkdir -p $lmdir
- fi
- echo "****** build corpus IRSTLM language model (LM)"
- echo "*** build iARPA LM file"
- datestamp=`date +day-%d-%m-%y-time-%H-%M-%S`
- if [ ! -d /tmp/$datestamp ]; then mkdir -p /tmp/$datestamp; fi
- if [ ! -f $lmdir/$lang2.$lngmdlparameters.lm.gz -a "$distributed" = "1" ]; then
- echo "*** distributed building of LM file; training procedure split into $k parts"
- $toolsdir/irstlm/bin/build-lm.sh -t /tmp/$datestamp -i $lc_no_clean_dir/$lmbasename.lowercase.$lang2 -o $lmdir/$lang2.$lngmdlparameters.lm.gz -n $Gram -k $dictnumparts -s $s
- elif [ ! -f $lmdir/$lang2.$lngmdlparameters.lm.gz ]; then
- echo "*** non-distributed building of LM file"
- $toolsdir/irstlm/bin/build-lm.sh -t /tmp/$datestamp -i $lc_no_clean_dir/$lmbasename.lowercase.$lang2 -o $lmdir/$lang2.$lngmdlparameters.lm.gz -n $Gram -s $s
- fi
- rm -rf /tmp/$datestamp
- if [ ! -f $lmdir/$lang2.$lngmdlparameters.blm.mm ]; then
- if [ "$quantize" = "1" ]; then
- echo "*** quantize language model"
- $toolsdir/irstlm/bin/quantize-lm $lmdir/$lang2.$lngmdlparameters.lm.gz $lmdir/$lang2.$lngmdlparameters.qlm.gz
- echo "*** binarize language model"
- $toolsdir/irstlm/bin/compile-lm --memmap $lmmemmapping $lmdir/$lang2.$lngmdlparameters.qlm.gz $lmdir/$lang2.$lngmdlparameters.blm.mm
- else
- echo "*** binarize language model"
- $toolsdir/irstlm/bin/compile-lm --memmap $lmmemmapping $lmdir/$lang2.$lngmdlparameters.lm.gz $lmdir/$lang2.$lngmdlparameters.blm.mm
- fi
- fi
- else
- echo "Language model already exists in $lmdir/$lang2.$lngmdlparameters.blm.mm. Reusing it."
- fi
-#... else if LM built with RandLM ...
-elif [ "$lngmdl" = "5" ]; then
- if [ "$reuse" != "1" -o ! -f $lmdir/$lang2.$lngmdlparameters.BloomMap ]; then
- #if this operation was previously unsuccessful
- if [ -f $lmdir/$lang2.$lngmdlparameters.counts.sorted.gz -o -f $lmdir/$lang2.$lngmdlparameters.gz ]; then
- rm -rf $lmdir
- mkdir -p $lmdir
- fi
-
- if [ "$inputtype" = "corpus" ]; then
- echo "****** build corpus RandLM language model"
- cd $lmdir
- if [ ! -f $lc_no_clean_dir/$lmbasename.lowercase.$lang2.gz ]; then
- gzip -f < $lc_no_clean_dir/$lmbasename.lowercase.$lang2 > $lc_no_clean_dir/$lmbasename.lowercase.$lang2.gz
- fi
- echo "$lc_no_clean_dir/$lmbasename.lowercase.$lang2.gz" >> $logdir/corpus-files.txt
- $toolsdir/randlm/bin/buildlm -struct BloomMap -order $Gram -falsepos $falsepos -values $values -output-prefix $lang2.$lngmdlparameters -input-type $inputtype -input-path $lc_no_clean_dir/$lmbasename.lowercase.$lang2.gz
- elif [ "$inputtype" = "arpa" ]; then
- echo "****** build ARPA RandLM language model"
- cd $lmdir
- $toolsdir/irstlm/bin/build-lm.sh -i $lc_no_clean_dir/$lmbasename.lowercase.$lang2 -n $Gram -o $lmdir/$lang2.$lngmdlparameters.gz -k $dictnumparts
- cd $lmdir
- $toolsdir/randlm/bin/buildlm -struct BloomMap -order $Gram -falsepos $falsepos -values $values -output-prefix $lang2.$lngmdlparameters -input-type $inputtype -input-path $lmdir/$lang2.$lngmdlparameters.gz
- fi
- else
- echo "Language model already exists in $lmdir/$lang2.$lngmdlparameters.BloomMap. Reusing it."
- fi
-fi
-for createdfile in `ls $lmdir`; do
- echo "$lmdir/$createdfile" >> $logdir/corpus-files.txt
-done
-if [ -d $lmdir/stat ]; then
- for createdfile in `ls $lmdir/stat`; do
- echo "$lmdir/stat/$createdfile" >> $logdir/corpus-files.txt
- done
-fi
-
-if [ ! -f $lmdir/$lang2.$lngmdlparameters.blm.mm -a ! -f $lmdir/$lang2.$lngmdlparameters.BloomMap ]; then
- makeTrainingSummary
- echo "Linguistic model not correctly trained. Exiting..."
- exit 0
-fi
-
-cd $workdir
-#=========================================================================================================================================================
-#3. RECASER TRAINING
-#=========================================================================================================================================================
-
-startrecasertrainingdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-echo "********************** TRAIN RECASER WITH IRSTLM:"
-
-if [ "$reuse" != "1" -o ! -f $recaserdir/phrase-table.$lang2.$recaserbasename.binphr.tgtvoc ]; then
- if [ -f $recaserdir/cased.irstlm.$lang2.$recaserbasename.gz ]; then
- rm -rf $recaserdir
- mkdir -p $recaserdir
- fi
- echo "****** Check recaser file exists"
- if [ ! -f $datadir/$recaserbasename.$lang2 ]; then
- echo "The file $datadir/$recaserbasename.$lang2, used for recaser training, does not exist. Please review the \$recaserbasename and possibly the \$lang2 settings of this script. Exiting ..."
- exit 0
- fi
-
- cd $toolsdir/moses/script*
- cd recaser
- echo "****** patch train-recaser.perl"
- sed -e 's#^.*my \$cmd.*NGRAM_COUNT.*$#\tmy $cmd = "toolsdir/irstlm/bin/build-lm.sh -t /tmp/datestamp -i $CORPUS -n 3 -o $DIR/cased.irstlm.gz";#g' -e "s#toolsdir#$toolsdir#g" -e "s#datestamp#$datestamp#g" train-recaser.perl > train-recaser.perl.out
- sed -e 's#^.*my \$cmd.*TRAIN\_SCRIPT.*$#\tmy $cmd = "$TRAIN_SCRIPT --root-dir $DIR --model-dir $DIR --first-step $first --alignment a --corpus $DIR/aligned --f lowercased --e cased --max-phrase-length $MAX_LEN --lm 0:3:$DIR/cased.irstlm.gz:1";#g' train-recaser.perl.out > train-recaser.perl
- chmod +x train-recaser.perl
- echo "****** substitute control characters by space"
- if [ "$reuse" != "1" -o ! -f $tokdir/$recaserbasename.$lang2.ctrl ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$recaserbasename.$lang2 > $tokdir/$recaserbasename.$lang2.ctrl
- else
- echo "Substitute control characters by a space in the $datadir/$recaserbasename.$lang2 file already done. Reusing it."
- fi
- echo "$tokdir/$recaserbasename.$lang2.ctrl" >> $logdir/corpus-files.txt
- echo "****** tokenize recaser file"
- if [ "$reuse" != "1" -o ! -f $tokdir/$recaserbasename.tok.$lang2 ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $tokdir/$recaserbasename.$lang2.ctrl > $tokdir/$recaserbasename.tok.$lang2
- else
- echo "Tokenizing of the $tokdir/$recaserbasename.$lang2.ctrl already done. Reusing it."
- fi
- echo "$tokdir/$recaserbasename.tok.$lang2" >> $logdir/corpus-files.txt
-
- echo "****** train recaser"
- $toolsdir/moses/script*/recaser/train-recaser.perl -train-script $toolsdir/moses/script*/training/train-model.perl -corpus $tokdir/$recaserbasename.tok.$lang2 -dir $recaserdir -scripts-root-dir $toolsdir/moses/scripts*
- mv $recaserdir/cased.irstlm.gz $recaserdir/cased.irstlm.$lang2.$recaserbasename.gz
-
- echo "****** binarize recaser language model"
- $toolsdir/irstlm/bin/compile-lm --memmap 1 $recaserdir/cased.irstlm.$lang2.$recaserbasename.gz $recaserdir/cased.irstlm.$lang2.$recaserbasename.blm.mm
-
-
- echo "****** create binary phrase table"
- cd $recaserdir
- gzip -cd $recaserdir/phrase-table.gz | LC_ALL=C sort | $toolsdir/moses/misc/processPhraseTable -ttable 0 0 - -nscores 5 -out $recaserdir/phrase-table.$lang2.$recaserbasename
-
- echo "****** patch recaser's moses.ini"
- if (( $lngmdl == 1 )) ; then
- sed -e 's#^.*cased.*$#1 0 1 workdir/recaser/lang2/recaserbasename-IRSTLM/cased.irstlm.lang2.recaserbasename.blm.mm#g' -e "s#workdir#$workdir#g" -e "s#recaserbasename#$recaserbasename#g" -e "s#lang2#$lang2#g" $recaserdir/moses.ini > $recaserdir/moses.ini.out
- sed -e 's#^.*phrase-table.0-0.gz$#0 0 5 workdir/recaser/lang2/recaserbasename-IRSTLM/phrase-table.lang2.recaserbasename#g' -e "s#workdir#$workdir#g" -e "s#recaserbasename#$recaserbasename#g" -e "s#lang2#$lang2#g" $recaserdir/moses.ini.out > $recaserdir/moses.ini
- rm -f moses.ini.out
- fi
-else
- echo "Recaser training already done. Reusing it."
-fi
-
-for createdfile in `ls $recaserdir`; do
- echo "$recaserdir/$createdfile" >> $logdir/corpus-files.txt
-done
-
-if [ ! -f $recaserdir/phrase-table.$lang2.$recaserbasename.binphr.tgtvoc ]; then
- makeTrainingSummary
- echo "Recaser not correctly trained. Exiting..."
- exit 0
-fi
-#=========================================================================================================================================================
-#4. TRAIN CORPUS
-#=========================================================================================================================================================
-starttrainingdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-echo "********************** CORPUS TRAINING:"
-if [ "$reuse" = "1" ]; then
- if [ ! -f $modeldir/moses.ini ]; then
- if [ -f $modeldir/aligned.grow-diag-final-and ]; then
- rm -rf $modeldir
- mkdir -p $modeldir
- fi
- if [ -f $workdir/model/$lang2-$lang1-$corpusbasename.$lngmdlparameters/$trainingparameters/$lang1-$lang2.A3.final.gz -a -f $workdir/model/$lang2-$lang1-$corpusbasename.$lngmdlparameters/$trainingparameters/$lang2-$lang1.A3.final.gz ]; then
- echo "****** Reusing an already trained inverted corpus"
- frsttrainingstep=3
- cp -fR $workdir/model/$lang2-$lang1-$corpusbasename.$lngmdlparameters/$trainingparameters $workdir/model/$lang1-$lang2-$corpusbasename.$lngmdlparameters
- rm $modeldir/moses.ini 2>/dev/null
- rm $modeldir/aligned.grow-diag-final-and 2>/dev/null
- rm $modeldir/aligned.intersect 2>/dev/null
- rm $modeldir/aligned.union 2>/dev/null
- rm $modeldir/aligned.grow-diag 2>/dev/null
- rm $modeldir/aligned.grow 2>/dev/null
- rm $modeldir/aligned.grow-final 2>/dev/null
- rm $modeldir/lex.e2f 2>/dev/null
- rm $modeldir/lex.f2e 2>/dev/null
- rm $modeldir/extract.gz 2>/dev/null
- rm $modeldir/extract.inv.gz 2>/dev/null
- rm $modeldir/extract.o.gz 2>/dev/null
- rm $modeldir/phrase-table.$corpusbasename.$lang2-$lang1.gz 2>/dev/null
- rm $modeldir/reordering-table.$corpusbasename.$lang2-$lang1.$param.gz 2>/dev/null
- else
- frsttrainingstep=$firsttrainingstep
- fi
- fi
-else
- frsttrainingstep=$firsttrainingstep
-fi
-#------------------------------------------------------------------------------------------------------------------------------------------------
-if [ "$reuse" != "1" -o ! -f $modeldir/moses.ini ]; then
- echo "****** substitute control characters by space in corpus files"
- if [ "$reuse" != "1" -o ! -f $tokdir/$corpusbasename.$lang1.ctrl ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$corpusbasename.$lang1 > $tokdir/$corpusbasename.$lang1.ctrl
- echo "$lang1 file ($datadir/$corpusbasename.$lang1) done"
- else
- echo "Substitute control characters by a space in the $lang1 file ($datadir/$corpusbasename.$lang1) already done. Reusing it."
- fi
- echo "$tokdir/$corpusbasename.$lang1.ctrl" >> $logdir/corpus-files.txt
- if [ "$reuse" != "1" -o ! -f $tokdir/$corpusbasename.$lang2.ctrl ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$corpusbasename.$lang2 > $tokdir/$corpusbasename.$lang2.ctrl
- echo "$lang2 file ($datadir/$corpusbasename.$lang2) done"
- else
- echo "Substitute control characters by a space in the $lang2 file ($datadir/$corpusbasename.$lang2) already done. Reusing it."
- fi
- echo "$tokdir/$corpusbasename.$lang2.ctrl" >> $logdir/corpus-files.txt
- echo "****** tokenize corpus files"
- if [ "$reuse" != "1" -o ! -f $tokdir/$corpusbasename.tok.$lang1 ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $tokdir/$corpusbasename.$lang1.ctrl > $tokdir/$corpusbasename.tok.$lang1
- else
- echo "The $tokdir/$corpusbasename.$lang1.ctrl file was already tokenized. Reusing it."
- fi
- echo "$tokdir/$corpusbasename.tok.$lang1" >> $logdir/corpus-files.txt
- if [ "$reuse" != "1" -o ! -f $tokdir/$corpusbasename.tok.$lang2 ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $tokdir/$corpusbasename.$lang2.ctrl > $tokdir/$corpusbasename.tok.$lang2
- else
- echo "The $tokdir/$corpusbasename.$lang2.ctrl file was already tokenized. Reusing it."
- fi
- echo "$tokdir/$corpusbasename.tok.$lang2" >> $logdir/corpus-files.txt
- #----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** clean corpus files"
- if [ "$reuse" != "1" -o ! -f $cleandir/$corpusbasename.clean.$lang1 -o ! -f $cleandir/$corpusbasename.clean.$lang2 ]; then
- $toolsdir/moses/scripts*/training/clean-corpus-n.perl $tokdir/$corpusbasename.tok $lang1 $lang2 $cleandir/$corpusbasename.clean $MinLen $MaxLen
- else
- echo "The $cleandir/$corpusbasename.clean.$lang1 and $cleandir/$corpusbasename.clean.$lang2 files already exist. Reusing them."
- fi
- echo "$cleandir/$corpusbasename.clean.$lang1" >> $logdir/corpus-files.txt
- echo "$cleandir/$corpusbasename.clean.$lang2" >> $logdir/corpus-files.txt
- #----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** lowercase corpus files"
- if [ "$reuse" != "1" -o ! -f $lc_clean_dir/$corpusbasename.lowercase.$lang1 ]; then
- $toolsdir/scripts/lowercase.perl < $cleandir/$corpusbasename.clean.$lang1 > $lc_clean_dir/$corpusbasename.lowercase.$lang1
- else
- echo "The $lc_clean_dir/$corpusbasename.lowercase.$lang1 file already exists. Reusing it."
- fi
- echo "$lc_clean_dir/$corpusbasename.lowercase.$lang1" >> $logdir/corpus-files.txt
- if [ "$reuse" != "1" -o ! -f $lc_clean_dir/$corpusbasename.lowercase.$lang2 ]; then
- $toolsdir/scripts/lowercase.perl < $cleandir/$corpusbasename.clean.$lang2 > $lc_clean_dir/$corpusbasename.lowercase.$lang2
- else
- echo "The $lc_clean_dir/$corpusbasename.lowercase.$lang2 file already exists. Reusing it."
- fi
- echo "$lc_clean_dir/$corpusbasename.lowercase.$lang2" >> $logdir/corpus-files.txt
- #----------------------------------------------------------------------------------------------------------------------------------------
- #create data to be used in moses.ini
- if [ "$lngmdl" = "1" ]; then
- lmstr="0:$Gram:$lmdir/$lang2.$lngmdlparameters.blm.mm:1"
- elif [ "$lngmdl" = "5" ]; then
- lmstr="0:$Gram:$lmdir/$lang2.$lngmdlparameters.BloomMap:5"
- fi
- if [ "$frsttrainingstep" -lt "3" ]; then
- #------------------------------------------------------------------------------------------------------------------------
- echo "****** phase 1 of training"
- cd $toolsdir/moses/scripts*/training
- sed -e 's#^.*my \$cmd.*\$MKCLS.*opt.*$#\tmy $cmd = "$MKCLS -cnumclasses -nnummkclsiterations -p$corpus -V$classes opt";#g' -e "s#numclasses#$numclasses#g" -e "s#nummkclsiterations#$nummkclsiterations#g" train-model.perl > train-model.modif.perl
- sed -e 's#BINDIR=\"\"#BINDIR="toolsdir/mgizanewdir/bin"#g' -e "s#toolsdir#$toolsdir#g" -e "s#mgizanewdir#$mgizanewdir#g" train-model.modif.perl > train-model.perl
- rm -f train-model.modif.perl
- chmod +x train-model.perl
- if [ "$paralleltraining" = "1" ]; then
- $toolsdir/moses/scripts*/training/train-model.perl -parallel -scripts-root-dir $toolsdir/moses/scripts* -root-dir $workdir -corpus $lc_clean_dir/$corpusbasename.lowercase -f $lang1 -e $lang2 -alignment $alignment -reordering $reordering -lm $lmstr -phrase-translation-table $modeldir/phrase-table.$corpusbasename.$lang1-$lang2 -reordering-table $modeldir/reordering-table.$corpusbasename.$lang1-$lang2 -max-phrase-length $MaxPhraseLength -first-step 1 -last-step 1 -model-dir $modeldir -corpus-dir $modeldir -giza-f2e $modeldir -giza-e2f $modeldir
- else
- $toolsdir/moses/scripts*/training/train-model.perl -scripts-root-dir $toolsdir/moses/scripts* -root-dir $workdir -corpus $lc_clean_dir/$corpusbasename.lowercase -f $lang1 -e $lang2 -alignment $alignment -reordering $reordering -lm $lmstr -phrase-translation-table $modeldir/phrase-table.$corpusbasename.$lang1-$lang2 -reordering-table $modeldir/reordering-table.$corpusbasename.$lang1-$lang2 -max-phrase-length $MaxPhraseLength -first-step 1 -last-step 1 -model-dir $modeldir -corpus-dir $modeldir -giza-f2e $modeldir -giza-e2f $modeldir
- fi
- #------------------------------------------------------------------------------------------------------------------------
- echo "****** phase 2 of training: MGIZA alignment"
- $toolsdir/mgiza/bin/snt2cooc $modeldir/$lang2-$lang1.cooc $modeldir/$lang2.vcb $modeldir/$lang1.vcb $modeldir/$lang1-$lang2-int-train.snt
- $toolsdir/mgiza/bin/snt2cooc $modeldir/$lang1-$lang2.cooc $modeldir/$lang1.vcb $modeldir/$lang2.vcb $modeldir/$lang2-$lang1-int-train.snt
- $toolsdir/mgiza/bin/mgiza -ncpus $mgizanumprocessors -c $modeldir/$lang2-$lang1-int-train.snt -o $modeldir/$lang2-$lang1 -s $modeldir/$lang1.vcb -t $modeldir/$lang2.vcb -coocurrencefile $modeldir/$lang1-$lang2.cooc -ml $ml -countincreasecutoff $countincreasecutoff -countincreasecutoffal $countincreasecutoffal -mincountincrease $mincountincrease -peggedcutoff $peggedcutoff -probcutoff $probcutoff -probsmooth $probsmooth -m1 $model1iterations -m2 $model2iterations -mh $hmmiterations -m3 $model3iterations -m4 $model4iterations -m5 $model5iterations -m6 $model6iterations -t1 $model1dumpfrequency -t2 $model2dumpfrequency -t2to3 $transferdumpfrequency -t345 $model345dumpfrequency -th $hmmdumpfrequency -onlyaldumps $onlyaldumps -nodumps $nodumps -compactadtable $compactadtable -model4smoothfactor $model4smoothfactor -compactalignmentformat $compactalignmentformat -verbose $verbose -verbosesentence $verbosesentence -emalsmooth $emalsmooth -model23smoothfactor $model23smoothfactor -model4smoothfactor $model4smoothfactor -model5smoothfactor $model5smoothfactor -nsmooth $nsmooth -nsmoothgeneral $nsmoothgeneral -deficientdistortionforemptyword $deficientdistortionforemptyword -depm4 $depm4 -depm5 $depm5 -emalignmentdependencies $emalignmentdependencies -emprobforempty $emprobforempty -m5p0 $m5p0 -manlexfactor1 $manlexfactor1 -manlexfactor2 $manlexfactor2 -manlexmaxmultiplicity $manlexmaxmultiplicity -maxfertility $maxfertility -p0 $p0 -pegging $pegging
- $toolsdir/mgiza/bin/mgiza -ncpus $mgizanumprocessors -c $modeldir/$lang1-$lang2-int-train.snt -o $modeldir/$lang1-$lang2 -s $modeldir/$lang2.vcb -t $modeldir/$lang1.vcb -coocurrencefile $modeldir/$lang2-$lang1.cooc -ml $ml -countincreasecutoff $countincreasecutoff -countincreasecutoffal $countincreasecutoffal -mincountincrease $mincountincrease -peggedcutoff $peggedcutoff -probcutoff $probcutoff -probsmooth $probsmooth -m1 $model1iterations -m2 $model2iterations -mh $hmmiterations -m3 $model3iterations -m4 $model4iterations -m5 $model5iterations -m6 $model6iterations -t1 $model1dumpfrequency -t2 $model2dumpfrequency -t2to3 $transferdumpfrequency -t345 $model345dumpfrequency -th $hmmdumpfrequency -onlyaldumps $onlyaldumps -nodumps $nodumps -compactadtable $compactadtable -model4smoothfactor $model4smoothfactor -compactalignmentformat $compactalignmentformat -verbose $verbose -verbosesentence $verbosesentence -emalsmooth $emalsmooth -model23smoothfactor $model23smoothfactor -model4smoothfactor $model4smoothfactor -model5smoothfactor $model5smoothfactor -nsmooth $nsmooth -nsmoothgeneral $nsmoothgeneral -deficientdistortionforemptyword $deficientdistortionforemptyword -depm4 $depm4 -depm5 $depm5 -emalignmentdependencies $emalignmentdependencies -emprobforempty $emprobforempty -m5p0 $m5p0 -manlexfactor1 $manlexfactor1 -manlexfactor2 $manlexfactor2 -manlexmaxmultiplicity $manlexmaxmultiplicity -maxfertility $maxfertility -p0 $p0 -pegging $pegging
- echo "****** phase 2.1 of training (merge alignments)"
- $toolsdir/mgiza/scripts/merge_alignment.py $modeldir/$lang1-$lang2.A3.final.part* > $modeldir/$lang1-$lang2.A3.final
- $toolsdir/mgiza/scripts/merge_alignment.py $modeldir/$lang2-$lang1.A3.final.part* > $modeldir/$lang2-$lang1.A3.final
- gzip -f $modeldir/$lang1-$lang2.A3.final > $modeldir/$lang1-$lang2.A3.final.gz
- gzip -f $modeldir/$lang2-$lang1.A3.final > $modeldir/$lang2-$lang1.A3.final.gz
- if [ -f $modeldir/$lang1-$lang2.A3.final ]; then
- rm -f $modeldir/$lang1-$lang2.A3.final
- fi
- if [ -f $modeldir/$lang2-$lang1.A3.final ]; then
- rm -f $modeldir/$lang2-$lang1.A3.final
- fi
- rm -f $modeldir/$lang1-$lang2.A3.final.part* 2>/dev/null
- rm -f $modeldir/$lang2-$lang1.A3.final.part* 2>/dev/null
- fi
- #-------------------------------------------------------------------------------------------------------------------------------
- if [ "$paralleltraining" = "1" ]; then
- echo "****** Rest of parallel training"
- $toolsdir/moses/scripts*/training/train-model.perl -parallel -scripts-root-dir $toolsdir/moses/scripts* -root-dir $workdir -corpus $lc_clean_dir/$corpusbasename.lowercase -f $lang1 -e $lang2 -alignment $alignment -reordering $reordering -lm $lmstr -phrase-translation-table $modeldir/phrase-table.$corpusbasename.$lang1-$lang2 -reordering-table $modeldir/reordering-table.$corpusbasename.$lang1-$lang2 -max-phrase-length $MaxPhraseLength -first-step 3 -last-step $lasttrainingstep -model-dir $modeldir -corpus-dir $modeldir -giza-f2e $modeldir -giza-e2f $modeldir
- else
- echo "****** Rest of non-parallel training"
- $toolsdir/moses/scripts*/training/train-model.perl -scripts-root-dir $toolsdir/moses/scripts* -root-dir $workdir -corpus $lc_clean_dir/$corpusbasename.lowercase -f $lang1 -e $lang2 -alignment $alignment -reordering $reordering -lm $lmstr -phrase-translation-table $modeldir/phrase-table.$corpusbasename.$lang1-$lang2 -reordering-table $modeldir/reordering-table.$corpusbasename.$lang1-$lang2 -max-phrase-length $MaxPhraseLength -first-step 3 -last-step $lasttrainingstep -model-dir $modeldir -corpus-dir $modeldir -giza-f2e $modeldir -giza-e2f $modeldir
- fi
- #-------------------------------------------------------------------------------------------------------------------------------
- if [ "$memmapping" = "1" ]; then
- cp $modeldir/moses.ini $memmapsdir
- echo "$memmapsdir/moses.ini" >> $logdir/corpus-files.txt
- fi
- cp $modeldir/moses.ini $modeldir/moses.ini.bak.train
-else
- echo "Training already done. Reusing it."
-fi
-
-for createdfile in `ls $modeldir`; do
- echo "$modeldir/$createdfile" >> $logdir/corpus-files.txt
-done
-
-if [ ! -f $modeldir/moses.ini ]; then
- makeTrainingSummary
- echo "Corpus not correctly trained. Exiting..."
- exit 0
-fi
-
-cd $workdir
-#=========================================================================================================================================================
-#5. CORPUS MEMORY-MAPPING
-#=========================================================================================================================================================
-if (( $memmapping == 1 )) ; then
- echo "********************** MEMORY-MAPPING:"
- #If you have no trained corpus, then alert that you should create it
- checktrainedcorpusexists
-
- startmmpdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-
-
- if [ "$reuse" != "1" -o "$domemmapping" = "1" -o ! -f $memmapsdir/reordering-table.$corpusbasename.$lang1-$lang2.$param.binlexr.srctree ]; then
- if [ -f $memmapsdir/phrase-table.$corpusbasename.$lang1-$lang2.binphr.idx ]; then
- rm -rf $memmapsdir
- mkdir -p $memmapsdir
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** create binary phrase table"
- gzip -cd $modeldir/phrase-table.$corpusbasename.$lang1-$lang2.gz | LC_ALL=C sort | $toolsdir/moses/misc/processPhraseTable -ttable 0 0 - -nscores 5 -out $memmapsdir/phrase-table.$corpusbasename.$lang1-$lang2
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** create binary reordering table"
-
- gzip -cd $modeldir/reordering-table.$corpusbasename.$lang1-$lang2.$param.gz | LC_ALL=C sort | $toolsdir/moses/misc/processLexicalTable -out $memmapsdir/reordering-table.$corpusbasename.$lang1-$lang2.$param
- #-----------------------------------------------------------------------------------------------------------------------------------------
- #Save the present moses.ini just in case it is erased if you interrupt one of the subsequent steps
- cp $modeldir/moses.ini $modeldir/moses.ini.bak.memmap
- echo "$modeldir/moses.ini.bak.memmap" >> $logdir/corpus-files.txt
- cp $modeldir/moses.ini $memmapsdir/moses.ini
- sed -e "s#$modeldir#$memmapsdir#g" -e "s#wbe\-$reordering\.gz#wbe-$reordering#g" -e "s#wbe\-msd\-backward\-fe\.gz#wbe-msd-backward-fe#g" -e "s#wbe\-msd\-backward\-f\.gz#wbe-msd-backward-f#g" -e "s#wbe\-monotonicity\-backward\-fe\.gz#wbe-monotonicity-backward-fe#g" -e "s#wbe\-monotonicity\-backward\-f\.gz#wbe-monotonicity-backward-f#g" -e "s#0 0 0 5 $memmapsdir\/phrase\-table\.$corpusbasename\.$lang1\-$lang2#1 0 0 5 $memmapsdir/phrase-table.$corpusbasename.$lang1-$lang2#g" $memmapsdir/moses.ini > $memmapsdir/moses.ini.memmap
- mv $memmapsdir/moses.ini.memmap $memmapsdir/moses.ini
- #-----------------------------------------------------------------------------------------------------------------------------------------
- else
- echo "Memory-mapping already done. Reusing it."
- fi
-
- for createdfile in `ls $memmapsdir`; do
- echo "$memmapsdir/$createdfile" >> $logdir/corpus-files.txt
- done
-
- if [ ! -f $memmapsdir/reordering-table.$corpusbasename.$lang1-$lang2.$param.binlexr.srctree ]; then
- makeTrainingSummary
- echo "Memory-mapping not correctly done. Exiting..."
- exit 0
- fi
-fi
-cd $workdir
-
-#=========================================================================================================================================================
-#6. TUNING
-#=========================================================================================================================================================
-if (( $tuning == 1 )) ; then
- echo "********************** TUNING:"
- #If you have no trained corpus, then alert that you should create it
- checktrainedcorpusexists
-
- starttuningdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-
- if [ "$reuse" != "1" -o "$dotuning" = "1" -o ! -f $tuningdir/moses.ini ]; then
- if [ -f $tuningdir/run1.moses.ini ]; then
- rm -rf $tuningdir
- mkdir -p $tuningdir
- dotrainingtest=1
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** tokenize language 1 tuning data"
- if [ "$reuse" != "1" -o ! -f $tokdir/$tuningbasename.tok.$lang1 ]; then
- if [ -f $datadir/$tuningbasename.$lang1 ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$tuningbasename.$lang1 > $datadir/$tuningbasename.$lang1.tmp
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $datadir/$tuningbasename.$lang1.tmp > $tokdir/$tuningbasename.tok.$lang1
- else
- echo "The $datadir/$tuningbasename.$lang1 file, used for tuning, does not exist. Please review the tuningbasename setting of this script. Exiting ..."
- exit 0
- fi
- else
- echo "The $tokdir/$tuningbasename.tok.$lang1 file already exists. Reusing it."
- fi
- echo "$tokdir/$tuningbasename.tok.$lang1" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** tokenize language 2 tuning data"
- if [ "$reuse" != "1" -o ! -f $tokdir/$tuningbasename.tok.$lang2 ]; then
- if [ -f $datadir/$tuningbasename.$lang2 ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$tuningbasename.$lang2 > $datadir/$tuningbasename.$lang2.tmp
- $toolsdir/scripts/tokenizer.perl -l $lang2 < $datadir/$tuningbasename.$lang2.tmp > $tokdir/$tuningbasename.tok.$lang2
- else
- echo "The $datadir/$tuningbasename.$lang2 file, used for tuning, does not exist. Please review the tuningbasename setting of this script. Exiting ..."
- exit 0
- fi
- else
- echo "The $tokdir/$tuningbasename.tok.$lang2 file already exists. Reusing it."
- fi
- echo "$tokdir/$tuningbasename.tok.$lang2" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** lowercase language 1 tuning data"
- if [ "$reuse" != "1" -o ! -f $lc_no_clean_dir/$tuningbasename.lowercase.$lang1 ]; then
- $toolsdir/scripts/lowercase.perl < $tokdir/$tuningbasename.tok.$lang1 > $lc_no_clean_dir/$tuningbasename.lowercase.$lang1
- else
- echo "The $lc_no_clean_dir/$tuningbasename.lowercase.$lang1 file already exists. Reusing it."
- fi
- echo "$lc_no_clean_dir/$tuningbasename.lowercase.$lang1" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** lowercase language 2 tuning data"
- if [ "$reuse" != "1" -o ! -f $lc_no_clean_dir/$tuningbasename.lowercase.$lang2 ]; then
- $toolsdir/scripts/lowercase.perl < $tokdir/$tuningbasename.tok.$lang2 > $lc_no_clean_dir/$tuningbasename.lowercase.$lang2
- else
- echo "The $lc_no_clean_dir/$tuningbasename.lowercase.$lang2 file already exists. Reusing it."
- fi
- echo "$lc_no_clean_dir/$tuningbasename.lowercase.$lang2" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
-
- echo "****** tuning!!!"
- cd $workdir/tuning/
- # if corpus was memory-mapped
- if [ "$memmapping" = "1" ]; then
- #use memory-mapping
- mosesinidir1=$memmapsdir
- else
- mosesinidir1=$modeldir
- fi
- $modifiedscriptsdir/mert-moses-new-modif.pl $lc_no_clean_dir/$tuningbasename.lowercase.$lang1 $lc_no_clean_dir/$tuningbasename.lowercase.$lang2 $toolsdir/moses/moses-cmd/src/moses $mosesinidir1/moses.ini --mertdir $toolsdir/moses/mert --rootdir $toolsdir/moses/scripts* --no-filter-phrase-table --working-dir $tuningdir --max-runs $maxruns
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** insert tuning weights in moses.ini"
- $toolsdir/scripts/reuse-weights.perl $tuningdir/moses.ini < $mosesinidir1/moses.ini > $tuningdir/moses.weight-reused.ini
- #-----------------------------------------------------------------------------------------------------------------------------------------
- else
- echo "Tuning already done. Reusing it."
- fi
-
- for createdfile in `ls $tuningdir`; do
- echo "$tuningdir/$createdfile" >> $logdir/corpus-files.txt
- done
-
- if [ ! -f $tuningdir/moses.ini ]; then
- makeTrainingSummary
- echo "Tuning not correctly done. Exiting..."
- exit 0
- fi
-fi
-#=========================================================================================================================================================
-#7. TRAINING TEST
-#=========================================================================================================================================================
-if (( $runtrainingtest == 1 )) ; then
-
- echo "********************** RUN TRAINING TEST:"
- #If you have no trained corpus, then alert that you should create it
- checktrainedcorpusexists
-
- starttestdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-
- if [ "$reuse" != "1" -o "$dotrainingtest" = "1" -o ! -d $testdir -o ! -f $testdir/$testbasename.moses.sgm ]; then
- echo $dotrainingtest
- if [ -f $testdir/$testbasename.moses.$lang2 ]; then
- rm -rf $testdir
- mkdir -p $testdir
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** tokenize language 1 training test data"
- if [ "$reuse" != "1" -o ! -f $tokdir/$testbasename.tok.$lang1 ]; then
- if [ -f $datadir/$testbasename.$lang1 ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$testbasename.$lang1 > $datadir/$testbasename.$lang1.tmp
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $datadir/$testbasename.$lang1.tmp > $tokdir/$testbasename.tok.$lang1
- else
- echo "The $datadir/$testbasename.$lang1 file, used for testing the trained corpus, does not exist. Please review the \$testbasename and possibly the \$lang1 settings of this script. Exiting ..."
- exit 0
- fi
- else
- echo "The $tokdir/$testbasename.tok.$lang1 file already exists. Reusing it."
- fi
- echo "$tokdir/$testbasename.tok.$lang1" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** tokenize language 2 training test data"
- if [ "$reuse" != "1" -o ! -f $tokdir/$testbasename.tok.$lang2 ]; then
- if [ -f $datadir/$testbasename.$lang2 ]; then
- tr '\a\b\f\r\v' ' ' < $datadir/$testbasename.$lang2 > $datadir/$testbasename.$lang2.tmp
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $datadir/$testbasename.$lang2.tmp > $tokdir/$testbasename.tok.$lang2
- else
- echo "The $datadir/$testbasename.$lang2 file, used for testing the trained corpus, does not exist. Please review the \$testbasename and possibly the \$lang1 settings of this script. Exiting ..."
- exit 0
- fi
- else
- echo "The $tokdir/$testbasename.tok.$lang2 file already exists. Reusing it."
- fi
- echo "$tokdir/$testbasename.tok.$lang2" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** lowercase training test data"
- if [ "$reuse" != "1" -o ! -f $lc_no_clean_dir/$testbasename.lowercase.$lang1 ]; then
- $toolsdir/scripts/lowercase.perl < $tokdir/$testbasename.tok.$lang1 > $lc_no_clean_dir/$testbasename.lowercase.$lang1
- else
- echo "The $lc_no_clean_dir/$testbasename.lowercase.$lang1 file already exists. Reusing it."
- fi
- echo "$lc_no_clean_dir/$testbasename.lowercase.$lang1" >> $logdir/corpus-files.txt
- if [ "$reuse" != "1" -o ! -f $lc_no_clean_dir/$testbasename.lowercase.$lang2 ]; then
- $toolsdir/scripts/lowercase.perl < $tokdir/$testbasename.tok.$lang2 > $lc_no_clean_dir/$testbasename.lowercase.$lang2
- else
- echo "The $lc_no_clean_dir/$testbasename.lowercase.$lang2 file already exists. Reusing it."
- fi
- echo "$lc_no_clean_dir/$testbasename.lowercase.$lang2" >> $logdir/corpus-files.txt
- cp $modeldir/moses.ini $testdir/
- #-----------------------------------------------------------------------------------------------------------------------------------------
-
- echo "****** run decoder test"
- if [ "$reuse" != "1" -o ! -f $testdir/$testbasename.moses.$lang2 ]; then
- #Choose the moses.ini file that best reflects the type of training done
- if [ "$tuning" = "1" ]; then
- mosesinidir2=$tuningdir/moses.weight-reused.ini
- elif [ "$memmapping" = "1" ]; then
- mosesinidir2=$memmapsdir/moses.ini
- else
- mosesinidir2=$modeldir/moses.ini
- fi
- if [ "$tuning" = "0" ]; then
- $toolsdir/moses/moses-cmd/src/moses -f $mosesinidir2 -weight-t $weight_t -weight-l $weight_l -weight-d $weight_d -weight-w $weight_w -mbr $mbr -mbr-size $mbrsize -mbr-scale $mbrscale -monotone-at-punctuation $monotoneatpunctuation -ttable-limit $ttablelimit -b $beamthreshold -early-discarding-threshold $earlydiscardingthreshold -search-algorithm $searchalgorithm -cube-pruning-pop-limit $cubepruningpoplimit -s $stack -max-phrase-length $maxphraselen -cube-pruning-diversity $cubepruningdiversity -distortion-limit $distortionlimit < $lc_no_clean_dir/$testbasename.lowercase.$lang1 > $testdir/$testbasename.moses.$lang2
- else
- $toolsdir/moses/moses-cmd/src/moses -f $mosesinidir2 < $lc_no_clean_dir/$testbasename.lowercase.$lang1 > $testdir/$testbasename.moses.$lang2
- fi
- else
- echo "The $testdir/$testbasename.moses.$lang2 file already exists. Reusing it."
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** recase the output"
- if [ "$reuse" != "1" -o ! -f $testdir/$testbasename.moses.recased.$lang2 ]; then
- $toolsdir/moses/script*/recaser/recase.perl -model $recaserdir/moses.ini -in $testdir/$testbasename.moses.$lang2 -moses $toolsdir/moses/moses-cmd/src/moses > $testdir/$testbasename.moses.recased.$lang2
- else
- echo "The $testdir/$testbasename.moses.recased.$lang2 file already exists. Reusing it."
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** detokenize test results"
- $toolsdir/scripts/detokenizer.perl -l $lang2 < $testdir/$testbasename.moses.recased.$lang2 > $detokdir/$testbasename.moses.detok.$lang2
- echo "$detokdir/$testbasename.moses.detok.$lang2" >> $logdir/corpus-files.txt
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** wrap test result in SGM"
- echo "*** wrap source file"
- if [ "$reuse" != "1" -o ! -f $testdir/$testbasename-src.$lang1.sgm ]; then
- exec<$datadir/$testbasename.$lang1
- echo '<srcset setid="'$testbasename'" srclang="'$lang1'">' > $testdir/$testbasename-src.$lang1.sgm
- echo '<DOC docid="'$testbasename'">' >> $testdir/$testbasename-src.$lang1.sgm
- numseg=0
- while read line
- do
- if [ "$line" != "" ]; then
- numseg=$(($numseg+1))
- echo "<seg id=$numseg>"$line"</seg>" >> $testdir/$testbasename-src.$lang1.sgm
- fi
- done
- echo "</DOC>" >> $testdir/$testbasename-src.$lang1.sgm
- echo "</srcset>" >> $testdir/$testbasename-src.$lang1.sgm
- else
- echo "The $testdir/$testbasename-src.$lang1.sgm file already exists. Reusing it."
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "*** wrap reference (human-made) translation"
- if [ "$reuse" != "1" -o ! -f $testdir/$testbasename-ref.$lang2.sgm ]; then
- exec<$datadir/$testbasename.$lang2
- echo '<refset setid="'$testbasename'" srclang="'$lang1'" trglang="'$lang2'">' > $testdir/$testbasename-ref.$lang2.sgm
- echo '<DOC docid="'$testbasename'" sysid="ref">' >> $testdir/$testbasename-ref.$lang2.sgm
- numseg=0
- while read line
- do
- if [ "$line" != "" ]; then
- numseg=$(($numseg+1))
- echo "<seg id=$numseg>"$line"</seg>" >> $testdir/$testbasename-ref.$lang2.sgm
- fi
- done
- echo "</DOC>" >> $testdir/$testbasename-ref.$lang2.sgm
- echo "</refset>" >> $testdir/$testbasename-ref.$lang2.sgm
- else
- echo "The $testdir/$testbasename-ref.$lang2.sgm file already exists. Reusing it."
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "*** wrap Moses translation"
- if [ "$reuse" != "1" -o ! -f $testdir/$testbasename.moses.sgm ]; then
- exec<$detokdir/$testbasename.moses.detok.$lang2
- echo '<tstset setid="'$testbasename'" srclang="'$lang1'" trglang="'$lang2'">' > $testdir/$testbasename.moses.sgm
- echo '<DOC docid="'$testbasename'" sysid="moses">' >> $testdir/$testbasename.moses.sgm
- numseg=0
- while read line
- do
- if [ "$line" != "" ]; then
- numseg=$(($numseg+1))
- echo "<seg id=$numseg>"$line"</seg>" >> $testdir/$testbasename.moses.sgm
- fi
- done
- echo "</DOC>" >> $testdir/$testbasename.moses.sgm
- echo "</tstset>" >> $testdir/$testbasename.moses.sgm
- else
- echo "The $testdir/$testbasename.moses.sgm file already exists. Reusing it."
- fi
- #-----------------------------------------------------------------------------------------------------------------------------------------
- else
- echo "Training test already done. Reusing it."
- fi
-
- for createdfile in `ls $testdir`; do
- echo "$testdir/$createdfile" >> $logdir/corpus-files.txt
- done
-
- if [ ! -f $testdir/$testbasename.moses.sgm ]; then
- makeTrainingSummary
- echo "Corpus training test not correctly done. Exiting..."
- exit 0
- fi
-
- echo "***************** GET SCORE:"
- #check if a trained corpus exists and react appropriately
- checktrainedcorpusexists
-
- #If a training test was not done before, alert for that and exit
- if [ ! -f $testdir/$testbasename.moses.sgm ]; then
- echo "In order to get a training test score, you must have done a training test first. Please set the \$runtrainingtest variable of this script to 1 in order to run a training test. Exiting..."
- exit 0
- else
- #-----------------------------------------------------------------------------------------------------------------------------------------
- echo "****** scoring"
- startscoringdate=`date +day:%d/%m/%y-time:%H:%M:%S`
- score=`$toolsdir/mteval-v11b.pl -s $testdir/$testbasename-src.$lang1.sgm -r $testdir/$testbasename-ref.$lang2.sgm -t $testdir/$testbasename.moses.sgm -c`
- echo $score
- #-----------------------------------------------------------------------------------------------------------------------------------------
- fi
-fi
-
-makeTrainingSummary
-
-echo "!!! Corpus training finished. A summary of it is located in $mosesdir/logs !!!"
-
-#=================================================================================================================================================
-# Changes in version 1.11
-#=================================================================================================================================================
-# Uses new Moses decoder (published on August 13, 2010 and updated on August 14, 2010)
-# Protects users better from mistakes (namely from a deficiently trained inverse corpus and from the deletion of a training in a previously trained corpus)
-# Reuses previous work better
-# Stops with an informative message if the products of one training phase (LM building, recaser training, corpus training, memmory-mapping, tuning or training test)
-# do not produce the expected results
-# Much more informative and accurate training log file that now reflects the work actually done, even if it is interrupted by CTRL+C; continues to show
-# the settings chosen by the user too
-#=================================================================================================================================================
-# Changes in version 1.01
-#=================================================================================================================================================
-# Uses new Moses decoder (published on August 9, 2010)
-# Works in Ubuntu 10.04 LTS (and, if you adapt the package dependencies, with Ubuntu 9.10 and 9.04
-# Appends to the end of the name of the translated files ".$lang2.moses"
-# Does not translate files already translated
-# Indicates to user what to do if the $logfile parameter wasn't set
-# Special treatment of files translated for being used in TMX translation memories
-#=================================================================================================================================================
-#Changes in version 0.992
-#=================================================================================================================================================
-# Scripts adapted to both Ubuntu 10.04 and to the new Moses (version published on April 26, 2010)
-#=================================================================================================================================================
-#Changes in version 0.99
-#=================================================================================================================================================
-# ***training steps*** chosen by the user cannot be illogical (for instance, it is not possible to tune or to evaluate a corpus not yet trained); user can still enter illegal parameters values, though)
-# does not overwrite files previously created in trainings with different settings
-# does not redo work previously done with the same settings, or parts of work that share the same settings
-# can reuse phases 1 and 2 of training previously made with a lang2-lang1 corpus when a new lang1-lang2 (inverted corpus) corpus is being trained
-# can limit tuning duration
-# parallel training works (in 64 bits Ubuntu 9.04 version)
-# no segmentation fault with RandLM (in 64 bits Ubuntu 9.04 version)
-# can compile-lm --memmap IRSTLM (in 64 bits Ubuntu 9.04 version)
-# creates a log of all the files created
-# work directory renamed corpora_trained directory
-
diff --git a/contrib/moses-for-mere-mortals/scripts/transfer-training-to-another-location-0.07 b/contrib/moses-for-mere-mortals/scripts/transfer-training-to-another-location-0.07
deleted file mode 100644
index ab593a678..000000000
--- a/contrib/moses-for-mere-mortals/scripts/transfer-training-to-another-location-0.07
+++ /dev/null
@@ -1,55 +0,0 @@
-#!/usr/bin/env bash
-# transfer-training-to-another-location-0.07
-# copyright 2010 João L. A. C. Rosas
-# date: 27/02/2010
-# licenced under the GPL licence, version 3
-# Special thanks to Hilário Leal Fontes and Maria José Machado, who helped to test the script and made very helpful suggestions
-
-# ***Purpose***: Create a copy of your trained corpora that can be used by someone else (even if in another computer) or by you yourself in a different Moses installation (you can have more than one Moses installation in the same computer). Your $mosesdir is written literally (e.g., "/home/john") in several trained corpora files. You have to change that string so that it reflects the $mosesdir to which you want to transfer your trainings. This script locates your $mosesdir string in your trained corpora files and substitutes it by the equivalent $mosesdir string that defines the location where you want your trainings transferred to. It creates a $mosesdir/corpora_trained_for_another_location/newusername directory, within which it will create the corpora_trained and logs directory prepared for the other user/Moses installation. Takes a good while to run if you have trained very large corpora.
-
-############################################################################################################################################
-# THIS SCRIPT ASSUMES THAT A IRSTLM AND RANDLM ENABLED MOSES HAS ALREADY BEEN INSTALLED WITH THE create script IN $mosesdir; ITS #
-# DEFAULT VALUE IS $HOME/moses-irstlm-randlm; CHANGE THIS VARIABLE IF YOU WANT IT TO REFER TO A DIFFERENT LOCATION. #
-# IT ALSO ASSUMES THAT THE TRAINING OF A CORPUS HAS ALREADY BEEN DONE WITH train-moses-irstlm-randlm. #
-############################################################################################################################################
-
-############################################################################################################################################
-# The values of the variables that follow should be filled according to your needs: #
-############################################################################################################################################
-# Base dir of your the Moses system (e.g., $HOME/moses-irstlm-randlm) whose trainings you want to transfer (!!! you have to fill this parameter !!!)
-mosesdirmine=$HOME/moses-irstlm-randlm
-# ***Login name*** of the user to whom the trained corpora will be transferred; ex: "john" (!!! you have to fill this parameter !!!)
-newusername=john
-# Basedir of the Moses system of the user to which the trained corpora will be transferred; ex: "/media/1.5TB/moses-irstlm-randlm" (!!! you have to fill this parameter !!!)
-mosesdirotheruser=
-############################################################################################################################################
-#end of parameters that you should fill #
-############################################################################################################################################
-
-############################################################################################################################################
-# DON'T CHANGE THE LINES THAT FOLLOW ... unless you know what you are doing! #
-############################################################################################################################################
-# Register start date and time of corpus training
-startdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-#Base dir of trained corpora
-corporatraineddir=$mosesdirmine/corpora_trained
-#Base dir of copy of your trained corpora prepared to be used by user $newusername
-corporatoexchange=$mosesdirmine/corpora_trained_for_another_location/$newusername
-if [ ! -d $corporatoexchange ]; then
- mkdir -p $corporatoexchange
-fi
-
-echo "Please wait. This can take a long time if $mosesdirmine has many trained corpora or especially large trained corpora..."
-#copy present corporatraineddir to a safe place
-cp -rf $mosesdirmine/corpora_trained $corporatoexchange
-cp -rf $mosesdirmine/logs $corporatoexchange
-
-if [ -d $corporatoexchange ]; then
- cd $corporatoexchange
- grep -lr -e "$mosesdirmine" * | xargs sed -i "s#$mosesdirmine#$mosesdirotheruser#g"
-fi
-echo ""
-echo "Processing done. The trained corpora prepared for user $newusername are located in the $corporatoexchange directory. Please transfer manually its corpora_trained and logs subdirectories to the $mosesdirotheruser directory. YOU ARE STRONGLY ADVISED TO MAKE A BACKUP OF THIS LATTER DIRECTORY BEFORE THAT TRANSFER. After having done it, you can safely erase the $mosesdirmine/corpora_trained_for_another_location directory. Your trained corpora in $mosesdirmine were not changed."
-echo "Starting time: $startdate"
-echo "End time : `date +day:%d/%m/%y-time:%H:%M:%S`"
-
diff --git a/contrib/moses-for-mere-mortals/scripts/translate-1.32 b/contrib/moses-for-mere-mortals/scripts/translate-1.32
deleted file mode 100644
index 4f5c14052..000000000
--- a/contrib/moses-for-mere-mortals/scripts/translate-1.32
+++ /dev/null
@@ -1,453 +0,0 @@
-#!/usr/bin/env bash
-# translate-1.32
-# copyright 2010 João L. A. C. Rosas
-# date: 11/09/2010
-# licenced under the GPL licence, version 3
-# the Mosesdecoder (http://sourceforge.net/projects/mosesdecoder/), is a tool upon which this script depends that is licenced under the GNU Library or Lesser General Public License (LGPL)
-# The comments transcribe parts of the Moses manual (http://www.statmt.org/moses/manual/manual.pdf).
-# Special thanks to Hilário Leal Fontes and Maria José Machado, who helped to test the script and made very helpful suggestions
-
-# ***Purpose***: Given a set of documents for translation in $mosesdir/translation_input, this script produces the Moses translation of that set of documents. If its $translate_for_tmx parameter is set to 1, it segments better the input file, erases empty and repeated segments and some types of segments containing just non-alphabetic characters and produces a translation adapted to the TMX specification of translation memories. The modified input file and its translation are placed in the $mosesdir/files_for_tmx directory. If the $translate_for_tmx is set to a value different from 1, this script translates the unchanged input document and its translation is placed in the $mosesdir/translation_output directory.
-
-############################################################################################################################################
-# THIS SCRIPT ASSUMES THAT A IRSTLM AND RANDLM ENABLED MOSES HAS ALREADY BEEN INSTALLED WITH the create script IN $mosesdir, WHOSE #
-# DEFAULT VALUE IS $HOME/moses-irstlm-randlm; CHANGE THIS VARIABLE IF YOU WANT IT TO REFER TO A DIFFERENT LOCATION. #
-# IT ALSO ASSUMES THAT THE TRAINING OF A CORPUS HAS ALREADY BEEN DONE WITH THE train script. #
-# IT FINALLY ASSUMES THAT YOU HAVE PLACED THE DOCUMENTS TO BE TRANSLATED IN THE $mosesdir/translation_input DIRECTORY #
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #
-# !!! The names of the files to be translated should not include spaces !!! #
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #
-# The names of the files to be translated MUST observe the following convention: #
-# <basename>.<abbreviation of source language> (ex: 100.en) #
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #
-############################################################################################################################################
-
-############################################################################################################################################
-# The values of the variables that follow should be filled according to your needs: #
-############################################################################################################################################
-
-#Full path of the base directory location of your Moses system
-mosesdir=$HOME/moses-irstlm-randlm
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-# Even if you are using the demonstration corpus, you have to fill the $logfile parameter so that the script can be executed !!!
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#Name of the log file of the corpus to be used (time-saving tip: copy and paste it here; the default directory of the log files is $mosesdir/logs); example of a possible name of a log file: pt-en.C-200000.for_train-60-1.LM-300000.MM-1.day-18-01-10-time-14-08-50.txt) (!!! omit the path !!!; you MUST fill in this parameter !!!)
-logfile=
-#Create a translation report when translations are finished; 1 = Do; Any other value = Do not
-create_translation_report=1
-
-#-----------------------------------------------------*** TMX OPTIONS ***---------------------------------------------------------------------------------
-
-#Process both the document to be translated and the Moses translation so that the machine translation can be used with a translation memory tool
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#!!! If you set this parameter to 1, you MUST NOT use the score script unless the $othercleanings, $improvesegmentation and $ removeduplicates parameters are all set to 0 and $minseglen is set to -1, since this processing changes the order of the segments and can also make the source document have a number of segments that is different from the number of segments of the reference translation (namely because it can delete some segments and/or add some new ones) !!!
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-translate_for_tmx=0
-#Minimal length of sentences; -1=any length; any other value=segments with less than $minseglen will be erased ( !!! only active if translate_for_tmx =1 !!!)
-minseglen=-1
-#Substitute tabulation signs by newlines and remove lines composed only of digits, spaces and parentheses ( !!! only active if translate_for_tmx = 1 !!!)
-othercleanings=1
-# Substitute any of the characters [:;.!?] followed by a space by that character followed by a newline; delete empty lines; substitute doublespaces by one space ( !!! only active if translate_for_tmx = 1 !!!)
-improvesegmentation=1
-#Sort segments and remove those segments that are identical ( !!! only active if translate_for_tmx =1 !!! )
-removeduplicates=1
-
-#-----------------------------------------------------*** MOSES DECODER PARAMETERS ***--------------------------------------------------------------------
-
-#***** QUALITY TUNING:
-# Weights for phrase translation table (good values: 0.1-1; default: 1); ensures that the phrases are good translations of each other
-weight_t=1
-# Weights for language model (good values: 0.1-1; default: 1); ensures that output is fluent in target language
-weight_l=1
-# Weights for reordering model (good values: 0.1-1; default: 1); allows reordering of the input sentence
-weight_d=1
-# Weights for word penalty (good values: -3 to 3; default: 0; negative values favor large output; positive values favour short output); ensures translations do not get too long or too short
-weight_w=0
-#------------------------------------------
-# Use Minumum Bayes Risk (MBR) decoding (1 = Do; Any other value = do not); instead of outputting the translation with the highest probability, MBR decoding outputs the translation that is most similar to the most likely translations.
-mbr=0
-# Number of translation candidates consider. MBR decoding uses by default the top 200 distinct candidate translations to find the translation with minimum Bayes risk
-mbrsize=200
-# Scaling factor used to adjust the translation scores (default = 1.0)
-mbrscale=1.0
-# Adds walls around punctuation ,.!?:;". 1= Do; Any other value = do not. Specifying reordering constraints around punctuation is often a good idea. TODO not sure it does not require annotation of the corpus to be trained
-monotoneatpunctuation=0
-#***** SPEED TUNING:
-# Fixed limit for how many translation options are retrieved for each input phrase (0 = no limit; positive value = number of translation options per phrase)
-ttablelimit=20
-# Use the relative scores of hypothesis for pruning, instead of a fixed limit (0= no pruning; decimal value = more pruning)
-beamthreshold=0
-# Threshold for constructing hypotheses based on estimate cost (default: 0 = not used). During the beam search, many hypotheses are created that are too bad to be even entered on a stack. For many of them, it is even clear before the construction of the hypothesis that it would be not useful. Early discarding of such hypotheses hazards a guess about their viability. This is based on correct score except for the actual language model costs which are very expensive to compute. Hypotheses that, according to this estimate, are worse than the worst hypothesis of the target stack, even given an additional specified threshold as cushion, are not constructed at all. This often speeds up decoding significantly. Try threshold factors between 0.5 and 1
-earlydiscardingthreshold=0
-
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-#To get faster performance than the default Moses setting at roughly the same performance, use the parameter settings $searchalgorithm=1, $cubepruningpoplimit=2000 and $stack=2000. With cube pruning, the size of the stack has little impact on performance, so it should be set rather high. The speed/quality trade-off is mostly regulated by the -cube-pruning-pop-limit, i.e. the number of hypotheses added to each stack
-#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-# Search algorithm; cube pruning is faster than the traditional search at comparable levels of search errors; 0 = default; 1 = turns on cube pruning
-searchalgorithm=0
-# Number of hypotheses added to each stack; only a fixed number of hypotheses are generated for each span; default is 1000, higher numbers slow down the decoder, may result in better quality
-cubepruningpoplimit=1000
-# Reduce size of hypothesis stack, that keeps the best partial translations (=beam); default: 100
-stack=100
-# Maximum phrase length (default: 20) TODO not sure to what it refers
-maxphraselength=20
-# ****** SPEED AND QUALITY TUNING
-# Minimum number of hypotheses from each coverage pattern; you may also require that a minimum number of hypotheses is added for each word coverage (they may be still pruned out, however). This is done using the switch -cube-pruning-diversity, which sets the minimum. The default is 0
-cubepruningdiversity=0
-# Distortion (reordering) limit in maximum number of words (0 = monotone; -1 = unlimited ; any other positive value = maximal number of words; default:6)); limiting distortion often increases speed and quality
-distortionlimit=6
-
-############################################################################################################################################
-#end of parameters that you should fill #
-############################################################################################################################################
-
-
-############################################################################################################################################
-# DON'T CHANGE THE LINES THAT FOLLOW ... unless you know what you are doing! #
-############################################################################################################################################
-startdate=`date +day:%d/%m/%y-time:%H:%M:%S`
-echo "********************************** DO PREPARATORY WORK:"
-
-#to avoid *** glibc detected *** errors with moses compiler
-export MALLOC_CHECK_=0
-
-if [ "$logfile" = "" ]; then
- echo "In order to use this script, you have to at least fill its \$logfile parameter. Its allowable values are the names of the files located in $mosesdir/logs. You should also not forget to put the files to be translated in the $mosesdir/translation_input directory. Exiting ..."
- exit 0
-fi
-
-echo "****** Set some important directories"
-#Base directory of corpora training logfiles
-logdir=$mosesdir/logs
-#Name of the directory where files to be translated are placed by the user
-docs_to_translate_dir="$mosesdir/translation_input"
-#Name of the directory where reference (man-made) translated files are located
-translation_reference_dir="$mosesdir/translation_reference"
-#Name of the directory where translated files not to be used to create TMX files are placed
-translated_docs_dir="$mosesdir/translation_output"
-#Name of the directory where translated files used to create a TMX memory are placed (both source and target segments will be placed there)
-commonplacefortmx="$mosesdir/translation_files_for_tmx"
-if [ "$translate_for_tmx" = "1" ]; then
- outputdir=$commonplacefortmx
-else
- outputdir=$translated_docs_dir
-fi
-#Full path of the trained corpus files directory
-workdir=$mosesdir/corpora_trained
-#Full path of the tools (Moses, etc.) directory
-toolsdir=$mosesdir/tools
-stampdate=`date +day-%d-%m-%y-time-%H-%M-%S`
-#Full path of a temporary directory used for translating
-tmp=$mosesdir/$stampdate
-
-echo "check that log file exists"
-if [ ! -f $logdir/$logfile ]; then
- echo "The log file you are trying to use ($logdir/$logfile) does not exist (please check). You may be using a log file of a previous training that you have already moved or erased. Exiting ..."
- exit 0
-fi
-
-if `echo ${logfile} | grep "!!!INVALID!!!" 1>/dev/null 2>&1`; then
- echo "The log file you are trying to use ($logdir/$logfile) points to a deficiently trained corpus. Exiting ..."
- exit 0
-fi
-
-echo "****** Set some important variables"
-#Extract first language name
-lang1=`grep lang1 $logdir/$logfile | sed -e 's/.*lang1=\(\S*\).*/\1/g'`
-#Extract second language name
-lang2=`grep lang2 $logdir/$logfile | sed -e 's/.*lang2=\(\S*\).*/\1/g'`
-#Extract corpus name
-corpusbasename=`grep corpusbasename $logdir/$logfile | sed -e 's/.*corpusbasename=\(\S*\).*/\1/g'`
-#Extract language parameters
-lngmdlparameters=`grep language-model-parameters $logdir/$logfile | sed -e 's/.*language-model-parameters=\(\S*\).*/\1/g'`
-#Extract LM name
-lmbasenametemp=${lngmdlparameters#LM-*}
-lmbasename=${lmbasenametemp%%-*}
-#Extract training parameters
-trainingparameters=`grep training-parameters $logdir/$logfile | sed -e 's/\/*training-parameters=\(\S*\)*$/\1/g'`
-#Extract memorymapping parameters
-mm=`grep memory-mapping-parameters $logdir/$logfile | sed -e 's/\/*memory-mapping-parameters=\(\S*\)*$/\1/g'`
-param=`grep memory-mapping-extra-parameters $logdir/$logfile | sed -e 's/\/*memory-mapping-extra-parameters=\(\S*\)*$/\1/g'`
-tuningparameters=`grep tuning-parameters $logdir/$logfile | sed -e 's/\/*tuning-parameters=\(\S*\)*$/\1/g'`
-if [ "$tuningparameters" != "Tu-0" ]; then
- tuning=1
-else
- tuning=0
-fi
-#Extract $MinLen parameter
-MinLen=`grep minseglen $logdir/$logfile | sed -e 's/\/*minseglen=\(\S*\)*$/\1/g'`
-#Extract $MaxLen parameter
-MaxLen=`grep maxlen $logdir/$logfile | sed -e 's/\/*maxlen=\(\S*\)*$/\1/g'`
-#Extract $recaserbasename parameter
-recaserbasename=`grep recaserbasename $logdir/$logfile | sed -e 's/\/*recaserbasename=\(\S*\)*$/\1/g'`
-reportname="translation_summary-`date +day-%d-%m-%y-time-%H-%M-%S`"
-
-echo "****** Build name of directories where training files are located"
-#Full path of the tools directory (giza, irstlm, moses, scripts, ...)
-toolsdir="$mosesdir/tools"
-#Full path of the tools subdirectory where modified scripts are located
-modifiedscriptsdir="$toolsdir/modified-scripts"
-#Full path of the files used for training (corpus, language model, recaser, tuning, evaluation)
-datadir="$mosesdir/corpora_for_training"
-#Full path of the training logs
-logsdir="$mosesdir/logs"
-#Full path of the base directory where your corpus will be processed (corpus, model, lm, evaluation, recaser)
-workdir="$mosesdir/corpora_trained"
-#Full path of the language model directory
-lmdir="$workdir/lm/$lang2/$lngmdlparameters"
-#Full path of the tokenized files directory
-tokdir="$workdir/tok"
-#Full path of the cleaned files directory
-cleandir="$workdir/clean/MinLen-$MinLen.MaxLen-$MaxLen"
-#Full path of the lowercased (after cleaning) files directory
-lc_clean_dir="$workdir/lc_clean/MinLen-$MinLen.MaxLen-$MaxLen"
-#Full path of the lowercased (and not cleaned) files directory
-lc_no_clean_dir="$workdir/lc_no_clean"
-#Full path of the trained corpus files directory
-modeldir="$workdir/model/$lang1-$lang2-$corpusbasename.$lngmdlparameters/$trainingparameters"
-#Root-dir parameter of Moses
-rootdir=$modeldir
-#Full path of the memory-mapped files directory
-memmapsdir="$workdir/memmaps/$lang1-$lang2-$corpusbasename.$lngmdlparameters/$trainingparameters"
-if [ "$mm" = "1" ]; then
- mmparameters="M-1"
-else
- mmparameters="M-0"
-fi
-#Full path of the recaser files directory
-recaserdir="$workdir/recaser/$lang2/$recaserbasename-IRSTLM"
-#Full path of the detokenized files directory
-detokdir="$workdir/detok/$lang2/$testbasename"
-#Full path of the tuning files directory
-tuningdir="$workdir/tuning/$lang1-$lang2-$corpusbasename.$lngmdlparameters.$mmparameters.$tuningparameters/$trainingparameters"
-
-#Choose the moses.ini file that best reflects the type of training done
-echo "using $mosesinidir"
-if [ "$tuning" = "1" ]; then
- mosesinidir=$tuningdir/moses.weight-reused.ini
-elif [ "$mm" = "1" ]; then
- mosesinidir=$memmapsdir/moses.ini
-else
- mosesinidir=$modeldir/moses.ini
-fi
-
-echo "****** Create auxiliary routines"
-#function that avoids some unwanted effects of interrupting training
-control_c() {
- echo "******* Script interrupted by CTRL + C."
- exit 0
-}
-trap control_c SIGINT
-
-#function that checks whether a trained corpus exists already
-checktrainedcorpusexists() {
- if [ ! -f $lmdir/$lang2.$lngmdlparameters.blm.mm -a ! -f $lmdir/$lang2.$lngmdlparameters.BloomMap ]; then
- echo "The trained corpus you are trying to use ($logdir/$logfile) wasn't correctly trained or does not exist. Its language model (for instance, file $lmdir/$lang2.$lngmdlparameters.blm.mm ***or** file $lmdir/$lang2.$lngmdlparameters.BloomMap) does not exist. Please train or retrain it, or use another trained corpus. Exiting ..."
- exit 0
- fi
- if [ ! -f $recaserdir/phrase-table.$lang2.$recaserbasename.binphr.tgtvoc ]; then
- echo "The trained corpus you are trying to use ($logdir/$logfile) wasn't correctly trained or does not exist. Its recaser training (for instance, file $recaserdir/phrase-table.$lang2.$recaserbasename.binphr.tgtvoc) does not exist. Please train or retrain it, or use another trained corpus. Exiting ..."
- exit 0
- fi
- if [ ! -f $mosesinidir -o ! -d $modeldir ]; then
- echo "The trained corpus you are trying to use ('$logdir/$logfile') wasn't correctly trained or does not exist. Its moses.ini file ($mosesinidir) ***or*** its training model directory ($modeldir) does not exist. Please train or retrain it, or use another trained corpus. Exiting ..."
- exit 0
- fi
- if [ ! -f $memmapsdir/reordering-table.$corpusbasename.$lang1-$lang2.$param.binlexr.srctree ]; then
- echo "The trained corpus you are trying to use ($logdir/$logfile) wasn't correctly trained. You have chosen to train it with memory-mapping and the memory-mapping files (for instance, $memmapsdir/reordering-table.$corpusbasename.$lang1-$lang2.$param.binlexr.srctree) do not exist. Please train or retrain it, or use another trained corpus. Exiting ..."
- exit 0
- fi
-}
-
-echo "****** Check that selected training is OK"
-checktrainedcorpusexists
-
-echo "****** Create some necessary directories if they do not yet exist"
-if [ ! -d $commonplacefortmx ]; then mkdir -p $commonplacefortmx; fi
-
-if [ ! -d $docs_to_translate_dir ]; then
- mkdir -p $docs_to_translate_dir
- echo "You need to put the file(s) you want to translate in the $docs_to_translate_dir directory."
- exit 0
-fi
-
-if [ ! -d $translated_docs_dir ]; then mkdir -p $translated_docs_dir; fi
-
-if [ ! -d $translation_reference_dir ]; then mkdir -p $translation_reference_dir; fi
-
-if [ ! -d $tmp ]; then mkdir -p $tmp; fi
-
-echo "****** Export some important variables"
-#base directory of Moses scripts
-export SCRIPTS_ROOTDIR=$toolsdir/moses/scripts*
-export IRSTLM=$toolsdir/irstlm
-export PATH=$toolsdir/irstlm/bin/i686:$toolsdir/irstlm/bin:$PATH
-export RANDLM=$toolsdir/randlm
-export PATH=$toolsdir/randlm/bin:$PATH
-export PATH=$toolsdir/mgiza:$PATH
-export QMT_HOME=$toolsdir/mgiza
-export corpusbasename
-export lmbasename
-export lang1
-export lang2
-
-echo "********************************** TRANSLATE:"
-numtranslateddocs=0
-if (( $minseglen > 0 )); then
- let "minseglen -= 1"
-fi
-tmpfilename=`date +day-%d-%m-%y-time-%H-%M-%S`
-#Prepare and translate all the files in $docs_to_translate_dir OR do the demo of this script; present the results in $translated_docs_dir
-for filetotranslate in $docs_to_translate_dir/*; do
- echo $filetotranslate
- if [ -f $filetotranslate ]; then
- echo "********* $filetotranslate"
- fromdos $filetotranslate
- tr '\a\b\f\r\v|' ' /' < $filetotranslate > $filetotranslate.tmp
- mv $filetotranslate.tmp $filetotranslate
- name=${filetotranslate##*/}
- if [ ! -f $outputdir/$name.$lang2.moses ]; then
- if [ "$translate_for_tmx" = "1" ]; then
- cp $filetotranslate $tmp/$name
- echo "*** remove segments with less than $minseglen characters"
- if (( $minseglen > 0 )); then
- sed "/^.\{1,$minseglen\}$/d" < $tmp/$name > $tmp/$tmpfilename.txt
- mv $tmp/$tmpfilename.txt $tmp/$name
- fi
- echo "*** clean segments with non-alphanumeric characters"
- if [ "$othercleanings" = "1" ]; then
- sed "s#\t#\n#g; /^[0-9]\+$/d; /^[0-9.)( ]\+$/d" < $tmp/$name > $tmp/$tmpfilename.txt
- mv $tmp/$tmpfilename.txt $tmp/$name
- fi
- echo "*** improve segmentation"
- if [ "$improvesegmentation" = "1" ]; then
- sed "s#\: #\:\n#g; s#\. #.\n#g; s#\; #\;\n#g; s#\! #\!\n#g; s#\? #\?\n#g; s# # #g; s# # #g; s# # #g; s# # #g; s# # #g; s# # #g; s# # #g; /^$/d; /^$/d; /^$/d; /^$/d; /^$/d; /^$/d; /^ $/d" < $tmp/$name > $tmp/$tmpfilename.txt
- mv $tmp/$tmpfilename.txt $tmp/$name
- fi
- echo "*** sort and then remove duplicates"
- if [ "$removeduplicates" = "1" ]; then
- awk '!($0 in a) {a[$0];print}' $tmp/$name > $tmp/$tmpfilename.txt
- mv $tmp/$tmpfilename.txt $tmp/$name
- fi
- cp $tmp/$name $commonplacefortmx
- fi
- let "numtranslateddocs += 1"
- if [ "$translate_for_tmx" = "1" ]; then
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $tmp/$name > $tmp/$name.tok
- else
- $toolsdir/scripts/tokenizer.perl -l $lang1 < $filetotranslate > $tmp/$name.tok
- fi
- $toolsdir/scripts/lowercase.perl < $tmp/$name.tok > $tmp/$name.lowercase
- echo "****** Translate"
- $toolsdir/moses/moses-cmd/src/moses -f $mosesinidir -weight-t $weight_t -weight-l $weight_l -weight-d $weight_d -weight-w $weight_w -mbr $mbr -mbr-size $mbrsize -mbr-scale $mbrscale -monotone-at-punctuation $monotoneatpunctuation -ttable-limit $ttablelimit -b $beamthreshold -early-discarding-threshold $earlydiscardingthreshold -search-algorithm $searchalgorithm -cube-pruning-pop-limit $cubepruningpoplimit -s $stack -max-phrase-length $maxphraselength -cube-pruning-diversity $cubepruningdiversity -distortion-limit $distortionlimit < $tmp/$name.lowercase > $tmp/$name.$lang2
- if [ -f $recaserdir/moses.ini ]; then
- echo "****** Recase the output"
- $toolsdir/moses/script*/recaser/recase.perl -model $recaserdir/moses.ini -in $tmp/$name.$lang2 -moses $toolsdir/moses/moses-cmd/src/moses > $tmp/$name.$lang2.recased
- recased=1
- fi
- echo "****** Detokenize the output"
- if [ "$recased" = "1" ]; then
- $toolsdir/scripts/detokenizer.perl -l $lang2 < $tmp/$name.$lang2.recased > $tmp/$name.$lang2.txt
- else
- $toolsdir/scripts/detokenizer.perl -l $lang2 < $tmp/$name.$lang2 > $tmp/$name.$lang2.txt
- fi
- if [ "$translate_for_tmx" = "1" ]; then
- sed "s#<#\&lt\;#g; s#>#\&gt\;#g; s#'#\&apos\;#g; s#\"#\&quot\;#g; s# / #/#g" < $tmp/$name.$lang2.txt > $commonplacefortmx/$name.$lang2.moses
- sed "s#<#\&lt\;#g; s#>#\&gt\;#g; s#'#\&apos\;#g; s#\"#\&quot\;#g; s# / #/#g" < $tmp/$name > $commonplacefortmx/$name
- else
- sed 's# / #/#g; s/\\$/\\ /g' < $tmp/$name.$lang2.txt > $tmp/$name.$lang2.txt1
- cp -f $tmp/$name.$lang2.txt1 $outputdir/$name.$lang2.moses
- fi
- else
- echo "Document $name has already been translated to $outputdir/$name.$lang2. Translation will not be repeated."
- fi
- fi
-done
-#Remove the now superfluous $mosesdir/temp directory
-if [ -d $tmp ]; then
- rm -rf $tmp
-fi
-if [ "$numtranslateddocs" = "0" ]; then
- echo "The \$docs_to_translate_dir ($docs_to_translate_dir) has no new documents to be translated. You should place there the documents you want to translate. It should have no subdirectories. Exiting ..."
- `find $tmp -type d -empty -exec rmdir {} \; 2>/dev/null`
- exit 0
-fi
-
-
-if [ $create_translation_report -eq 1 ]; then
- echo "********************************** BUILD TRANSLATION REPORT:"
- echo "*** Script version ***: translate-1.32" > $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** Duration ***: " >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "Start time: $startdate" >> $outputdir/$reportname
- echo "End time: `date +day:%d/%m/%y-time:%H:%M:%S`" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** Moses base directory ***: $mosesdir" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** Languages*** :" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "Source language: $lang1" >> $outputdir/$reportname
- echo "Destination language: $lang2" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** Trained corpus used ***:" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- if [[ ${logfile-_} ]]; then
- echo "$logfile" >> $outputdir/$reportname
- fi
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** Translated Files ***:" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- for filetotranslate in $docs_to_translate_dir/*.*; do
- if [[ ${filetotranslate-_} ]]; then
- echo "$filetotranslate" >> $outputdir/$reportname
- fi
- done
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** TMX parameters ***:" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "translate_for_tmx=$translate_for_tmx" >> $outputdir/$reportname
- echo "minseglen=$minseglen" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "*** Moses decoder parameters ***:" >> $outputdir/$reportname
- echo "========================================================================" >> $outputdir/$reportname
- echo "********** Quality parameters **************" >> $outputdir/$reportname
- echo "weight-t=$weight_t" >> $outputdir/$reportname
- echo "weight-l=$weight_l" >> $outputdir/$reportname
- echo "weight-d=$weight_d" >> $outputdir/$reportname
- echo "weight-w=$weight_w" >> $outputdir/$reportname
- echo "mbr=$mbr" >> $outputdir/$reportname
- echo "mbr-size=$mbrsize" >> $outputdir/$reportname
- echo "mbr-scale=$mbrscale" >> $outputdir/$reportname
- echo "monotone-at-punctuation=$monotoneatpunctuation" >> $outputdir/$reportname
- echo "********** Speed parameters ****************" >> $outputdir/$reportname
- echo "ttable-limit=$ttablelimit" >> $outputdir/$reportname
- echo "beam-threshold=$beamthreshold" >> $outputdir/$reportname
- echo "early-discarding-threshold=$earlydiscardingthreshold" >> $outputdir/$reportname
- echo "search-algorithm=$searchalgorithm" >> $outputdir/$reportname
- echo "cube-pruning-pop-limit=$cubepruningpoplimit" >> $outputdir/$reportname
- echo "stack=$stack" >> $outputdir/$reportname
- echo "maxphraselength=$maxphraselength" >> $outputdir/$reportname
- echo "********** Quality and speed parameters ****" >> $outputdir/$reportname
- echo "cube-pruning-diversity=$cubepruningdiversity" >> $outputdir/$reportname
- echo "distortion-limit=$distortionlimit" >> $outputdir/$reportname
-fi
-
-`find $tmp -type d -empty -exec rmdir {} \; 2>/dev/null`
-
-echo "Translation finished. The translations and a summary report of the translation are located in the $outputdir directory."
-
-#=================================================================================================================================================
-#Changed in version 1.32
-#=================================================================================================================================================
-# Adaptation to a change in the tofrodos package upon which this script depends
-# Better reactivity to user errors
-#=================================================================================================================================================
-#Changed in version 1.26
-#=================================================================================================================================================
-# Appends to the end of the name of the translated files ".$lang2.moses"
-# Does not translate files already translated
-# Tells user what to do if the $logfile parameter wasn't set
-# Special processing of translated files that will be used with a translation memory tool
diff --git a/contrib/other-builds/CreateOnDisk.vcxproj b/contrib/other-builds/CreateOnDisk.vcxproj
index f7fa3729b..10073b7fe 100644
--- a/contrib/other-builds/CreateOnDisk.vcxproj
+++ b/contrib/other-builds/CreateOnDisk.vcxproj
@@ -43,11 +43,13 @@
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -55,35 +57,37 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX86</TargetMachine>
+ <AdditionalLibraryDirectories>C:\boost\boost_1_47\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<TargetMachine>MachineX86</TargetMachine>
+ <AdditionalLibraryDirectories>C:\boost\boost_1_47\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
diff --git a/contrib/other-builds/CreateOnDisk.xcodeproj/project.pbxproj b/contrib/other-builds/CreateOnDisk.xcodeproj/project.pbxproj
index 90bf911d3..9c5b8215a 100644
--- a/contrib/other-builds/CreateOnDisk.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/CreateOnDisk.xcodeproj/project.pbxproj
@@ -288,6 +288,7 @@
../../irstlm/lib,
../../srilm/lib/macosx,
../../randlm/lib,
+ /opt/local/lib,
);
OTHER_LDFLAGS = (
"-lz",
@@ -298,6 +299,7 @@
"-lflm",
"-llattice",
"-lrandlm",
+ "-lboost_thread-mt",
);
PRODUCT_NAME = CreateOnDisk;
};
@@ -318,6 +320,7 @@
../../irstlm/lib,
../../srilm/lib/macosx,
../../randlm/lib,
+ /opt/local/lib,
);
OTHER_LDFLAGS = (
"-lz",
@@ -328,6 +331,7 @@
"-lflm",
"-llattice",
"-lrandlm",
+ "-lboost_thread-mt",
);
PRODUCT_NAME = CreateOnDisk;
};
diff --git a/contrib/other-builds/OnDiskPt.vcxproj b/contrib/other-builds/OnDiskPt.vcxproj
index f9ee6e1a0..827291e7d 100644
--- a/contrib/other-builds/OnDiskPt.vcxproj
+++ b/contrib/other-builds/OnDiskPt.vcxproj
@@ -69,7 +69,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -77,21 +77,21 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
- <AdditionalIncludeDirectories>$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
- <AdditionalIncludeDirectories>$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
diff --git a/contrib/other-builds/OnDiskPt/.cproject b/contrib/other-builds/OnDiskPt/.cproject
index 41f2a5141..472888f48 100644
--- a/contrib/other-builds/OnDiskPt/.cproject
+++ b/contrib/other-builds/OnDiskPt/.cproject
@@ -41,9 +41,13 @@
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+ </option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
@@ -128,4 +132,5 @@
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
</storageModule>
+ <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>
diff --git a/contrib/other-builds/fuzzy-match.xcodeproj/project.pbxproj b/contrib/other-builds/fuzzy-match.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..8abb9ae17
--- /dev/null
+++ b/contrib/other-builds/fuzzy-match.xcodeproj/project.pbxproj
@@ -0,0 +1,292 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 46;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 1E42EFB615BEFAEB00E937EB /* fuzzy-match2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */; };
+ 1E42EFB715BEFAEB00E937EB /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */; };
+ 1E42EFB815BEFAEB00E937EB /* SuffixArray.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DD015BED3D4001914A2 /* SuffixArray.h */; };
+ 1E42EFB915BEFAEB00E937EB /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */; };
+ 1E42EFBA15BEFAEB00E937EB /* Vocabulary.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCB15BED3AC001914A2 /* Vocabulary.h */; };
+ 1E806DCC15BED3AC001914A2 /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */; };
+ 1E806DD115BED3D4001914A2 /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */; };
+ 1ECD60A815C15E28004172A4 /* Util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ECD60A515C15D3A004172A4 /* Util.cpp */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 1E42EFAA15BEFAD300E937EB /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+ 1ED87EEB15BED331003E47AA /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+ 1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "fuzzy-match2.cpp"; path = "../tm-mt-integration/fuzzy-match2.cpp"; sourceTree = "<group>"; };
+ 1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "fuzzy-match2"; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1E42EFD115C00AC100E937EB /* fuzzy-match2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "fuzzy-match2.h"; path = "../tm-mt-integration/fuzzy-match2.h"; sourceTree = "<group>"; };
+ 1E42EFD215C00BAE00E937EB /* Util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Util.h; path = "../tm-mt-integration/Util.h"; sourceTree = "<group>"; };
+ 1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceAlignment.h; path = "../tm-mt-integration/SentenceAlignment.h"; sourceTree = "<group>"; };
+ 1E42EFD715C00D6300E937EB /* Match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Match.h; path = "../tm-mt-integration/Match.h"; sourceTree = "<group>"; };
+ 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = "../tm-mt-integration/Vocabulary.cpp"; sourceTree = "<group>"; };
+ 1E806DCB15BED3AC001914A2 /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = "../tm-mt-integration/Vocabulary.h"; sourceTree = "<group>"; };
+ 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SuffixArray.cpp; path = "../tm-mt-integration/SuffixArray.cpp"; sourceTree = "<group>"; };
+ 1E806DD015BED3D4001914A2 /* SuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SuffixArray.h; path = "../tm-mt-integration/SuffixArray.h"; sourceTree = "<group>"; };
+ 1ECD60A515C15D3A004172A4 /* Util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Util.cpp; path = "../tm-mt-integration/Util.cpp"; sourceTree = "<group>"; };
+ 1ED87EED15BED331003E47AA /* fuzzy-match */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "fuzzy-match"; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ 1E42EFA915BEFAD300E937EB /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 1ED87EEA15BED331003E47AA /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 1ED87EE215BED32F003E47AA = {
+ isa = PBXGroup;
+ children = (
+ 1E42EFD715C00D6300E937EB /* Match.h */,
+ 1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */,
+ 1E42EFD215C00BAE00E937EB /* Util.h */,
+ 1ECD60A515C15D3A004172A4 /* Util.cpp */,
+ 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */,
+ 1E806DD015BED3D4001914A2 /* SuffixArray.h */,
+ 1E42EFD115C00AC100E937EB /* fuzzy-match2.h */,
+ 1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */,
+ 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */,
+ 1E806DCB15BED3AC001914A2 /* Vocabulary.h */,
+ 1ED87EEE15BED331003E47AA /* Products */,
+ );
+ sourceTree = "<group>";
+ };
+ 1ED87EEE15BED331003E47AA /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1ED87EED15BED331003E47AA /* fuzzy-match */,
+ 1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+ 1E42EFAB15BEFAD300E937EB /* fuzzy-match2 */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1E42EFB315BEFAD300E937EB /* Build configuration list for PBXNativeTarget "fuzzy-match2" */;
+ buildPhases = (
+ 1E42EFA815BEFAD300E937EB /* Sources */,
+ 1E42EFA915BEFAD300E937EB /* Frameworks */,
+ 1E42EFAA15BEFAD300E937EB /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = "fuzzy-match2";
+ productName = "fuzzy-match2";
+ productReference = 1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */;
+ productType = "com.apple.product-type.tool";
+ };
+ 1ED87EEC15BED331003E47AA /* fuzzy-match */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1ED87EF715BED331003E47AA /* Build configuration list for PBXNativeTarget "fuzzy-match" */;
+ buildPhases = (
+ 1ED87EE915BED331003E47AA /* Sources */,
+ 1ED87EEA15BED331003E47AA /* Frameworks */,
+ 1ED87EEB15BED331003E47AA /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = "fuzzy-match";
+ productName = "fuzzy-match";
+ productReference = 1ED87EED15BED331003E47AA /* fuzzy-match */;
+ productType = "com.apple.product-type.tool";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 1ED87EE415BED32F003E47AA /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1ED87EE715BED32F003E47AA /* Build configuration list for PBXProject "fuzzy-match" */;
+ compatibilityVersion = "Xcode 3.2";
+ developmentRegion = English;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ );
+ mainGroup = 1ED87EE215BED32F003E47AA;
+ productRefGroup = 1ED87EEE15BED331003E47AA /* Products */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ 1ED87EEC15BED331003E47AA /* fuzzy-match */,
+ 1E42EFAB15BEFAD300E937EB /* fuzzy-match2 */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 1E42EFA815BEFAD300E937EB /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1ECD60A815C15E28004172A4 /* Util.cpp in Sources */,
+ 1E42EFB615BEFAEB00E937EB /* fuzzy-match2.cpp in Sources */,
+ 1E42EFB715BEFAEB00E937EB /* SuffixArray.cpp in Sources */,
+ 1E42EFB815BEFAEB00E937EB /* SuffixArray.h in Sources */,
+ 1E42EFB915BEFAEB00E937EB /* Vocabulary.cpp in Sources */,
+ 1E42EFBA15BEFAEB00E937EB /* Vocabulary.h in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 1ED87EE915BED331003E47AA /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E806DCC15BED3AC001914A2 /* Vocabulary.cpp in Sources */,
+ 1E806DD115BED3D4001914A2 /* SuffixArray.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1E42EFB415BEFAD300E937EB /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1E42EFB515BEFAD300E937EB /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+ 1ED87EF515BED331003E47AA /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = NO;
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = macosx;
+ };
+ name = Debug;
+ };
+ 1ED87EF615BED331003E47AA /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = YES;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ SDKROOT = macosx;
+ };
+ name = Release;
+ };
+ 1ED87EF815BED331003E47AA /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1ED87EF915BED331003E47AA /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1E42EFB315BEFAD300E937EB /* Build configuration list for PBXNativeTarget "fuzzy-match2" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E42EFB415BEFAD300E937EB /* Debug */,
+ 1E42EFB515BEFAD300E937EB /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1ED87EE715BED32F003E47AA /* Build configuration list for PBXProject "fuzzy-match" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1ED87EF515BED331003E47AA /* Debug */,
+ 1ED87EF615BED331003E47AA /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1ED87EF715BED331003E47AA /* Build configuration list for PBXNativeTarget "fuzzy-match" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1ED87EF815BED331003E47AA /* Debug */,
+ 1ED87EF915BED331003E47AA /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 1ED87EE415BED32F003E47AA /* Project object */;
+}
diff --git a/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist
new file mode 100644
index 000000000..cebcbdcb5
--- /dev/null
+++ b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist
@@ -0,0 +1,21 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Bucket
+ type = "1"
+ version = "1.0">
+ <FileBreakpoints>
+ <FileBreakpoint
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ isPathRelative = "0"
+ filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match2.cpp"
+ timestampString = "364996019.762643"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "456"
+ endingLineNumber = "456"
+ landmarkName = "create_extract(int sentenceInd, int cost, const vector&lt; WORD_ID &gt; &amp;sourceSentence, const vector&lt;SentenceAlignment&gt; &amp;targets, const string &amp;inputStr, const string &amp;path)"
+ landmarkType = "7">
+ </FileBreakpoint>
+ </FileBreakpoints>
+</Bucket>
diff --git a/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match.xcscheme b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match.xcscheme
new file mode 100644
index 000000000..4ffb0bc96
--- /dev/null
+++ b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match.xcscheme
@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
+ BuildableName = "fuzzy-match"
+ BlueprintName = "fuzzy-match"
+ ReferencedContainer = "container:fuzzy-match.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Debug">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
+ BuildableName = "fuzzy-match"
+ BlueprintName = "fuzzy-match"
+ ReferencedContainer = "container:fuzzy-match.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ <CommandLineArguments>
+ <CommandLineArgument
+ argument = "--multiple /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/ac-test.input.tc.4 /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/acquis.truecased.4.en.uniq"
+ isEnabled = "YES">
+ </CommandLineArgument>
+ </CommandLineArguments>
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
+ BuildableName = "fuzzy-match"
+ BlueprintName = "fuzzy-match"
+ ReferencedContainer = "container:fuzzy-match.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match2.xcscheme b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match2.xcscheme
new file mode 100644
index 000000000..124bfd4b2
--- /dev/null
+++ b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/fuzzy-match2.xcscheme
@@ -0,0 +1,79 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
+ BuildableName = "fuzzy-match2"
+ BlueprintName = "fuzzy-match2"
+ ReferencedContainer = "container:fuzzy-match.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "YES"
+ customWorkingDirectory = "/Users/hieuhoang/unison/workspace/experiment/data/tm-mt-integration/in"
+ buildConfiguration = "Debug">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
+ BuildableName = "fuzzy-match2"
+ BlueprintName = "fuzzy-match2"
+ ReferencedContainer = "container:fuzzy-match.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ <CommandLineArguments>
+ <CommandLineArgument
+ argument = "--multiple ac-test.input.tc.4 acquis.truecased.4.en.uniq acquis.truecased.4.fr.uniq acquis.truecased.4.align.uniq"
+ isEnabled = "YES">
+ </CommandLineArgument>
+ </CommandLineArguments>
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
+ BuildableName = "fuzzy-match2"
+ BlueprintName = "fuzzy-match2"
+ ReferencedContainer = "container:fuzzy-match.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 000000000..8a9f26d81
--- /dev/null
+++ b/contrib/other-builds/fuzzy-match.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>SchemeUserState</key>
+ <dict>
+ <key>fuzzy-match.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>0</integer>
+ </dict>
+ <key>fuzzy-match2.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>1</integer>
+ </dict>
+ </dict>
+ <key>SuppressBuildableAutocreation</key>
+ <dict>
+ <key>1E42EFAB15BEFAD300E937EB</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ <key>1ED87EEC15BED331003E47AA</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ </dict>
+</dict>
+</plist>
diff --git a/contrib/other-builds/kbmira.xcodeproj/project.pbxproj b/contrib/other-builds/kbmira.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..1cea39a8e
--- /dev/null
+++ b/contrib/other-builds/kbmira.xcodeproj/project.pbxproj
@@ -0,0 +1,311 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 46;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 1E73031E1597355A00C0E7FB /* kbmira.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E73031D1597355A00C0E7FB /* kbmira.cpp */; };
+ 1EC060861597392900614957 /* libmert_lib.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EC060821597386600614957 /* libmert_lib.a */; };
+ 1EC060B41597490F00614957 /* liblm.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EC060B11597490800614957 /* liblm.a */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+ 1EC060811597386600614957 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EC0607A1597386500614957 /* mert_lib.xcodeproj */;
+ proxyType = 2;
+ remoteGlobalIDString = 1E2CCF3315939E2D00D858D1;
+ remoteInfo = mert_lib;
+ };
+ 1EC060841597386C00614957 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EC0607A1597386500614957 /* mert_lib.xcodeproj */;
+ proxyType = 1;
+ remoteGlobalIDString = 1E2CCF3215939E2D00D858D1;
+ remoteInfo = mert_lib;
+ };
+ 1EC060B01597490800614957 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EC060A51597490800614957 /* lm.xcodeproj */;
+ proxyType = 2;
+ remoteGlobalIDString = 1EE8C2E91476A48E002496F2;
+ remoteInfo = lm;
+ };
+ 1EC060B51597491400614957 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EC060A51597490800614957 /* lm.xcodeproj */;
+ proxyType = 1;
+ remoteGlobalIDString = 1EE8C2E81476A48E002496F2;
+ remoteInfo = lm;
+ };
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 1E43CA3E159734A5000E29D3 /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+ 1E43CA40159734A5000E29D3 /* kbmira */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = kbmira; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1E73031D1597355A00C0E7FB /* kbmira.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = kbmira.cpp; path = ../../mert/kbmira.cpp; sourceTree = "<group>"; };
+ 1EC0607A1597386500614957 /* mert_lib.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = mert_lib.xcodeproj; sourceTree = "<group>"; };
+ 1EC060A51597490800614957 /* lm.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = lm.xcodeproj; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ 1E43CA3D159734A5000E29D3 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1EC060B41597490F00614957 /* liblm.a in Frameworks */,
+ 1EC060861597392900614957 /* libmert_lib.a in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 1E43CA35159734A5000E29D3 = {
+ isa = PBXGroup;
+ children = (
+ 1EC060A51597490800614957 /* lm.xcodeproj */,
+ 1EC0607A1597386500614957 /* mert_lib.xcodeproj */,
+ 1E73031D1597355A00C0E7FB /* kbmira.cpp */,
+ 1E43CA41159734A5000E29D3 /* Products */,
+ );
+ sourceTree = "<group>";
+ };
+ 1E43CA41159734A5000E29D3 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1E43CA40159734A5000E29D3 /* kbmira */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ 1EC0607B1597386500614957 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1EC060821597386600614957 /* libmert_lib.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ 1EC060A61597490800614957 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1EC060B11597490800614957 /* liblm.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+ 1E43CA3F159734A5000E29D3 /* kbmira */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1E43CA4A159734A5000E29D3 /* Build configuration list for PBXNativeTarget "kbmira" */;
+ buildPhases = (
+ 1E43CA3C159734A5000E29D3 /* Sources */,
+ 1E43CA3D159734A5000E29D3 /* Frameworks */,
+ 1E43CA3E159734A5000E29D3 /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ 1EC060B61597491400614957 /* PBXTargetDependency */,
+ 1EC060851597386C00614957 /* PBXTargetDependency */,
+ );
+ name = kbmira;
+ productName = kbmira;
+ productReference = 1E43CA40159734A5000E29D3 /* kbmira */;
+ productType = "com.apple.product-type.tool";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 1E43CA37159734A5000E29D3 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1E43CA3A159734A5000E29D3 /* Build configuration list for PBXProject "kbmira" */;
+ compatibilityVersion = "Xcode 3.2";
+ developmentRegion = English;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ );
+ mainGroup = 1E43CA35159734A5000E29D3;
+ productRefGroup = 1E43CA41159734A5000E29D3 /* Products */;
+ projectDirPath = "";
+ projectReferences = (
+ {
+ ProductGroup = 1EC060A61597490800614957 /* Products */;
+ ProjectRef = 1EC060A51597490800614957 /* lm.xcodeproj */;
+ },
+ {
+ ProductGroup = 1EC0607B1597386500614957 /* Products */;
+ ProjectRef = 1EC0607A1597386500614957 /* mert_lib.xcodeproj */;
+ },
+ );
+ projectRoot = "";
+ targets = (
+ 1E43CA3F159734A5000E29D3 /* kbmira */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXReferenceProxy section */
+ 1EC060821597386600614957 /* libmert_lib.a */ = {
+ isa = PBXReferenceProxy;
+ fileType = archive.ar;
+ path = libmert_lib.a;
+ remoteRef = 1EC060811597386600614957 /* PBXContainerItemProxy */;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+ 1EC060B11597490800614957 /* liblm.a */ = {
+ isa = PBXReferenceProxy;
+ fileType = archive.ar;
+ path = liblm.a;
+ remoteRef = 1EC060B01597490800614957 /* PBXContainerItemProxy */;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+/* End PBXReferenceProxy section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 1E43CA3C159734A5000E29D3 /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E73031E1597355A00C0E7FB /* kbmira.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+ 1EC060851597386C00614957 /* PBXTargetDependency */ = {
+ isa = PBXTargetDependency;
+ name = mert_lib;
+ targetProxy = 1EC060841597386C00614957 /* PBXContainerItemProxy */;
+ };
+ 1EC060B61597491400614957 /* PBXTargetDependency */ = {
+ isa = PBXTargetDependency;
+ name = lm;
+ targetProxy = 1EC060B51597491400614957 /* PBXContainerItemProxy */;
+ };
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+ 1E43CA48159734A5000E29D3 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = NO;
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ LIBRARY_SEARCH_PATHS = /opt/local/lib;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ ONLY_ACTIVE_ARCH = YES;
+ OTHER_LDFLAGS = "";
+ SDKROOT = macosx;
+ };
+ name = Debug;
+ };
+ 1E43CA49159734A5000E29D3 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = YES;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ LIBRARY_SEARCH_PATHS = /opt/local/lib;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ OTHER_LDFLAGS = "";
+ SDKROOT = macosx;
+ };
+ name = Release;
+ };
+ 1E43CA4B159734A5000E29D3 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ OTHER_LDFLAGS = (
+ "-lboost_program_options",
+ "-lz",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1E43CA4C159734A5000E29D3 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ OTHER_LDFLAGS = (
+ "-lboost_program_options",
+ "-lz",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1E43CA3A159734A5000E29D3 /* Build configuration list for PBXProject "kbmira" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E43CA48159734A5000E29D3 /* Debug */,
+ 1E43CA49159734A5000E29D3 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1E43CA4A159734A5000E29D3 /* Build configuration list for PBXNativeTarget "kbmira" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E43CA4B159734A5000E29D3 /* Debug */,
+ 1E43CA4C159734A5000E29D3 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 1E43CA37159734A5000E29D3 /* Project object */;
+}
diff --git a/contrib/other-builds/kenlm.vcxproj b/contrib/other-builds/kenlm.vcxproj
index 96f537acc..544600117 100755
--- a/contrib/other-builds/kenlm.vcxproj
+++ b/contrib/other-builds/kenlm.vcxproj
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
@@ -39,6 +39,8 @@
<None Include="..\..\lm\test_nounk.arpa" />
<None Include="..\..\lm\trie.hh" />
<None Include="..\..\lm\trie_sort.hh" />
+ <None Include="..\..\lm\value.hh" />
+ <None Include="..\..\lm\value_build.hh" />
<None Include="..\..\lm\virtual_interface.hh" />
<None Include="..\..\lm\vocab.hh" />
<None Include="..\..\lm\weights.hh" />
@@ -82,6 +84,7 @@
<ClCompile Include="..\..\lm\search_trie.cc" />
<ClCompile Include="..\..\lm\trie.cc" />
<ClCompile Include="..\..\lm\trie_sort.cc" />
+ <ClCompile Include="..\..\lm\value_build.cc" />
<ClCompile Include="..\..\lm\virtual_interface.cc" />
<ClCompile Include="..\..\lm\vocab.cc" />
<ClCompile Include="..\..\util\bit_packing.cc" />
@@ -120,15 +123,20 @@
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
- <PropertyGroup />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <IncludePath>C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <IncludePath>C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
- <PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@@ -143,8 +151,9 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
- <PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
diff --git a/contrib/other-builds/lm.xcodeproj/project.pbxproj b/contrib/other-builds/lm.xcodeproj/project.pbxproj
index c891ea126..2488f1439 100644
--- a/contrib/other-builds/lm.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/lm.xcodeproj/project.pbxproj
@@ -7,6 +7,9 @@
objects = {
/* Begin PBXBuildFile section */
+ 1E890C71159D1B260031F9F3 /* value_build.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1E890C6E159D1B260031F9F3 /* value_build.cc */; };
+ 1E890C72159D1B260031F9F3 /* value_build.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1E890C6F159D1B260031F9F3 /* value_build.hh */; };
+ 1E890C73159D1B260031F9F3 /* value.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1E890C70159D1B260031F9F3 /* value.hh */; };
1EBA44AD14B97E22003CC0EA /* bhiksha.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA442B14B97E22003CC0EA /* bhiksha.cc */; };
1EBA44AE14B97E22003CC0EA /* bhiksha.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA442C14B97E22003CC0EA /* bhiksha.hh */; };
1EBA44D414B97E22003CC0EA /* binary_format.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA447D14B97E22003CC0EA /* binary_format.cc */; };
@@ -93,6 +96,9 @@
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
+ 1E890C6E159D1B260031F9F3 /* value_build.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = value_build.cc; path = ../../lm/value_build.cc; sourceTree = "<group>"; };
+ 1E890C6F159D1B260031F9F3 /* value_build.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = value_build.hh; path = ../../lm/value_build.hh; sourceTree = "<group>"; };
+ 1E890C70159D1B260031F9F3 /* value.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = value.hh; path = ../../lm/value.hh; sourceTree = "<group>"; };
1EBA442B14B97E22003CC0EA /* bhiksha.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bhiksha.cc; path = ../../lm/bhiksha.cc; sourceTree = "<group>"; };
1EBA442C14B97E22003CC0EA /* bhiksha.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = bhiksha.hh; path = ../../lm/bhiksha.hh; sourceTree = "<group>"; };
1EBA447D14B97E22003CC0EA /* binary_format.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = binary_format.cc; path = ../../lm/binary_format.cc; sourceTree = "<group>"; };
@@ -196,6 +202,9 @@
1EBA44FB14B97E6A003CC0EA /* lm */ = {
isa = PBXGroup;
children = (
+ 1E890C6E159D1B260031F9F3 /* value_build.cc */,
+ 1E890C6F159D1B260031F9F3 /* value_build.hh */,
+ 1E890C70159D1B260031F9F3 /* value.hh */,
1EBA442B14B97E22003CC0EA /* bhiksha.cc */,
1EBA442C14B97E22003CC0EA /* bhiksha.hh */,
1EBA447D14B97E22003CC0EA /* binary_format.cc */,
@@ -366,6 +375,8 @@
1EBA459E14B97E92003CC0EA /* sorted_uniform.hh in Headers */,
1EBA459F14B97E92003CC0EA /* string_piece.hh in Headers */,
1EBA45A114B97E92003CC0EA /* tokenize_piece.hh in Headers */,
+ 1E890C72159D1B260031F9F3 /* value_build.hh in Headers */,
+ 1E890C73159D1B260031F9F3 /* value.hh in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -394,6 +405,9 @@
/* Begin PBXProject section */
1EE8C2E01476A48E002496F2 /* Project object */ = {
isa = PBXProject;
+ attributes = {
+ LastUpgradeCheck = 0420;
+ };
buildConfigurationList = 1EE8C2E31476A48E002496F2 /* Build configuration list for PBXProject "lm" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
@@ -464,6 +478,7 @@
1EBA459814B97E92003CC0EA /* probing_hash_table_test.cc in Sources */,
1EBA459D14B97E92003CC0EA /* sorted_uniform_test.cc in Sources */,
1EBA45A014B97E92003CC0EA /* tokenize_piece_test.cc in Sources */,
+ 1E890C71159D1B260031F9F3 /* value_build.cc in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -527,6 +542,7 @@
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
+ GCC_PREPROCESSOR_DEFINITIONS = "KENLM_MAX_ORDER=7";
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
@@ -544,6 +560,7 @@
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
+ GCC_PREPROCESSOR_DEFINITIONS = "KENLM_MAX_ORDER=7";
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
diff --git a/contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/lm.xcscheme b/contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/lm.xcscheme
new file mode 100644
index 000000000..d6a2f2b1d
--- /dev/null
+++ b/contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/lm.xcscheme
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1EE8C2E81476A48E002496F2"
+ BuildableName = "liblm.a"
+ BlueprintName = "lm"
+ ReferencedContainer = "container:lm.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Debug">
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist b/contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 000000000..4a2ad2a48
--- /dev/null
+++ b/contrib/other-builds/lm.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>SchemeUserState</key>
+ <dict>
+ <key>lm.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>0</integer>
+ </dict>
+ </dict>
+ <key>SuppressBuildableAutocreation</key>
+ <dict>
+ <key>1EE8C2E81476A48E002496F2</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ </dict>
+</dict>
+</plist>
diff --git a/contrib/other-builds/lm/.cproject b/contrib/other-builds/lm/.cproject
index f89e80f49..8ecb60e02 100644
--- a/contrib/other-builds/lm/.cproject
+++ b/contrib/other-builds/lm/.cproject
@@ -42,7 +42,11 @@
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+ <listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
+ </option>
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1980966336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
+ <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@@ -53,6 +57,9 @@
</tool>
</toolChain>
</folderInfo>
+ <sourceEntries>
+ <entry excluding="left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+ </sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
@@ -122,4 +129,5 @@
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope"/>
+ <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>
diff --git a/contrib/other-builds/lm/.project b/contrib/other-builds/lm/.project
index 0d30e24cb..204771764 100644
--- a/contrib/other-builds/lm/.project
+++ b/contrib/other-builds/lm/.project
@@ -327,6 +327,21 @@
<locationURI>PARENT-3-PROJECT_LOC/lm/trie_sort.hh</locationURI>
</link>
<link>
+ <name>value.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/value.hh</locationURI>
+ </link>
+ <link>
+ <name>value_build.cc</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/value_build.cc</locationURI>
+ </link>
+ <link>
+ <name>value_build.hh</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/lm/value_build.hh</locationURI>
+ </link>
+ <link>
<name>virtual_interface.cc</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/lm/virtual_interface.cc</locationURI>
diff --git a/contrib/other-builds/mert.xcodeproj/project.pbxproj b/contrib/other-builds/mert.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..76879e58e
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/project.pbxproj
@@ -0,0 +1,338 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 46;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 1E1D826915AC641600FE42E9 /* extractor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1D825915AC63ED00FE42E9 /* extractor.cpp */; };
+ 1E1D826A15AC642B00FE42E9 /* libmert_lib.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E2B6B141593A6F30028137E /* libmert_lib.a */; };
+ 1E2B6ADE1593A5500028137E /* mert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2B6ADD1593A5500028137E /* mert.cpp */; };
+ 1E2B6B1F1593CA8A0028137E /* libmert_lib.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E2B6B141593A6F30028137E /* libmert_lib.a */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+ 1E2B6B131593A6F30028137E /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1E2B6B0F1593A6F30028137E /* mert_lib.xcodeproj */;
+ proxyType = 2;
+ remoteGlobalIDString = 1E2CCF3315939E2D00D858D1;
+ remoteInfo = mert_lib;
+ };
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 1E1D825D15AC640800FE42E9 /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+ 1EB0AF031593A2180007E2A4 /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+ 1E1D825915AC63ED00FE42E9 /* extractor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = extractor.cpp; path = ../../mert/extractor.cpp; sourceTree = "<group>"; };
+ 1E1D825F15AC640800FE42E9 /* extractor */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = extractor; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1E2B6ADD1593A5500028137E /* mert.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mert.cpp; path = ../../mert/mert.cpp; sourceTree = "<group>"; };
+ 1E2B6B0F1593A6F30028137E /* mert_lib.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = mert_lib.xcodeproj; sourceTree = "<group>"; };
+ 1EB0AF051593A2180007E2A4 /* mert */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mert; sourceTree = BUILT_PRODUCTS_DIR; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ 1E1D825C15AC640800FE42E9 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E1D826A15AC642B00FE42E9 /* libmert_lib.a in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 1EB0AF021593A2180007E2A4 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E2B6B1F1593CA8A0028137E /* libmert_lib.a in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 1E2B6B101593A6F30028137E /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1E2B6B141593A6F30028137E /* libmert_lib.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ 1EB0AEFA1593A2180007E2A4 = {
+ isa = PBXGroup;
+ children = (
+ 1E2B6B0F1593A6F30028137E /* mert_lib.xcodeproj */,
+ 1E2B6ADD1593A5500028137E /* mert.cpp */,
+ 1E1D825915AC63ED00FE42E9 /* extractor.cpp */,
+ 1EB0AF061593A2180007E2A4 /* Products */,
+ );
+ sourceTree = "<group>";
+ };
+ 1EB0AF061593A2180007E2A4 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1EB0AF051593A2180007E2A4 /* mert */,
+ 1E1D825F15AC640800FE42E9 /* extractor */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+ 1E1D825E15AC640800FE42E9 /* extractor */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1E1D826615AC640800FE42E9 /* Build configuration list for PBXNativeTarget "extractor" */;
+ buildPhases = (
+ 1E1D825B15AC640800FE42E9 /* Sources */,
+ 1E1D825C15AC640800FE42E9 /* Frameworks */,
+ 1E1D825D15AC640800FE42E9 /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = extractor;
+ productName = extractor;
+ productReference = 1E1D825F15AC640800FE42E9 /* extractor */;
+ productType = "com.apple.product-type.tool";
+ };
+ 1EB0AF041593A2180007E2A4 /* mert */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1EB0AF0F1593A2180007E2A4 /* Build configuration list for PBXNativeTarget "mert" */;
+ buildPhases = (
+ 1EB0AF011593A2180007E2A4 /* Sources */,
+ 1EB0AF021593A2180007E2A4 /* Frameworks */,
+ 1EB0AF031593A2180007E2A4 /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = mert;
+ productName = mert;
+ productReference = 1EB0AF051593A2180007E2A4 /* mert */;
+ productType = "com.apple.product-type.tool";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 1EB0AEFC1593A2180007E2A4 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1EB0AEFF1593A2180007E2A4 /* Build configuration list for PBXProject "mert" */;
+ compatibilityVersion = "Xcode 3.2";
+ developmentRegion = English;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ );
+ mainGroup = 1EB0AEFA1593A2180007E2A4;
+ productRefGroup = 1EB0AF061593A2180007E2A4 /* Products */;
+ projectDirPath = "";
+ projectReferences = (
+ {
+ ProductGroup = 1E2B6B101593A6F30028137E /* Products */;
+ ProjectRef = 1E2B6B0F1593A6F30028137E /* mert_lib.xcodeproj */;
+ },
+ );
+ projectRoot = "";
+ targets = (
+ 1EB0AF041593A2180007E2A4 /* mert */,
+ 1E1D825E15AC640800FE42E9 /* extractor */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXReferenceProxy section */
+ 1E2B6B141593A6F30028137E /* libmert_lib.a */ = {
+ isa = PBXReferenceProxy;
+ fileType = archive.ar;
+ path = libmert_lib.a;
+ remoteRef = 1E2B6B131593A6F30028137E /* PBXContainerItemProxy */;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+/* End PBXReferenceProxy section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 1E1D825B15AC640800FE42E9 /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E1D826915AC641600FE42E9 /* extractor.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+ 1EB0AF011593A2180007E2A4 /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E2B6ADE1593A5500028137E /* mert.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1E1D826715AC640800FE42E9 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1E1D826815AC640800FE42E9 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+ 1EB0AF0D1593A2180007E2A4 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = NO;
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ ONLY_ACTIVE_ARCH = YES;
+ OTHER_LDFLAGS = "-lz";
+ SDKROOT = macosx;
+ };
+ name = Debug;
+ };
+ 1EB0AF0E1593A2180007E2A4 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = YES;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ OTHER_LDFLAGS = "-lz";
+ SDKROOT = macosx;
+ };
+ name = Release;
+ };
+ 1EB0AF101593A2180007E2A4 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ WITH_THREADS,
+ );
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ LIBRARY_SEARCH_PATHS = /opt/local/lib/;
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lboost_thread-mt",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1EB0AF111593A2180007E2A4 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ LIBRARY_SEARCH_PATHS = /opt/local/lib/;
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lboost_thread-mt",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1E1D826615AC640800FE42E9 /* Build configuration list for PBXNativeTarget "extractor" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E1D826715AC640800FE42E9 /* Debug */,
+ 1E1D826815AC640800FE42E9 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1EB0AEFF1593A2180007E2A4 /* Build configuration list for PBXProject "mert" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1EB0AF0D1593A2180007E2A4 /* Debug */,
+ 1EB0AF0E1593A2180007E2A4 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1EB0AF0F1593A2180007E2A4 /* Build configuration list for PBXNativeTarget "mert" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1EB0AF101593A2180007E2A4 /* Debug */,
+ 1EB0AF111593A2180007E2A4 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 1EB0AEFC1593A2180007E2A4 /* Project object */;
+}
diff --git a/contrib/other-builds/mert.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/contrib/other-builds/mert.xcodeproj/project.xcworkspace/contents.xcworkspacedata
new file mode 100644
index 000000000..03c6b7b80
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/project.xcworkspace/contents.xcworkspacedata
@@ -0,0 +1,7 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Workspace
+ version = "1.0">
+ <FileRef
+ location = "self:mert.xcodeproj">
+ </FileRef>
+</Workspace>
diff --git a/contrib/other-builds/mert.xcodeproj/project.xcworkspace/xcuserdata/hieuhoang.xcuserdatad/UserInterfaceState.xcuserstate b/contrib/other-builds/mert.xcodeproj/project.xcworkspace/xcuserdata/hieuhoang.xcuserdatad/UserInterfaceState.xcuserstate
new file mode 100644
index 000000000..eef05294a
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/project.xcworkspace/xcuserdata/hieuhoang.xcuserdatad/UserInterfaceState.xcuserstate
@@ -0,0 +1,8628 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>$archiver</key>
+ <string>NSKeyedArchiver</string>
+ <key>$objects</key>
+ <array>
+ <string>$null</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>2</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>3</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>4</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>177</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>A0ED48DA-D116-4801-AB51-861E1E3CE459</string>
+ <string>IDEWorkspaceDocument</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>5</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>6</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>7</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>8</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>9</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>10</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>11</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>12</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>13</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>14</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>17</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>2</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>8</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>128</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>128</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEWindowFrame</string>
+ <string>IDEOrderedWorkspaceTabControllers</string>
+ <string>IDEWindowInFullscreenMode</string>
+ <string>IDEWorkspaceTabController_47815CCD-573D-4957-A6D1-F7389545EB27</string>
+ <string>IDEWorkspaceWindowControllerUniqueIdentifier</string>
+ <string>IDEActiveWorkspaceTabController</string>
+ <string>IDEWindowToolbarIsVisible</string>
+ <string>IDEWindowTabBarIsVisible</string>
+ <string>{{0, 58}, {1280, 720}}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>8</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSArray</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSArray</string>
+ </dict>
+ <false/>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>18</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>19</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>20</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>21</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>22</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>23</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>24</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>25</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>26</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>128</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>138</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>145</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>167</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>176</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEEditorArea</string>
+ <string>IDEShowNavigator</string>
+ <string>AssistantEditorsLayout</string>
+ <string>IDEWorkspaceTabControllerUtilityAreaSplitView</string>
+ <string>IDENavigatorArea</string>
+ <string>IDEWorkspaceTabControllerDesignAreaSplitView</string>
+ <string>IDEShowUtilities</string>
+ <string>IDETabLabel</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>27</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>28</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>29</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>30</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>31</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>32</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>33</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>34</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>35</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>57</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>98</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>128</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>129</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>137</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>layoutTree</string>
+ <string>IDEEditorMode_Standard</string>
+ <string>IDEEDitorArea_DebugArea</string>
+ <string>IDEShowEditor</string>
+ <string>EditorMode</string>
+ <string>DebuggerSplitView</string>
+ <string>DefaultPersistentRepresentations</string>
+ <string>ShowDebuggerArea</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>56</integer>
+ </dict>
+ <key>geniusEditorContextNode</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>primaryEditorContextNode</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>36</integer>
+ </dict>
+ <key>rootLayoutTreeNode</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>53</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>55</integer>
+ </dict>
+ <key>children</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>contentType</key>
+ <integer>1</integer>
+ <key>documentArchivableRepresentation</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>37</integer>
+ </dict>
+ <key>orientation</key>
+ <integer>0</integer>
+ <key>parent</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>53</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>52</integer>
+ </dict>
+ <key>DocumentLocation</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>48</integer>
+ </dict>
+ <key>DomainIdentifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>38</integer>
+ </dict>
+ <key>IdentifierPath</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>39</integer>
+ </dict>
+ <key>IndexOfDocumentIdentifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ </dict>
+ <string>Xcode.IDENavigableItemDomain.WorkspaceStructure</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>40</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>43</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>45</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>42</integer>
+ </dict>
+ <key>Identifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>41</integer>
+ </dict>
+ </dict>
+ <string>InterpolatedScorer.h</string>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>IDEArchivableStringIndexPair</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>IDEArchivableStringIndexPair</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>42</integer>
+ </dict>
+ <key>Identifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>44</integer>
+ </dict>
+ </dict>
+ <string>mert_lib.xcodeproj</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>42</integer>
+ </dict>
+ <key>Identifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>46</integer>
+ </dict>
+ </dict>
+ <string>mert</string>
+ <integer>0</integer>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>51</integer>
+ </dict>
+ <key>documentURL</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>49</integer>
+ </dict>
+ <key>timestamp</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/InterpolatedScorer.h</string>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSMutableString</string>
+ <string>NSString</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSMutableString</string>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>DVTDocumentLocation</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>DVTDocumentLocation</string>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>IDENavigableItemArchivableRepresentation</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>IDENavigableItemArchivableRepresentation</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>55</integer>
+ </dict>
+ <key>children</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>54</integer>
+ </dict>
+ <key>contentType</key>
+ <integer>0</integer>
+ <key>documentArchivableRepresentation</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>orientation</key>
+ <integer>0</integer>
+ <key>parent</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>36</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>IDEWorkspaceTabControllerLayoutTreeNode</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>IDEWorkspaceTabControllerLayoutTreeNode</string>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>IDEWorkspaceTabControllerLayoutTree</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>IDEWorkspaceTabControllerLayoutTree</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>58</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>59</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>EditorLayout_PersistentRepresentation</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>60</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>61</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>Main</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>62</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>63</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>64</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>65</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>96</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>EditorLayout_StateSavingStateDictionaries</string>
+ <string>EditorLayout_Selected</string>
+ <string>EditorLayout_Geometry</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>66</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>67</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>68</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>69</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>70</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>71</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>72</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>73</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>74</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>75</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>81</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>90</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>41</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>91</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>92</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>FileDataType</string>
+ <string>ArchivableRepresentation</string>
+ <string>EditorState</string>
+ <string>NavigableItemName</string>
+ <string>DocumentNavigableItemName</string>
+ <string>DocumentExtensionIdentifier</string>
+ <string>DocumentURL</string>
+ <string>public.c-header</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>52</integer>
+ </dict>
+ <key>DocumentLocation</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>48</integer>
+ </dict>
+ <key>DomainIdentifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>38</integer>
+ </dict>
+ <key>IdentifierPath</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>76</integer>
+ </dict>
+ <key>IndexOfDocumentIdentifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>77</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>78</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>79</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>42</integer>
+ </dict>
+ <key>Identifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>41</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>42</integer>
+ </dict>
+ <key>Identifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>44</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>42</integer>
+ </dict>
+ <key>Identifier</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>80</integer>
+ </dict>
+ </dict>
+ <string>mert</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>82</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>83</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>84</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>85</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>86</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>87</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>88</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>PrimaryDocumentTimestamp</string>
+ <string>PrimaryDocumentVisibleCharacterRange</string>
+ <string>HideAllIssues</string>
+ <string>PrimaryDocumentSelectedCharacterRange</string>
+ <real>363696391.20448101</real>
+ <string>{0, 1309}</string>
+ <string>{332, 0}</string>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSDictionary</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSDictionary</string>
+ </dict>
+ <string>class InterpolatedScorer</string>
+ <string>Xcode.IDEKit.EditorDocument.SourceCode</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>93</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/InterpolatedScorer.h</string>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSURL</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSURL</string>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSMutableDictionary</string>
+ <string>NSDictionary</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSMutableDictionary</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>97</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>{{0, 0}, {1020, 622}}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>99</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>100</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>101</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>102</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>103</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>104</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>106</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>108</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>110</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>122</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>LayoutFocusMode</string>
+ <string>console</string>
+ <string>variables</string>
+ <string>LayoutMode</string>
+ <string>IDEDebuggerAreaSplitView</string>
+ <string>IDEDebugArea_SplitView</string>
+ <integer>1</integer>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>107</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>ConsoleFilterMode</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>109</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>VariablesViewSelectedScope</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>111</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>112</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>DVTSplitViewItems</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>113</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>118</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>116</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>117</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>DVTIdentifier</string>
+ <string>DVTViewMagnitude</string>
+ <string>VariablesView</string>
+ <real>510</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>119</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>120</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>ConsoleArea</string>
+ <real>509</real>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSMutableArray</string>
+ <string>NSArray</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSMutableArray</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>111</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>123</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>124</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>126</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>116</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>125</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>510</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>119</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>127</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>509</real>
+ <true/>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>111</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>130</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>131</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>134</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>132</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>133</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEEditor</string>
+ <real>203</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>135</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>136</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEDebuggerArea</string>
+ <real>115</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array/>
+ <key>NS.objects</key>
+ <array/>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>111</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>139</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>140</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>143</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>141</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>142</integer>
+ </dict>
+ </array>
+ </dict>
+ <string></string>
+ <real>398</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>141</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>144</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>224</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>146</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>147</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>147</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>148</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>SelectedNavigator</string>
+ <string>Xcode.IDEKit.Navigator.Structure</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>149</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>150</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>151</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>152</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>153</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>154</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>155</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>156</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>157</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>159</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>162</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEVisibleRect</string>
+ <string>IDEUnsavedDocumentFilteringEnabled</string>
+ <string>IDENavigatorExpandedItemsBeforeFilteringSet</string>
+ <string>IDERecentDocumentFilteringEnabled</string>
+ <string>IDESCMStatusFilteringEnabled</string>
+ <string>IDESelectedObjects</string>
+ <string>IDEExpandedItemsSet</string>
+ <string>{{0, 300}, {259, 578}}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>158</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array/>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSSet</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSSet</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>160</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>161</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>44</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>41</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>mert</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>158</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>163</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>165</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>166</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>161</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>164</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>Products</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>161</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>161</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>44</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>111</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>168</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>169</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>171</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>173</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>22</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>170</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>260</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>18</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>172</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>1020</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>114</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>115</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>174</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>175</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEUtilitiesArea</string>
+ <real>260</real>
+ <string>InterpolatedScorer.h</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>178</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>179</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>180</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>181</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>182</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>183</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>184</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>185</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>186</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>187</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>188</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>47</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>655</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>660</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>663</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>694</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>695</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>BreakpointsActivated</string>
+ <string>DefaultEditorStatesForURLs</string>
+ <string>DebuggingWindowBehavior</string>
+ <string>ActiveRunDestination</string>
+ <string>ActiveScheme</string>
+ <string>LastCompletedPersistentSchemeBasedActivityReport</string>
+ <string>DocumentWindows</string>
+ <string>RecentEditorDocumentURLs</string>
+ <string>AppFocusInMiniDebugging</string>
+ <string>MiniDebuggingConsole</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>189</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>190</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>191</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>613</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>Xcode.Xcode3ProjectSupport.EditorDocument.Xcode3Project</string>
+ <string>Xcode.IDEKit.EditorDocument.SourceCode</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>192</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>194</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>196</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>414</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>193</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/other-builds/mert.xcodeproj/</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>195</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/other-builds/mert_lib.xcodeproj/</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>197</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>198</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>199</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>200</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>201</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>211</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>212</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>413</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>Xcode3ProjectEditor.sourceList.splitview</string>
+ <string>Xcode3ProjectEditorPreviousTargetEditorClass</string>
+ <string>Xcode3ProjectEditorSelectedDocumentLocations</string>
+ <string>Xcode3ProjectEditor_Xcode3BuildSettingsEditor</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>202</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>203</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>DVTSplitViewItems</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>204</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>209</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>205</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>206</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>207</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>208</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>DVTIdentifier</string>
+ <string>DVTViewMagnitude</string>
+ <string></string>
+ <real>162</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>205</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>206</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>207</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>210</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>858</real>
+ <string>Xcode3BuildSettingsEditor</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>213</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>412</integer>
+ </dict>
+ <key>documentURL</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>214</integer>
+ </dict>
+ <key>selection</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>216</integer>
+ </dict>
+ <key>timestamp</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>215</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/other-builds/mert.xcodeproj/</string>
+ <real>363627943.189156</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>217</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>218</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>219</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>211</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>220</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>221</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>Editor</string>
+ <string>Target</string>
+ <string>Xcode3BuildSettingsEditorLocations</string>
+ <string>mert</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>222</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>223</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>224</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>225</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>226</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>227</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>228</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>229</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>230</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>229</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>229</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>231</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>Xcode3BuildSettingsEditorMode</string>
+ <string>Selected Build Properties</string>
+ <string>Xcode3BuildSettingsEditorDisplayMode</string>
+ <string>Xcode3BuildPropertyValueDisplayMode</string>
+ <string>Collapsed Build Property Categories</string>
+ <string>Xcode3BuildPropertyNameDisplayMode</string>
+ <integer>0</integer>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array/>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>232</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>233</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>234</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>235</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>236</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>237</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>238</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>239</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>240</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>241</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>242</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>243</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>244</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>245</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>246</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>247</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>248</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>249</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>250</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>251</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>252</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>253</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>254</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>255</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>256</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>257</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>258</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>259</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>260</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>261</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>262</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>263</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>264</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>265</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>266</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>267</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>268</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>269</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>270</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>271</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>272</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>273</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>274</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>275</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>276</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>277</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>278</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>279</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>280</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>281</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>282</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>283</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>284</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>285</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>286</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>287</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>288</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>289</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>290</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>291</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>292</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>293</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>294</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>295</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>296</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>297</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>298</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>299</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>300</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>301</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>302</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>303</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>304</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>305</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>306</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>307</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>308</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>309</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>310</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>311</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>312</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>313</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>314</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>315</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>316</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>317</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>318</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>319</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>320</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>321</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>322</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>323</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>324</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>325</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>326</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>327</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>328</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>329</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>330</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>331</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>332</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>333</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>334</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>335</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>336</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>337</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>338</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>339</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>340</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>341</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>342</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>343</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>344</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>345</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>346</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>347</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>348</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>349</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>350</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>351</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>352</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>353</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>354</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>355</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>356</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>357</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>358</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>359</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>360</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>361</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>362</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>363</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>364</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>365</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>366</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>367</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>368</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>369</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>370</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>371</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>372</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>373</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>374</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>375</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>376</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>377</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>378</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>379</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>380</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>381</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>382</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>383</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>384</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>385</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>386</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>387</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>388</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>389</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>390</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>391</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>392</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>393</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>394</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>395</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>396</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>397</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>398</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>399</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>400</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>401</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>402</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>403</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>404</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>405</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>406</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>407</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>408</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>409</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>410</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>411</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||ADDITIONAL_SDKS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||ARCHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||SDKROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||SUPPORTED_PLATFORMS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||VALID_ARCHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Locations||SYMROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Locations||OBJROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Locations||SHARED_PRECOMPS_DIR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||BUILD_VARIANTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||GCC_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||ENABLE_OPENMP_SUPPORT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||GENERATE_PROFILING_CODE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||PRECOMPS_INCLUDE_HEADERS_FROM_BUILT_PRODUCTS_DIR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||RUN_CLANG_STATIC_ANALYZER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||SCAN_ALL_SOURCE_FILES_FOR_INCLUDES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||VALIDATE_PRODUCT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||CODE_SIGN_ENTITLEMENTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||CODE_SIGN_IDENTITY</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||CODE_SIGN_RESOURCE_RULES_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||OTHER_CODE_SIGN_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||STRIPFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_GROUP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_OWNER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_MODE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_PERMISSIONS_FILES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||COMBINE_HIDPI_IMAGES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||DEPLOYMENT_LOCATION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||DEPLOYMENT_POSTPROCESSING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_GROUP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_OWNER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_MODE_FLAG</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||DSTROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||MACOSX_DEPLOYMENT_TARGET</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||SKIP_INSTALL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||STRIP_INSTALLED_PRODUCT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||STRIP_STYLE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||SEPARATE_STRIP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_NAME</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_START</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_STOP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||BUNDLE_LOADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||DYLIB_COMPATIBILITY_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||DYLIB_CURRENT_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||DEAD_CODE_STRIPPING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LINKER_DISPLAYS_MANGLED_NAMES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_NO_PIE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||PRESERVE_DEAD_CODE_INITS_AND_TERMS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_DYLIB_INSTALL_NAME</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||EXPORTED_SYMBOLS_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||INIT_ROUTINE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LINK_WITH_STANDARD_LIBRARIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||MACH_O_TYPE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_OPENMP_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||ORDER_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||OTHER_LDFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||GENERATE_MASTER_OBJECT_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||PRELINK_LIBS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||KEEP_PRIVATE_EXTERNS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_RUNPATH_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||SEPARATE_SYMBOL_EDIT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||PRELINK_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||SECTORDER_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||UNEXPORTED_SYMBOLS_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||WARNING_LDFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_GENERATE_MAP_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||APPLY_RULES_IN_COPY_FILES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||EXECUTABLE_EXTENSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||EXECUTABLE_PREFIX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_EXPAND_BUILD_SETTINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||GENERATE_PKGINFO_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||FRAMEWORK_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_OTHER_PREPROCESSOR_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_OUTPUT_FORMAT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_PREPROCESSOR_DEFINITIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_PREFIX_HEADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_PREPROCESS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||COPYING_PRESERVES_HFS_DATA</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PRIVATE_HEADERS_FOLDER_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PRODUCT_NAME</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PLIST_FILE_OUTPUT_FORMAT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PUBLIC_HEADERS_FOLDER_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||STRINGS_FILE_OUTPUT_ENCODING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||WRAPPER_EXTENSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||ALWAYS_SEARCH_USER_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||FRAMEWORK_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||HEADER_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||LIBRARY_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||REZ_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||EXCLUDED_RECURSIVE_SEARCH_PATH_SUBDIRECTORIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||INCLUDED_RECURSIVE_SEARCH_PATH_SUBDIRECTORIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||USER_HEADER_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||OTHER_TEST_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||TEST_AFTER_BUILD</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||TEST_HOST</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||TEST_RIG</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||CURRENT_PROJECT_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_EXPORT_DECL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_PREFIX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_SUFFIX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSIONING_SYSTEM</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_BUILDER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_FAST_OBJC_DISPATCH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SSE3_EXTENSIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SSE41_EXTENSIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SSE42_EXTENSIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SUPPLEMENTAL_SSE3_INSTRUCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_STRICT_ALIASING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_GENERATE_DEBUGGING_SYMBOLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_DYNAMIC_NO_PIC</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_INLINES_ARE_PRIVATE_EXTERN</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_KERNEL_DEVELOPMENT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||LLVM_LTO</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_REUSE_STRINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_NO_COMMON_BLOCKS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_OBJC_GC</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_FAST_MATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_THREADSAFE_STATICS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_UNROLL_LOOPS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_CHAR_IS_UNSIGNED_CHAR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_ASM_KEYWORD</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_C_LANGUAGE_STANDARD</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_INPUT_FILETYPE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_CPP_EXCEPTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_CPP_RTTI</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_LINK_WITH_DYNAMIC_LIBRARIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_OBJC_EXCEPTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_TRIGRAPHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_FLOATING_POINT_LIBRARY_CALLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_USE_INDIRECT_FUNCTION_CALLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_USE_REGISTER_FUNCTION_CALLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_INCREASE_PRECOMPILED_HEADER_SHARING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_CW_ASM_SYNTAX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||OTHER_CFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||OTHER_CPLUSPLUSFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_PRECOMPILE_PREFIX_HEADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_PREFIX_HEADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_BUILTIN_FUNCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_PASCAL_STRINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_FORCE_CPU_SUBTYPE_ALL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_SHORT_ENUMS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_USE_STANDARD_INCLUDE_SEARCHING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Preprocessing||GCC_PREPROCESSOR_DEFINITIONS_NOT_USED_IN_PRECOMPS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_CHECK_SWITCH_STATEMENTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_FOUR_CHARACTER_CONSTANTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_SHADOW</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_64_TO_32_BIT_CONVERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ALLOW_INCOMPLETE_PROTOCOL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_INHIBIT_ALL_WARNINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_RETURN_TYPE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_MISSING_PARENTHESES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_MISSING_PROTOTYPES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_MISSING_NEWLINE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_MULTIPLE_DEFINITION_TYPES_FOR_SELECTOR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_NON_VIRTUAL_DESTRUCTOR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||WARNING_CFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_PEDANTIC</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_POINTER_SIGNEDNESS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_SIGN_COMPARE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_STRICT_SELECTOR_MATCH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_TREAT_INCOMPATIBLE_POINTER_TYPE_WARNINGS_AS_ERRORS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_TREAT_WARNINGS_AS_ERRORS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_TYPECHECK_CALLS_TO_PRINTF</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNDECLARED_SELECTOR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNINITIALIZED_AUTOS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNKNOWN_PRAGMAS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_FUNCTION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_LABEL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_PARAMETER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_VALUE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_VARIABLE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_DEPRECATED_FUNCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_INVALID_OFFSETOF_MACRO</string>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>Xcode3ProjectDocumentLocation</string>
+ <string>DVTDocumentLocation</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>Xcode3ProjectDocumentLocation</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array/>
+ <key>NS.objects</key>
+ <array/>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>197</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>198</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>199</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>200</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>415</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>211</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>421</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>612</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>202</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>416</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>417</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>419</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>205</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>206</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>207</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>418</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>170</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>89</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>205</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>206</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>207</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>420</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>850</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>422</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>412</integer>
+ </dict>
+ <key>documentURL</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>423</integer>
+ </dict>
+ <key>selection</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>425</integer>
+ </dict>
+ <key>timestamp</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>424</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/other-builds/mert_lib.xcodeproj/</string>
+ <real>363694729.26263899</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>217</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>218</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>219</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>211</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>426</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>427</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>mert_lib</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>15</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>428</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>228</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>224</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>225</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>226</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>227</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>223</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>429</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>229</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>229</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>430</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>229</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array/>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>431</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>432</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>433</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>434</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>435</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>436</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>437</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>438</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>439</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>440</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>441</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>442</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>443</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>444</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>445</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>446</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>447</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>448</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>449</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>450</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>451</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>452</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>453</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>454</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>455</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>456</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>457</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>458</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>459</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>460</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>461</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>462</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>463</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>464</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>465</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>466</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>467</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>468</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>469</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>470</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>471</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>472</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>473</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>474</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>475</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>476</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>477</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>478</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>479</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>480</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>481</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>482</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>483</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>484</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>485</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>486</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>487</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>488</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>489</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>490</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>491</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>492</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>493</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>494</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>495</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>496</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>497</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>498</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>499</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>500</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>501</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>502</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>503</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>504</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>505</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>506</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>507</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>508</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>509</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>510</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>511</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>512</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>513</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>514</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>515</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>516</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>517</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>518</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>519</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>520</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>521</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>522</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>523</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>524</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>525</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>526</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>527</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>528</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>529</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>530</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>531</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>532</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>533</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>534</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>535</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>536</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>537</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>538</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>539</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>540</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>541</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>542</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>543</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>544</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>545</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>546</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>547</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>548</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>549</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>550</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>551</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>552</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>553</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>554</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>555</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>556</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>557</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>558</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>559</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>560</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>561</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>562</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>563</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>564</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>565</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>566</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>567</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>568</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>569</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>570</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>571</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>572</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>573</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>574</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>575</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>576</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>577</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>578</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>579</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>580</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>581</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>582</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>583</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>584</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>585</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>586</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>587</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>588</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>589</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>590</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>591</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>592</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>593</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>594</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>595</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>596</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>597</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>598</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>599</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>600</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>601</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>602</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>603</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>604</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>605</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>606</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>607</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>608</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>609</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>610</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>611</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||ADDITIONAL_SDKS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||ARCHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||SDKROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||SUPPORTED_PLATFORMS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Architectures||VALID_ARCHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Locations||SYMROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Locations||OBJROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Locations||SHARED_PRECOMPS_DIR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||BUILD_VARIANTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||GCC_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||ENABLE_OPENMP_SUPPORT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||GENERATE_PROFILING_CODE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||PRECOMPS_INCLUDE_HEADERS_FROM_BUILT_PRODUCTS_DIR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||RUN_CLANG_STATIC_ANALYZER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||SCAN_ALL_SOURCE_FILES_FOR_INCLUDES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Build Options||VALIDATE_PRODUCT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||CODE_SIGN_ENTITLEMENTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||CODE_SIGN_IDENTITY</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||CODE_SIGN_RESOURCE_RULES_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Code Signing||OTHER_CODE_SIGN_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||STRIPFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_GROUP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_OWNER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_MODE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||ALTERNATE_PERMISSIONS_FILES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||COMBINE_HIDPI_IMAGES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||DEPLOYMENT_LOCATION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||DEPLOYMENT_POSTPROCESSING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_GROUP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_OWNER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_MODE_FLAG</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||DSTROOT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||INSTALL_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||MACOSX_DEPLOYMENT_TARGET</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||SKIP_INSTALL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||STRIP_INSTALLED_PRODUCT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||STRIP_STYLE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Deployment||SEPARATE_STRIP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_NAME</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_START</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_STOP</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Kernel Module||MODULE_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||BUNDLE_LOADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||DYLIB_COMPATIBILITY_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||DYLIB_CURRENT_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||DEAD_CODE_STRIPPING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LINKER_DISPLAYS_MANGLED_NAMES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_NO_PIE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||PRESERVE_DEAD_CODE_INITS_AND_TERMS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_DYLIB_INSTALL_NAME</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||EXPORTED_SYMBOLS_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||INIT_ROUTINE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LINK_WITH_STANDARD_LIBRARIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||MACH_O_TYPE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_OPENMP_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||ORDER_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||OTHER_LDFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||GENERATE_MASTER_OBJECT_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||PRELINK_LIBS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||KEEP_PRIVATE_EXTERNS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_RUNPATH_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||SEPARATE_SYMBOL_EDIT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||PRELINK_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||SECTORDER_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||UNEXPORTED_SYMBOLS_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||WARNING_LDFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Linking||LD_GENERATE_MAP_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||APPLY_RULES_IN_COPY_FILES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||EXECUTABLE_EXTENSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||EXECUTABLE_PREFIX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_EXPAND_BUILD_SETTINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||GENERATE_PKGINFO_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||FRAMEWORK_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_OTHER_PREPROCESSOR_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_OUTPUT_FORMAT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_PREPROCESSOR_DEFINITIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_PREFIX_HEADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||INFOPLIST_PREPROCESS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||COPYING_PRESERVES_HFS_DATA</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PRIVATE_HEADERS_FOLDER_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PRODUCT_NAME</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PLIST_FILE_OUTPUT_FORMAT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||PUBLIC_HEADERS_FOLDER_PATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||STRINGS_FILE_OUTPUT_ENCODING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Packaging||WRAPPER_EXTENSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||ALWAYS_SEARCH_USER_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||FRAMEWORK_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||HEADER_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||LIBRARY_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||REZ_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||EXCLUDED_RECURSIVE_SEARCH_PATH_SUBDIRECTORIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||INCLUDED_RECURSIVE_SEARCH_PATH_SUBDIRECTORIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Search Paths||USER_HEADER_SEARCH_PATHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||OTHER_TEST_FLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||TEST_AFTER_BUILD</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||TEST_HOST</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Unit Testing||TEST_RIG</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||CURRENT_PROJECT_VERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_FILE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_EXPORT_DECL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_PREFIX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_SUFFIX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSIONING_SYSTEM</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>Versioning||VERSION_INFO_BUILDER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_FAST_OBJC_DISPATCH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SSE3_EXTENSIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SSE41_EXTENSIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SSE42_EXTENSIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_SUPPLEMENTAL_SSE3_INSTRUCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_STRICT_ALIASING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_GENERATE_DEBUGGING_SYMBOLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_DYNAMIC_NO_PIC</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_INLINES_ARE_PRIVATE_EXTERN</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_KERNEL_DEVELOPMENT</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||LLVM_LTO</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_REUSE_STRINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_NO_COMMON_BLOCKS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_ENABLE_OBJC_GC</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_FAST_MATH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_THREADSAFE_STATICS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_SYMBOLS_PRIVATE_EXTERN</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Code Generation||GCC_UNROLL_LOOPS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_CHAR_IS_UNSIGNED_CHAR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_ASM_KEYWORD</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_C_LANGUAGE_STANDARD</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_INPUT_FILETYPE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_CPP_EXCEPTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_CPP_RTTI</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_LINK_WITH_DYNAMIC_LIBRARIES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_OBJC_EXCEPTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_TRIGRAPHS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_FLOATING_POINT_LIBRARY_CALLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_USE_INDIRECT_FUNCTION_CALLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_USE_REGISTER_FUNCTION_CALLS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_INCREASE_PRECOMPILED_HEADER_SHARING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_CW_ASM_SYNTAX</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||OTHER_CFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||OTHER_CPLUSPLUSFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_PRECOMPILE_PREFIX_HEADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_PREFIX_HEADER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_BUILTIN_FUNCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_ENABLE_PASCAL_STRINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_FORCE_CPU_SUBTYPE_ALL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_SHORT_ENUMS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Language||GCC_USE_STANDARD_INCLUDE_SEARCHING</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Preprocessing||GCC_PREPROCESSOR_DEFINITIONS_NOT_USED_IN_PRECOMPS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_CHECK_SWITCH_STATEMENTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_FOUR_CHARACTER_CONSTANTS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_SHADOW</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_64_TO_32_BIT_CONVERSION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ALLOW_INCOMPLETE_PROTOCOL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_INHIBIT_ALL_WARNINGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_INITIALIZER_NOT_FULLY_BRACKETED</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_RETURN_TYPE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_MISSING_PARENTHESES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_MISSING_FIELD_INITIALIZERS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_MISSING_PROTOTYPES</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_MISSING_NEWLINE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_MULTIPLE_DEFINITION_TYPES_FOR_SELECTOR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_NON_VIRTUAL_DESTRUCTOR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||WARNING_CFLAGS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_HIDDEN_VIRTUAL_FUNCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_PEDANTIC</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_POINTER_SIGNEDNESS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_SIGN_COMPARE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_STRICT_SELECTOR_MATCH</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_TREAT_INCOMPATIBLE_POINTER_TYPE_WARNINGS_AS_ERRORS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_TREAT_IMPLICIT_FUNCTION_DECLARATIONS_AS_ERRORS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_TREAT_WARNINGS_AS_ERRORS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_TYPECHECK_CALLS_TO_PRINTF</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNDECLARED_SELECTOR</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNINITIALIZED_AUTOS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNKNOWN_PRAGMAS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_FUNCTION</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_LABEL</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_PARAMETER</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_VALUE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_UNUSED_VARIABLE</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_DEPRECATED_FUNCTIONS</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>LLVM compiler 2.1 - Warnings||GCC_WARN_ABOUT_INVALID_OFFSETOF_MACRO</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array/>
+ <key>NS.objects</key>
+ <array/>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>614</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>616</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>618</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>619</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>621</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>623</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>625</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>627</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>635</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>638</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>642</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>645</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>649</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>652</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>615</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/moses/src/ThreadPool.h</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>617</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/moses/src/ThreadPool.cpp</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>49</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>620</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/StatisticsBasedScorer.h</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>622</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/extractor.cpp</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>624</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>626</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>50</integer>
+ </dict>
+ <key>NS.string</key>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/StatisticsBasedScorer.cpp</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>628</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>629</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>630</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>631</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>632</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>633</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>634</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>PrimaryDocumentTimestamp</string>
+ <string>PrimaryDocumentVisibleCharacterRange</string>
+ <string>HideAllIssues</string>
+ <string>PrimaryDocumentSelectedCharacterRange</string>
+ <real>363694733.737234</real>
+ <string>{0, 1387}</string>
+ <string>{0, 0}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>628</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>629</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>630</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>631</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>636</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>637</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>634</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>363694729.53642899</real>
+ <string>{0, 1485}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>82</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>83</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>84</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>85</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>639</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>640</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>641</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>363696391.20240802</real>
+ <string>{0, 1309}</string>
+ <string>{332, 0}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>628</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>629</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>630</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>631</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>643</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>644</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>634</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>363694750.12241</real>
+ <string>{0, 1049}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>628</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>629</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>630</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>631</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>646</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>647</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>648</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>363694727.34139502</real>
+ <string>{992, 1572}</string>
+ <string>{247, 0}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>628</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>629</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>630</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>631</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>650</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>651</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>634</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>363627943.92405301</real>
+ <string>{0, 1056}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>628</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>629</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>630</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>631</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>653</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>654</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>16</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>634</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>363694734.10040599</real>
+ <string>{0, 1404}</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>656</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>657</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>658</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>659</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEDeviceLocation</string>
+ <string>IDEDeviceArchitecture</string>
+ <string>dvtdevice-local-computer:localhost</string>
+ <string>x86_64</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>661</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>662</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDENameString</string>
+ <string>extractor</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>664</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>665</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>666</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>667</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>693</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>426</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEActivityReportCompletionSummaryStringSegments</string>
+ <string>IDEActivityReportOptions</string>
+ <string>IDEActivityReportTitle</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>668</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>675</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>679</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>684</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>669</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>670</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>671</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>672</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>673</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>674</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEActivityReportStringSegmentPriority</string>
+ <string>IDEActivityReportStringSegmentBackSeparator</string>
+ <string>IDEActivityReportStringSegmentStringValue</string>
+ <real>2</real>
+ <string> </string>
+ <string>Build</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>669</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>670</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>671</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>676</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>677</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>678</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>4</real>
+ <string>: </string>
+ <string>extractor</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>669</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>670</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>671</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>680</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>681</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>682</integer>
+ </dict>
+ </array>
+ </dict>
+ <real>1</real>
+ <string> │ </string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>683</integer>
+ </dict>
+ <key>NS.data</key>
+ <data>
+ YnBsaXN0MDDUAQIDBAUGOzxYJHZlcnNpb25YJG9iamVjdHNZJGFy
+ Y2hpdmVyVCR0b3ASAAGGoK0HCA8QGhscJCUrMTQ3VSRudWxs0wkK
+ CwwNDlxOU0F0dHJpYnV0ZXNWJGNsYXNzWE5TU3RyaW5ngAOADIAC
+ WVN1Y2NlZWRlZNMKERITFBdXTlMua2V5c1pOUy5vYmplY3RzgAui
+ FRaABIAFohgZgAaACVZOU0ZvbnRXTlNDb2xvctQKHR4fICEiI1ZO
+ U05hbWVWTlNTaXplWE5TZkZsYWdzgAiAByNAJgAAAAAAABENEF8Q
+ EUx1Y2lkYUdyYW5kZS1Cb2xk0iYnKClaJGNsYXNzbmFtZVgkY2xh
+ c3Nlc1ZOU0ZvbnSiKCpYTlNPYmplY3TTCiwtLi8wXE5TQ29sb3JT
+ cGFjZVdOU1doaXRlgAoQA0IwANImJzIzV05TQ29sb3KiMirSJic1
+ NlxOU0RpY3Rpb25hcnmiNSrSJic4OV8QEk5TQXR0cmlidXRlZFN0
+ cmluZ6I6Kl8QEk5TQXR0cmlidXRlZFN0cmluZ18QD05TS2V5ZWRB
+ cmNoaXZlctE9PlRyb290gAEACAARABoAIwAtADIANwBFAEsAUgBf
+ AGYAbwBxAHMAdQB/AIYAjgCZAJsAngCgAKIApQCnAKkAsAC4AMEA
+ yADPANgA2gDcAOUA6AD8AQEBDAEVARwBHwEoAS8BPAFEAUYBSAFL
+ AVABWAFbAWABbQFwAXUBigGNAaIBtAG3AbwAAAAAAAACAQAAAAAA
+ AAA/AAAAAAAAAAAAAAAAAAABvg==
+ </data>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSMutableData</string>
+ <string>NSData</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSMutableData</string>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>95</integer>
+ </dict>
+ <key>NS.keys</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>669</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>685</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>686</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>671</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>687</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>688</integer>
+ </dict>
+ </array>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>689</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>690</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>692</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>105</integer>
+ </dict>
+ </array>
+ </dict>
+ <string>IDEActivityReportStringSegmentType</string>
+ <string>IDEActivityReportStringSegmentDate</string>
+ <string>IDEActivityReportStringSegmentDateStyle</string>
+ <string>IDEActivityReportStringSegmentTimeStyle</string>
+ <real>3</real>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>691</integer>
+ </dict>
+ <key>NS.time</key>
+ <real>363631454.18081301</real>
+ </dict>
+ <dict>
+ <key>$classes</key>
+ <array>
+ <string>NSDate</string>
+ <string>NSObject</string>
+ </array>
+ <key>$classname</key>
+ <string>NSDate</string>
+ </dict>
+ <string>Yesterday at 17:44</string>
+ <integer>106</integer>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>2</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>121</integer>
+ </dict>
+ <key>NS.objects</key>
+ <array>
+ <dict>
+ <key>CF$UID</key>
+ <integer>696</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>698</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>700</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>702</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>704</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>706</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>707</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>709</integer>
+ </dict>
+ <dict>
+ <key>CF$UID</key>
+ <integer>711</integer>
+ </dict>
+ </array>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>697</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/InterpolatedScorer.h</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>699</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/StatisticsBasedScorer.h</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>701</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/StatisticsBasedScorer.cpp</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>703</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/moses/src/ThreadPool.h</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>705</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/moses/src/ThreadPool.cpp</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>423</integer>
+ </dict>
+ </dict>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>708</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/extractor.cpp</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>710</integer>
+ </dict>
+ </dict>
+ <string>file://localhost/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp</string>
+ <dict>
+ <key>$class</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>94</integer>
+ </dict>
+ <key>NS.base</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>0</integer>
+ </dict>
+ <key>NS.relative</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>214</integer>
+ </dict>
+ </dict>
+ </array>
+ <key>$top</key>
+ <dict>
+ <key>State</key>
+ <dict>
+ <key>CF$UID</key>
+ <integer>1</integer>
+ </dict>
+ </dict>
+ <key>$version</key>
+ <integer>100000</integer>
+</dict>
+</plist>
diff --git a/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist
new file mode 100644
index 000000000..5029ca7bd
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcdebugger/Breakpoints.xcbkptlist
@@ -0,0 +1,35 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Bucket
+ type = "1"
+ version = "1.0">
+ <FileBreakpoints>
+ <FileBreakpoint
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ isPathRelative = "0"
+ filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp"
+ timestampString = "363625029.073606"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "316"
+ endingLineNumber = "316"
+ landmarkName = "main(int argc, char **argv)"
+ landmarkType = "7">
+ </FileBreakpoint>
+ <FileBreakpoint
+ shouldBeEnabled = "Yes"
+ ignoreCount = "0"
+ continueAfterRunningActions = "No"
+ isPathRelative = "0"
+ filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp"
+ timestampString = "363625081.848519"
+ startingColumnNumber = "9223372036854775807"
+ endingColumnNumber = "9223372036854775807"
+ startingLineNumber = "326"
+ endingLineNumber = "326"
+ landmarkName = "main(int argc, char **argv)"
+ landmarkType = "7">
+ </FileBreakpoint>
+ </FileBreakpoints>
+</Bucket>
diff --git a/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/extractor.xcscheme b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/extractor.xcscheme
new file mode 100644
index 000000000..48258bc54
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/extractor.xcscheme
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
+ BuildableName = "extractor"
+ BlueprintName = "extractor"
+ ReferencedContainer = "container:mert.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Debug">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
+ BuildableName = "extractor"
+ BlueprintName = "extractor"
+ ReferencedContainer = "container:mert.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
+ BuildableName = "extractor"
+ BlueprintName = "extractor"
+ ReferencedContainer = "container:mert.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert.xcscheme b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert.xcscheme
new file mode 100644
index 000000000..2d41b933c
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert.xcscheme
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
+ BuildableName = "mert"
+ BlueprintName = "mert"
+ ReferencedContainer = "container:mert.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Debug">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
+ BuildableName = "mert"
+ BlueprintName = "mert"
+ ReferencedContainer = "container:mert.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
+ BuildableName = "mert"
+ BlueprintName = "mert"
+ ReferencedContainer = "container:mert.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 000000000..d55559c75
--- /dev/null
+++ b/contrib/other-builds/mert.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>SchemeUserState</key>
+ <dict>
+ <key>extractor.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>1</integer>
+ </dict>
+ <key>mert.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>2</integer>
+ </dict>
+ </dict>
+ <key>SuppressBuildableAutocreation</key>
+ <dict>
+ <key>1E1D825E15AC640800FE42E9</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ <key>1EB0AF041593A2180007E2A4</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ </dict>
+</dict>
+</plist>
diff --git a/contrib/other-builds/mert_lib.xcodeproj/project.pbxproj b/contrib/other-builds/mert_lib.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..2729d67fa
--- /dev/null
+++ b/contrib/other-builds/mert_lib.xcodeproj/project.pbxproj
@@ -0,0 +1,621 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 46;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 1E2CCFB915939E5D00D858D1 /* BleuScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF3A15939E5D00D858D1 /* BleuScorer.cpp */; };
+ 1E2CCFBA15939E5D00D858D1 /* BleuScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF3B15939E5D00D858D1 /* BleuScorer.h */; };
+ 1E2CCFBC15939E5D00D858D1 /* CderScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF3D15939E5D00D858D1 /* CderScorer.cpp */; };
+ 1E2CCFBD15939E5D00D858D1 /* CderScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF3E15939E5D00D858D1 /* CderScorer.h */; };
+ 1E2CCFBE15939E5D00D858D1 /* Data.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF3F15939E5D00D858D1 /* Data.cpp */; };
+ 1E2CCFBF15939E5D00D858D1 /* Data.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF4015939E5D00D858D1 /* Data.h */; };
+ 1E2CCFC315939E5D00D858D1 /* Fdstream.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5115939E5D00D858D1 /* Fdstream.h */; };
+ 1E2CCFC415939E5D00D858D1 /* FeatureArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5215939E5D00D858D1 /* FeatureArray.cpp */; };
+ 1E2CCFC515939E5D00D858D1 /* FeatureArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5315939E5D00D858D1 /* FeatureArray.h */; };
+ 1E2CCFC615939E5D00D858D1 /* FeatureData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5415939E5D00D858D1 /* FeatureData.cpp */; };
+ 1E2CCFC715939E5D00D858D1 /* FeatureData.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5515939E5D00D858D1 /* FeatureData.h */; };
+ 1E2CCFC815939E5D00D858D1 /* FeatureDataIterator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5615939E5D00D858D1 /* FeatureDataIterator.cpp */; };
+ 1E2CCFC915939E5D00D858D1 /* FeatureDataIterator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5715939E5D00D858D1 /* FeatureDataIterator.h */; };
+ 1E2CCFCB15939E5D00D858D1 /* FeatureStats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5915939E5D00D858D1 /* FeatureStats.cpp */; };
+ 1E2CCFCC15939E5D00D858D1 /* FeatureStats.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5A15939E5D00D858D1 /* FeatureStats.h */; };
+ 1E2CCFCD15939E5D00D858D1 /* FileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5B15939E5D00D858D1 /* FileStream.cpp */; };
+ 1E2CCFCE15939E5D00D858D1 /* FileStream.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5C15939E5D00D858D1 /* FileStream.h */; };
+ 1E2CCFCF15939E5D00D858D1 /* GzFileBuf.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5D15939E5D00D858D1 /* GzFileBuf.cpp */; };
+ 1E2CCFD015939E5D00D858D1 /* GzFileBuf.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF5E15939E5D00D858D1 /* GzFileBuf.h */; };
+ 1E2CCFD115939E5D00D858D1 /* HypPackEnumerator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF5F15939E5D00D858D1 /* HypPackEnumerator.cpp */; };
+ 1E2CCFD215939E5D00D858D1 /* HypPackEnumerator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6015939E5D00D858D1 /* HypPackEnumerator.h */; };
+ 1E2CCFD315939E5D00D858D1 /* InterpolatedScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF6115939E5D00D858D1 /* InterpolatedScorer.cpp */; };
+ 1E2CCFD415939E5D00D858D1 /* InterpolatedScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6215939E5D00D858D1 /* InterpolatedScorer.h */; };
+ 1E2CCFD715939E5D00D858D1 /* MergeScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF6515939E5D00D858D1 /* MergeScorer.cpp */; };
+ 1E2CCFD815939E5D00D858D1 /* MergeScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6615939E5D00D858D1 /* MergeScorer.h */; };
+ 1E2CCFD915939E5D00D858D1 /* mert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF6715939E5D00D858D1 /* mert.cpp */; };
+ 1E2CCFDA15939E5D00D858D1 /* MiraFeatureVector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF6815939E5D00D858D1 /* MiraFeatureVector.cpp */; };
+ 1E2CCFDB15939E5D00D858D1 /* MiraFeatureVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6915939E5D00D858D1 /* MiraFeatureVector.h */; };
+ 1E2CCFDC15939E5D00D858D1 /* MiraWeightVector.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF6A15939E5D00D858D1 /* MiraWeightVector.cpp */; };
+ 1E2CCFDD15939E5D00D858D1 /* MiraWeightVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6B15939E5D00D858D1 /* MiraWeightVector.h */; };
+ 1E2CCFDE15939E5D00D858D1 /* Ngram.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6C15939E5D00D858D1 /* Ngram.h */; };
+ 1E2CCFE015939E5D00D858D1 /* Optimizer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF6E15939E5D00D858D1 /* Optimizer.cpp */; };
+ 1E2CCFE115939E5D00D858D1 /* Optimizer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF6F15939E5D00D858D1 /* Optimizer.h */; };
+ 1E2CCFE215939E5D00D858D1 /* OptimizerFactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF7015939E5D00D858D1 /* OptimizerFactory.cpp */; };
+ 1E2CCFE315939E5D00D858D1 /* OptimizerFactory.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7115939E5D00D858D1 /* OptimizerFactory.h */; };
+ 1E2CCFE515939E5D00D858D1 /* PerScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF7315939E5D00D858D1 /* PerScorer.cpp */; };
+ 1E2CCFE615939E5D00D858D1 /* PerScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7415939E5D00D858D1 /* PerScorer.h */; };
+ 1E2CCFE715939E5D00D858D1 /* Point.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF7515939E5D00D858D1 /* Point.cpp */; };
+ 1E2CCFE815939E5D00D858D1 /* Point.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7615939E5D00D858D1 /* Point.h */; };
+ 1E2CCFEA15939E5D00D858D1 /* PreProcessFilter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF7815939E5D00D858D1 /* PreProcessFilter.cpp */; };
+ 1E2CCFEB15939E5D00D858D1 /* PreProcessFilter.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7915939E5D00D858D1 /* PreProcessFilter.h */; };
+ 1E2CCFED15939E5D00D858D1 /* Reference.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7B15939E5D00D858D1 /* Reference.h */; };
+ 1E2CCFEF15939E5D00D858D1 /* ScopedVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7D15939E5D00D858D1 /* ScopedVector.h */; };
+ 1E2CCFF015939E5D00D858D1 /* ScoreArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF7E15939E5D00D858D1 /* ScoreArray.cpp */; };
+ 1E2CCFF115939E5D00D858D1 /* ScoreArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF7F15939E5D00D858D1 /* ScoreArray.h */; };
+ 1E2CCFF215939E5D00D858D1 /* ScoreData.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8015939E5D00D858D1 /* ScoreData.cpp */; };
+ 1E2CCFF315939E5D00D858D1 /* ScoreData.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8115939E5D00D858D1 /* ScoreData.h */; };
+ 1E2CCFF415939E5D00D858D1 /* ScoreDataIterator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8215939E5D00D858D1 /* ScoreDataIterator.cpp */; };
+ 1E2CCFF515939E5D00D858D1 /* ScoreDataIterator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8315939E5D00D858D1 /* ScoreDataIterator.h */; };
+ 1E2CCFF615939E5D00D858D1 /* Scorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8415939E5D00D858D1 /* Scorer.cpp */; };
+ 1E2CCFF715939E5D00D858D1 /* Scorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8515939E5D00D858D1 /* Scorer.h */; };
+ 1E2CCFF815939E5D00D858D1 /* ScorerFactory.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8615939E5D00D858D1 /* ScorerFactory.cpp */; };
+ 1E2CCFF915939E5D00D858D1 /* ScorerFactory.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8715939E5D00D858D1 /* ScorerFactory.h */; };
+ 1E2CCFFA15939E5D00D858D1 /* ScoreStats.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8815939E5D00D858D1 /* ScoreStats.cpp */; };
+ 1E2CCFFB15939E5D00D858D1 /* ScoreStats.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8915939E5D00D858D1 /* ScoreStats.h */; };
+ 1E2CCFFC15939E5D00D858D1 /* SemposOverlapping.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8A15939E5D00D858D1 /* SemposOverlapping.cpp */; };
+ 1E2CCFFD15939E5D00D858D1 /* SemposOverlapping.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8B15939E5D00D858D1 /* SemposOverlapping.h */; };
+ 1E2CCFFE15939E5D00D858D1 /* SemposScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF8C15939E5D00D858D1 /* SemposScorer.cpp */; };
+ 1E2CCFFF15939E5D00D858D1 /* SemposScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8D15939E5D00D858D1 /* SemposScorer.h */; };
+ 1E2CD00015939E5D00D858D1 /* Singleton.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF8E15939E5D00D858D1 /* Singleton.h */; };
+ 1E2CD00215939E5D00D858D1 /* alignmentStruct.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9115939E5D00D858D1 /* alignmentStruct.cpp */; };
+ 1E2CD00315939E5D00D858D1 /* alignmentStruct.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9215939E5D00D858D1 /* alignmentStruct.h */; };
+ 1E2CD00415939E5D00D858D1 /* bestShiftStruct.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9315939E5D00D858D1 /* bestShiftStruct.h */; };
+ 1E2CD00515939E5D00D858D1 /* hashMap.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9415939E5D00D858D1 /* hashMap.cpp */; };
+ 1E2CD00615939E5D00D858D1 /* hashMap.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9515939E5D00D858D1 /* hashMap.h */; };
+ 1E2CD00715939E5D00D858D1 /* hashMapInfos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9615939E5D00D858D1 /* hashMapInfos.cpp */; };
+ 1E2CD00815939E5D00D858D1 /* hashMapInfos.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9715939E5D00D858D1 /* hashMapInfos.h */; };
+ 1E2CD00915939E5D00D858D1 /* hashMapStringInfos.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9815939E5D00D858D1 /* hashMapStringInfos.cpp */; };
+ 1E2CD00A15939E5D00D858D1 /* hashMapStringInfos.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9915939E5D00D858D1 /* hashMapStringInfos.h */; };
+ 1E2CD00B15939E5D00D858D1 /* infosHasher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9A15939E5D00D858D1 /* infosHasher.cpp */; };
+ 1E2CD00C15939E5D00D858D1 /* infosHasher.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9B15939E5D00D858D1 /* infosHasher.h */; };
+ 1E2CD00D15939E5D00D858D1 /* stringHasher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9C15939E5D00D858D1 /* stringHasher.cpp */; };
+ 1E2CD00E15939E5D00D858D1 /* stringHasher.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9D15939E5D00D858D1 /* stringHasher.h */; };
+ 1E2CD00F15939E5D00D858D1 /* stringInfosHasher.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCF9E15939E5D00D858D1 /* stringInfosHasher.cpp */; };
+ 1E2CD01015939E5D00D858D1 /* stringInfosHasher.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCF9F15939E5D00D858D1 /* stringInfosHasher.h */; };
+ 1E2CD01115939E5D00D858D1 /* terAlignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFA015939E5D00D858D1 /* terAlignment.cpp */; };
+ 1E2CD01215939E5D00D858D1 /* terAlignment.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFA115939E5D00D858D1 /* terAlignment.h */; };
+ 1E2CD01315939E5D00D858D1 /* tercalc.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFA215939E5D00D858D1 /* tercalc.cpp */; };
+ 1E2CD01415939E5D00D858D1 /* tercalc.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFA315939E5D00D858D1 /* tercalc.h */; };
+ 1E2CD01515939E5D00D858D1 /* terShift.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFA415939E5D00D858D1 /* terShift.cpp */; };
+ 1E2CD01615939E5D00D858D1 /* terShift.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFA515939E5D00D858D1 /* terShift.h */; };
+ 1E2CD01715939E5D00D858D1 /* tools.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFA615939E5D00D858D1 /* tools.cpp */; };
+ 1E2CD01815939E5D00D858D1 /* tools.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFA715939E5D00D858D1 /* tools.h */; };
+ 1E2CD01915939E5D00D858D1 /* TerScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFA815939E5D00D858D1 /* TerScorer.cpp */; };
+ 1E2CD01A15939E5D00D858D1 /* TerScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFA915939E5D00D858D1 /* TerScorer.h */; };
+ 1E2CD01C15939E5D00D858D1 /* Timer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFAE15939E5D00D858D1 /* Timer.cpp */; };
+ 1E2CD01D15939E5D00D858D1 /* Timer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFAF15939E5D00D858D1 /* Timer.h */; };
+ 1E2CD01F15939E5D00D858D1 /* Types.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFB215939E5D00D858D1 /* Types.h */; };
+ 1E2CD02015939E5D00D858D1 /* Util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFB315939E5D00D858D1 /* Util.cpp */; };
+ 1E2CD02115939E5D00D858D1 /* Util.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFB415939E5D00D858D1 /* Util.h */; };
+ 1E2CD02315939E5D00D858D1 /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2CCFB615939E5D00D858D1 /* Vocabulary.cpp */; };
+ 1E2CD02415939E5D00D858D1 /* Vocabulary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E2CCFB715939E5D00D858D1 /* Vocabulary.h */; };
+ 1E39621B1594CFD1006FE978 /* PermutationScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E3962191594CFD1006FE978 /* PermutationScorer.cpp */; };
+ 1E3962201594CFF9006FE978 /* Permutation.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E39621E1594CFF9006FE978 /* Permutation.cpp */; };
+ 1E3962211594CFF9006FE978 /* Permutation.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E39621F1594CFF9006FE978 /* Permutation.h */; };
+ 1E3962231594D0FF006FE978 /* SentenceLevelScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E3962221594D0FF006FE978 /* SentenceLevelScorer.cpp */; };
+ 1E3962251594D12C006FE978 /* SentenceLevelScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E3962241594D12C006FE978 /* SentenceLevelScorer.h */; };
+ 1E43CA3415973474000E29D3 /* PermutationScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E43CA3315973474000E29D3 /* PermutationScorer.h */; };
+ 1E689F21159A529C00DD995A /* ThreadPool.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E689F1F159A529C00DD995A /* ThreadPool.cpp */; };
+ 1E689F22159A529C00DD995A /* ThreadPool.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E689F20159A529C00DD995A /* ThreadPool.h */; };
+ 1EE52B561596B3E4006DC938 /* StatisticsBasedScorer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE52B551596B3E4006DC938 /* StatisticsBasedScorer.h */; };
+ 1EE52B591596B3FC006DC938 /* StatisticsBasedScorer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE52B581596B3FC006DC938 /* StatisticsBasedScorer.cpp */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXFileReference section */
+ 1E2CCF3315939E2D00D858D1 /* libmert_lib.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libmert_lib.a; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1E2CCF3A15939E5D00D858D1 /* BleuScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BleuScorer.cpp; path = ../../mert/BleuScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF3B15939E5D00D858D1 /* BleuScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BleuScorer.h; path = ../../mert/BleuScorer.h; sourceTree = "<group>"; };
+ 1E2CCF3D15939E5D00D858D1 /* CderScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CderScorer.cpp; path = ../../mert/CderScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF3E15939E5D00D858D1 /* CderScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CderScorer.h; path = ../../mert/CderScorer.h; sourceTree = "<group>"; };
+ 1E2CCF3F15939E5D00D858D1 /* Data.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Data.cpp; path = ../../mert/Data.cpp; sourceTree = "<group>"; };
+ 1E2CCF4015939E5D00D858D1 /* Data.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Data.h; path = ../../mert/Data.h; sourceTree = "<group>"; };
+ 1E2CCF5115939E5D00D858D1 /* Fdstream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Fdstream.h; path = ../../mert/Fdstream.h; sourceTree = "<group>"; };
+ 1E2CCF5215939E5D00D858D1 /* FeatureArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FeatureArray.cpp; path = ../../mert/FeatureArray.cpp; sourceTree = "<group>"; };
+ 1E2CCF5315939E5D00D858D1 /* FeatureArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FeatureArray.h; path = ../../mert/FeatureArray.h; sourceTree = "<group>"; };
+ 1E2CCF5415939E5D00D858D1 /* FeatureData.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FeatureData.cpp; path = ../../mert/FeatureData.cpp; sourceTree = "<group>"; };
+ 1E2CCF5515939E5D00D858D1 /* FeatureData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FeatureData.h; path = ../../mert/FeatureData.h; sourceTree = "<group>"; };
+ 1E2CCF5615939E5D00D858D1 /* FeatureDataIterator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FeatureDataIterator.cpp; path = ../../mert/FeatureDataIterator.cpp; sourceTree = "<group>"; };
+ 1E2CCF5715939E5D00D858D1 /* FeatureDataIterator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FeatureDataIterator.h; path = ../../mert/FeatureDataIterator.h; sourceTree = "<group>"; };
+ 1E2CCF5915939E5D00D858D1 /* FeatureStats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FeatureStats.cpp; path = ../../mert/FeatureStats.cpp; sourceTree = "<group>"; };
+ 1E2CCF5A15939E5D00D858D1 /* FeatureStats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FeatureStats.h; path = ../../mert/FeatureStats.h; sourceTree = "<group>"; };
+ 1E2CCF5B15939E5D00D858D1 /* FileStream.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FileStream.cpp; path = ../../mert/FileStream.cpp; sourceTree = "<group>"; };
+ 1E2CCF5C15939E5D00D858D1 /* FileStream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FileStream.h; path = ../../mert/FileStream.h; sourceTree = "<group>"; };
+ 1E2CCF5D15939E5D00D858D1 /* GzFileBuf.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = GzFileBuf.cpp; path = ../../mert/GzFileBuf.cpp; sourceTree = "<group>"; };
+ 1E2CCF5E15939E5D00D858D1 /* GzFileBuf.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = GzFileBuf.h; path = ../../mert/GzFileBuf.h; sourceTree = "<group>"; };
+ 1E2CCF5F15939E5D00D858D1 /* HypPackEnumerator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = HypPackEnumerator.cpp; path = ../../mert/HypPackEnumerator.cpp; sourceTree = "<group>"; };
+ 1E2CCF6015939E5D00D858D1 /* HypPackEnumerator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HypPackEnumerator.h; path = ../../mert/HypPackEnumerator.h; sourceTree = "<group>"; };
+ 1E2CCF6115939E5D00D858D1 /* InterpolatedScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = InterpolatedScorer.cpp; path = ../../mert/InterpolatedScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF6215939E5D00D858D1 /* InterpolatedScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = InterpolatedScorer.h; path = ../../mert/InterpolatedScorer.h; sourceTree = "<group>"; };
+ 1E2CCF6515939E5D00D858D1 /* MergeScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = MergeScorer.cpp; path = ../../mert/MergeScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF6615939E5D00D858D1 /* MergeScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MergeScorer.h; path = ../../mert/MergeScorer.h; sourceTree = "<group>"; };
+ 1E2CCF6715939E5D00D858D1 /* mert.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mert.cpp; path = ../../mert/mert.cpp; sourceTree = "<group>"; };
+ 1E2CCF6815939E5D00D858D1 /* MiraFeatureVector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = MiraFeatureVector.cpp; path = ../../mert/MiraFeatureVector.cpp; sourceTree = "<group>"; };
+ 1E2CCF6915939E5D00D858D1 /* MiraFeatureVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MiraFeatureVector.h; path = ../../mert/MiraFeatureVector.h; sourceTree = "<group>"; };
+ 1E2CCF6A15939E5D00D858D1 /* MiraWeightVector.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = MiraWeightVector.cpp; path = ../../mert/MiraWeightVector.cpp; sourceTree = "<group>"; };
+ 1E2CCF6B15939E5D00D858D1 /* MiraWeightVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MiraWeightVector.h; path = ../../mert/MiraWeightVector.h; sourceTree = "<group>"; };
+ 1E2CCF6C15939E5D00D858D1 /* Ngram.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Ngram.h; path = ../../mert/Ngram.h; sourceTree = "<group>"; };
+ 1E2CCF6E15939E5D00D858D1 /* Optimizer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Optimizer.cpp; path = ../../mert/Optimizer.cpp; sourceTree = "<group>"; };
+ 1E2CCF6F15939E5D00D858D1 /* Optimizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Optimizer.h; path = ../../mert/Optimizer.h; sourceTree = "<group>"; };
+ 1E2CCF7015939E5D00D858D1 /* OptimizerFactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = OptimizerFactory.cpp; path = ../../mert/OptimizerFactory.cpp; sourceTree = "<group>"; };
+ 1E2CCF7115939E5D00D858D1 /* OptimizerFactory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OptimizerFactory.h; path = ../../mert/OptimizerFactory.h; sourceTree = "<group>"; };
+ 1E2CCF7315939E5D00D858D1 /* PerScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PerScorer.cpp; path = ../../mert/PerScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF7415939E5D00D858D1 /* PerScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PerScorer.h; path = ../../mert/PerScorer.h; sourceTree = "<group>"; };
+ 1E2CCF7515939E5D00D858D1 /* Point.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Point.cpp; path = ../../mert/Point.cpp; sourceTree = "<group>"; };
+ 1E2CCF7615939E5D00D858D1 /* Point.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Point.h; path = ../../mert/Point.h; sourceTree = "<group>"; };
+ 1E2CCF7815939E5D00D858D1 /* PreProcessFilter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PreProcessFilter.cpp; path = ../../mert/PreProcessFilter.cpp; sourceTree = "<group>"; };
+ 1E2CCF7915939E5D00D858D1 /* PreProcessFilter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PreProcessFilter.h; path = ../../mert/PreProcessFilter.h; sourceTree = "<group>"; };
+ 1E2CCF7B15939E5D00D858D1 /* Reference.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Reference.h; path = ../../mert/Reference.h; sourceTree = "<group>"; };
+ 1E2CCF7D15939E5D00D858D1 /* ScopedVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ScopedVector.h; path = ../../mert/ScopedVector.h; sourceTree = "<group>"; };
+ 1E2CCF7E15939E5D00D858D1 /* ScoreArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreArray.cpp; path = ../../mert/ScoreArray.cpp; sourceTree = "<group>"; };
+ 1E2CCF7F15939E5D00D858D1 /* ScoreArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ScoreArray.h; path = ../../mert/ScoreArray.h; sourceTree = "<group>"; };
+ 1E2CCF8015939E5D00D858D1 /* ScoreData.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreData.cpp; path = ../../mert/ScoreData.cpp; sourceTree = "<group>"; };
+ 1E2CCF8115939E5D00D858D1 /* ScoreData.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ScoreData.h; path = ../../mert/ScoreData.h; sourceTree = "<group>"; };
+ 1E2CCF8215939E5D00D858D1 /* ScoreDataIterator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreDataIterator.cpp; path = ../../mert/ScoreDataIterator.cpp; sourceTree = "<group>"; };
+ 1E2CCF8315939E5D00D858D1 /* ScoreDataIterator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ScoreDataIterator.h; path = ../../mert/ScoreDataIterator.h; sourceTree = "<group>"; };
+ 1E2CCF8415939E5D00D858D1 /* Scorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Scorer.cpp; path = ../../mert/Scorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF8515939E5D00D858D1 /* Scorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Scorer.h; path = ../../mert/Scorer.h; sourceTree = "<group>"; };
+ 1E2CCF8615939E5D00D858D1 /* ScorerFactory.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ScorerFactory.cpp; path = ../../mert/ScorerFactory.cpp; sourceTree = "<group>"; };
+ 1E2CCF8715939E5D00D858D1 /* ScorerFactory.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ScorerFactory.h; path = ../../mert/ScorerFactory.h; sourceTree = "<group>"; };
+ 1E2CCF8815939E5D00D858D1 /* ScoreStats.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ScoreStats.cpp; path = ../../mert/ScoreStats.cpp; sourceTree = "<group>"; };
+ 1E2CCF8915939E5D00D858D1 /* ScoreStats.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ScoreStats.h; path = ../../mert/ScoreStats.h; sourceTree = "<group>"; };
+ 1E2CCF8A15939E5D00D858D1 /* SemposOverlapping.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SemposOverlapping.cpp; path = ../../mert/SemposOverlapping.cpp; sourceTree = "<group>"; };
+ 1E2CCF8B15939E5D00D858D1 /* SemposOverlapping.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SemposOverlapping.h; path = ../../mert/SemposOverlapping.h; sourceTree = "<group>"; };
+ 1E2CCF8C15939E5D00D858D1 /* SemposScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SemposScorer.cpp; path = ../../mert/SemposScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCF8D15939E5D00D858D1 /* SemposScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SemposScorer.h; path = ../../mert/SemposScorer.h; sourceTree = "<group>"; };
+ 1E2CCF8E15939E5D00D858D1 /* Singleton.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Singleton.h; path = ../../mert/Singleton.h; sourceTree = "<group>"; };
+ 1E2CCF9115939E5D00D858D1 /* alignmentStruct.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = alignmentStruct.cpp; sourceTree = "<group>"; };
+ 1E2CCF9215939E5D00D858D1 /* alignmentStruct.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = alignmentStruct.h; sourceTree = "<group>"; };
+ 1E2CCF9315939E5D00D858D1 /* bestShiftStruct.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = bestShiftStruct.h; sourceTree = "<group>"; };
+ 1E2CCF9415939E5D00D858D1 /* hashMap.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hashMap.cpp; sourceTree = "<group>"; };
+ 1E2CCF9515939E5D00D858D1 /* hashMap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hashMap.h; sourceTree = "<group>"; };
+ 1E2CCF9615939E5D00D858D1 /* hashMapInfos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hashMapInfos.cpp; sourceTree = "<group>"; };
+ 1E2CCF9715939E5D00D858D1 /* hashMapInfos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hashMapInfos.h; sourceTree = "<group>"; };
+ 1E2CCF9815939E5D00D858D1 /* hashMapStringInfos.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = hashMapStringInfos.cpp; sourceTree = "<group>"; };
+ 1E2CCF9915939E5D00D858D1 /* hashMapStringInfos.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hashMapStringInfos.h; sourceTree = "<group>"; };
+ 1E2CCF9A15939E5D00D858D1 /* infosHasher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = infosHasher.cpp; sourceTree = "<group>"; };
+ 1E2CCF9B15939E5D00D858D1 /* infosHasher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = infosHasher.h; sourceTree = "<group>"; };
+ 1E2CCF9C15939E5D00D858D1 /* stringHasher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = stringHasher.cpp; sourceTree = "<group>"; };
+ 1E2CCF9D15939E5D00D858D1 /* stringHasher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stringHasher.h; sourceTree = "<group>"; };
+ 1E2CCF9E15939E5D00D858D1 /* stringInfosHasher.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = stringInfosHasher.cpp; sourceTree = "<group>"; };
+ 1E2CCF9F15939E5D00D858D1 /* stringInfosHasher.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = stringInfosHasher.h; sourceTree = "<group>"; };
+ 1E2CCFA015939E5D00D858D1 /* terAlignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = terAlignment.cpp; sourceTree = "<group>"; };
+ 1E2CCFA115939E5D00D858D1 /* terAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = terAlignment.h; sourceTree = "<group>"; };
+ 1E2CCFA215939E5D00D858D1 /* tercalc.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tercalc.cpp; sourceTree = "<group>"; };
+ 1E2CCFA315939E5D00D858D1 /* tercalc.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tercalc.h; sourceTree = "<group>"; };
+ 1E2CCFA415939E5D00D858D1 /* terShift.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = terShift.cpp; sourceTree = "<group>"; };
+ 1E2CCFA515939E5D00D858D1 /* terShift.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = terShift.h; sourceTree = "<group>"; };
+ 1E2CCFA615939E5D00D858D1 /* tools.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = tools.cpp; sourceTree = "<group>"; };
+ 1E2CCFA715939E5D00D858D1 /* tools.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = tools.h; sourceTree = "<group>"; };
+ 1E2CCFA815939E5D00D858D1 /* TerScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TerScorer.cpp; path = ../../mert/TerScorer.cpp; sourceTree = "<group>"; };
+ 1E2CCFA915939E5D00D858D1 /* TerScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TerScorer.h; path = ../../mert/TerScorer.h; sourceTree = "<group>"; };
+ 1E2CCFAE15939E5D00D858D1 /* Timer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Timer.cpp; path = ../../mert/Timer.cpp; sourceTree = "<group>"; };
+ 1E2CCFAF15939E5D00D858D1 /* Timer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Timer.h; path = ../../mert/Timer.h; sourceTree = "<group>"; };
+ 1E2CCFB215939E5D00D858D1 /* Types.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Types.h; path = ../../mert/Types.h; sourceTree = "<group>"; };
+ 1E2CCFB315939E5D00D858D1 /* Util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Util.cpp; path = ../../mert/Util.cpp; sourceTree = "<group>"; };
+ 1E2CCFB415939E5D00D858D1 /* Util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Util.h; path = ../../mert/Util.h; sourceTree = "<group>"; };
+ 1E2CCFB615939E5D00D858D1 /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = ../../mert/Vocabulary.cpp; sourceTree = "<group>"; };
+ 1E2CCFB715939E5D00D858D1 /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = ../../mert/Vocabulary.h; sourceTree = "<group>"; };
+ 1E3962191594CFD1006FE978 /* PermutationScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PermutationScorer.cpp; path = ../../mert/PermutationScorer.cpp; sourceTree = "<group>"; };
+ 1E39621E1594CFF9006FE978 /* Permutation.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Permutation.cpp; path = ../../mert/Permutation.cpp; sourceTree = "<group>"; };
+ 1E39621F1594CFF9006FE978 /* Permutation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Permutation.h; path = ../../mert/Permutation.h; sourceTree = "<group>"; };
+ 1E3962221594D0FF006FE978 /* SentenceLevelScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SentenceLevelScorer.cpp; path = ../../mert/SentenceLevelScorer.cpp; sourceTree = "<group>"; };
+ 1E3962241594D12C006FE978 /* SentenceLevelScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceLevelScorer.h; path = ../../mert/SentenceLevelScorer.h; sourceTree = "<group>"; };
+ 1E43CA3315973474000E29D3 /* PermutationScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PermutationScorer.h; path = ../../mert/PermutationScorer.h; sourceTree = "<group>"; };
+ 1E689F1F159A529C00DD995A /* ThreadPool.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ThreadPool.cpp; path = ../../moses/src/ThreadPool.cpp; sourceTree = "<group>"; };
+ 1E689F20159A529C00DD995A /* ThreadPool.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ThreadPool.h; path = ../../moses/src/ThreadPool.h; sourceTree = "<group>"; };
+ 1EE52B551596B3E4006DC938 /* StatisticsBasedScorer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = StatisticsBasedScorer.h; path = ../../mert/StatisticsBasedScorer.h; sourceTree = "<group>"; };
+ 1EE52B581596B3FC006DC938 /* StatisticsBasedScorer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = StatisticsBasedScorer.cpp; path = ../../mert/StatisticsBasedScorer.cpp; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ 1E2CCF3015939E2D00D858D1 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 1E2CCF2815939E2D00D858D1 = {
+ isa = PBXGroup;
+ children = (
+ 1E689F1F159A529C00DD995A /* ThreadPool.cpp */,
+ 1E689F20159A529C00DD995A /* ThreadPool.h */,
+ 1EE52B581596B3FC006DC938 /* StatisticsBasedScorer.cpp */,
+ 1EE52B551596B3E4006DC938 /* StatisticsBasedScorer.h */,
+ 1E3962241594D12C006FE978 /* SentenceLevelScorer.h */,
+ 1E3962221594D0FF006FE978 /* SentenceLevelScorer.cpp */,
+ 1E39621E1594CFF9006FE978 /* Permutation.cpp */,
+ 1E39621F1594CFF9006FE978 /* Permutation.h */,
+ 1E3962191594CFD1006FE978 /* PermutationScorer.cpp */,
+ 1E43CA3315973474000E29D3 /* PermutationScorer.h */,
+ 1E2CCF3A15939E5D00D858D1 /* BleuScorer.cpp */,
+ 1E2CCF3B15939E5D00D858D1 /* BleuScorer.h */,
+ 1E2CCF3D15939E5D00D858D1 /* CderScorer.cpp */,
+ 1E2CCF3E15939E5D00D858D1 /* CderScorer.h */,
+ 1E2CCF3F15939E5D00D858D1 /* Data.cpp */,
+ 1E2CCF4015939E5D00D858D1 /* Data.h */,
+ 1E2CCF5115939E5D00D858D1 /* Fdstream.h */,
+ 1E2CCF5215939E5D00D858D1 /* FeatureArray.cpp */,
+ 1E2CCF5315939E5D00D858D1 /* FeatureArray.h */,
+ 1E2CCF5415939E5D00D858D1 /* FeatureData.cpp */,
+ 1E2CCF5515939E5D00D858D1 /* FeatureData.h */,
+ 1E2CCF5615939E5D00D858D1 /* FeatureDataIterator.cpp */,
+ 1E2CCF5715939E5D00D858D1 /* FeatureDataIterator.h */,
+ 1E2CCF5915939E5D00D858D1 /* FeatureStats.cpp */,
+ 1E2CCF5A15939E5D00D858D1 /* FeatureStats.h */,
+ 1E2CCF5B15939E5D00D858D1 /* FileStream.cpp */,
+ 1E2CCF5C15939E5D00D858D1 /* FileStream.h */,
+ 1E2CCF5D15939E5D00D858D1 /* GzFileBuf.cpp */,
+ 1E2CCF5E15939E5D00D858D1 /* GzFileBuf.h */,
+ 1E2CCF5F15939E5D00D858D1 /* HypPackEnumerator.cpp */,
+ 1E2CCF6015939E5D00D858D1 /* HypPackEnumerator.h */,
+ 1E2CCF6115939E5D00D858D1 /* InterpolatedScorer.cpp */,
+ 1E2CCF6215939E5D00D858D1 /* InterpolatedScorer.h */,
+ 1E2CCF6515939E5D00D858D1 /* MergeScorer.cpp */,
+ 1E2CCF6615939E5D00D858D1 /* MergeScorer.h */,
+ 1E2CCF6715939E5D00D858D1 /* mert.cpp */,
+ 1E2CCF6815939E5D00D858D1 /* MiraFeatureVector.cpp */,
+ 1E2CCF6915939E5D00D858D1 /* MiraFeatureVector.h */,
+ 1E2CCF6A15939E5D00D858D1 /* MiraWeightVector.cpp */,
+ 1E2CCF6B15939E5D00D858D1 /* MiraWeightVector.h */,
+ 1E2CCF6C15939E5D00D858D1 /* Ngram.h */,
+ 1E2CCF6E15939E5D00D858D1 /* Optimizer.cpp */,
+ 1E2CCF6F15939E5D00D858D1 /* Optimizer.h */,
+ 1E2CCF7015939E5D00D858D1 /* OptimizerFactory.cpp */,
+ 1E2CCF7115939E5D00D858D1 /* OptimizerFactory.h */,
+ 1E2CCF7315939E5D00D858D1 /* PerScorer.cpp */,
+ 1E2CCF7415939E5D00D858D1 /* PerScorer.h */,
+ 1E2CCF7515939E5D00D858D1 /* Point.cpp */,
+ 1E2CCF7615939E5D00D858D1 /* Point.h */,
+ 1E2CCF7815939E5D00D858D1 /* PreProcessFilter.cpp */,
+ 1E2CCF7915939E5D00D858D1 /* PreProcessFilter.h */,
+ 1E2CCF7B15939E5D00D858D1 /* Reference.h */,
+ 1E2CCF7D15939E5D00D858D1 /* ScopedVector.h */,
+ 1E2CCF7E15939E5D00D858D1 /* ScoreArray.cpp */,
+ 1E2CCF7F15939E5D00D858D1 /* ScoreArray.h */,
+ 1E2CCF8015939E5D00D858D1 /* ScoreData.cpp */,
+ 1E2CCF8115939E5D00D858D1 /* ScoreData.h */,
+ 1E2CCF8215939E5D00D858D1 /* ScoreDataIterator.cpp */,
+ 1E2CCF8315939E5D00D858D1 /* ScoreDataIterator.h */,
+ 1E2CCF8415939E5D00D858D1 /* Scorer.cpp */,
+ 1E2CCF8515939E5D00D858D1 /* Scorer.h */,
+ 1E2CCF8615939E5D00D858D1 /* ScorerFactory.cpp */,
+ 1E2CCF8715939E5D00D858D1 /* ScorerFactory.h */,
+ 1E2CCF8815939E5D00D858D1 /* ScoreStats.cpp */,
+ 1E2CCF8915939E5D00D858D1 /* ScoreStats.h */,
+ 1E2CCF8A15939E5D00D858D1 /* SemposOverlapping.cpp */,
+ 1E2CCF8B15939E5D00D858D1 /* SemposOverlapping.h */,
+ 1E2CCF8C15939E5D00D858D1 /* SemposScorer.cpp */,
+ 1E2CCF8D15939E5D00D858D1 /* SemposScorer.h */,
+ 1E2CCF8E15939E5D00D858D1 /* Singleton.h */,
+ 1E2CCF9015939E5D00D858D1 /* TER */,
+ 1E2CCFA815939E5D00D858D1 /* TerScorer.cpp */,
+ 1E2CCFA915939E5D00D858D1 /* TerScorer.h */,
+ 1E2CCFAE15939E5D00D858D1 /* Timer.cpp */,
+ 1E2CCFAF15939E5D00D858D1 /* Timer.h */,
+ 1E2CCFB215939E5D00D858D1 /* Types.h */,
+ 1E2CCFB315939E5D00D858D1 /* Util.cpp */,
+ 1E2CCFB415939E5D00D858D1 /* Util.h */,
+ 1E2CCFB615939E5D00D858D1 /* Vocabulary.cpp */,
+ 1E2CCFB715939E5D00D858D1 /* Vocabulary.h */,
+ 1E2CCF3415939E2D00D858D1 /* Products */,
+ );
+ sourceTree = "<group>";
+ };
+ 1E2CCF3415939E2D00D858D1 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1E2CCF3315939E2D00D858D1 /* libmert_lib.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ 1E2CCF9015939E5D00D858D1 /* TER */ = {
+ isa = PBXGroup;
+ children = (
+ 1E2CCF9115939E5D00D858D1 /* alignmentStruct.cpp */,
+ 1E2CCF9215939E5D00D858D1 /* alignmentStruct.h */,
+ 1E2CCF9315939E5D00D858D1 /* bestShiftStruct.h */,
+ 1E2CCF9415939E5D00D858D1 /* hashMap.cpp */,
+ 1E2CCF9515939E5D00D858D1 /* hashMap.h */,
+ 1E2CCF9615939E5D00D858D1 /* hashMapInfos.cpp */,
+ 1E2CCF9715939E5D00D858D1 /* hashMapInfos.h */,
+ 1E2CCF9815939E5D00D858D1 /* hashMapStringInfos.cpp */,
+ 1E2CCF9915939E5D00D858D1 /* hashMapStringInfos.h */,
+ 1E2CCF9A15939E5D00D858D1 /* infosHasher.cpp */,
+ 1E2CCF9B15939E5D00D858D1 /* infosHasher.h */,
+ 1E2CCF9C15939E5D00D858D1 /* stringHasher.cpp */,
+ 1E2CCF9D15939E5D00D858D1 /* stringHasher.h */,
+ 1E2CCF9E15939E5D00D858D1 /* stringInfosHasher.cpp */,
+ 1E2CCF9F15939E5D00D858D1 /* stringInfosHasher.h */,
+ 1E2CCFA015939E5D00D858D1 /* terAlignment.cpp */,
+ 1E2CCFA115939E5D00D858D1 /* terAlignment.h */,
+ 1E2CCFA215939E5D00D858D1 /* tercalc.cpp */,
+ 1E2CCFA315939E5D00D858D1 /* tercalc.h */,
+ 1E2CCFA415939E5D00D858D1 /* terShift.cpp */,
+ 1E2CCFA515939E5D00D858D1 /* terShift.h */,
+ 1E2CCFA615939E5D00D858D1 /* tools.cpp */,
+ 1E2CCFA715939E5D00D858D1 /* tools.h */,
+ );
+ name = TER;
+ path = ../../mert/TER;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXHeadersBuildPhase section */
+ 1E2CCF3115939E2D00D858D1 /* Headers */ = {
+ isa = PBXHeadersBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E2CCFBA15939E5D00D858D1 /* BleuScorer.h in Headers */,
+ 1E2CCFBD15939E5D00D858D1 /* CderScorer.h in Headers */,
+ 1E2CCFBF15939E5D00D858D1 /* Data.h in Headers */,
+ 1E2CCFC315939E5D00D858D1 /* Fdstream.h in Headers */,
+ 1E2CCFC515939E5D00D858D1 /* FeatureArray.h in Headers */,
+ 1E2CCFC715939E5D00D858D1 /* FeatureData.h in Headers */,
+ 1E2CCFC915939E5D00D858D1 /* FeatureDataIterator.h in Headers */,
+ 1E2CCFCC15939E5D00D858D1 /* FeatureStats.h in Headers */,
+ 1E2CCFCE15939E5D00D858D1 /* FileStream.h in Headers */,
+ 1E2CCFD015939E5D00D858D1 /* GzFileBuf.h in Headers */,
+ 1E2CCFD215939E5D00D858D1 /* HypPackEnumerator.h in Headers */,
+ 1E2CCFD415939E5D00D858D1 /* InterpolatedScorer.h in Headers */,
+ 1E2CCFD815939E5D00D858D1 /* MergeScorer.h in Headers */,
+ 1E2CCFDB15939E5D00D858D1 /* MiraFeatureVector.h in Headers */,
+ 1E2CCFDD15939E5D00D858D1 /* MiraWeightVector.h in Headers */,
+ 1E2CCFDE15939E5D00D858D1 /* Ngram.h in Headers */,
+ 1E2CCFE115939E5D00D858D1 /* Optimizer.h in Headers */,
+ 1E2CCFE315939E5D00D858D1 /* OptimizerFactory.h in Headers */,
+ 1E2CCFE615939E5D00D858D1 /* PerScorer.h in Headers */,
+ 1E2CCFE815939E5D00D858D1 /* Point.h in Headers */,
+ 1E2CCFEB15939E5D00D858D1 /* PreProcessFilter.h in Headers */,
+ 1E2CCFED15939E5D00D858D1 /* Reference.h in Headers */,
+ 1E2CCFEF15939E5D00D858D1 /* ScopedVector.h in Headers */,
+ 1E2CCFF115939E5D00D858D1 /* ScoreArray.h in Headers */,
+ 1E2CCFF315939E5D00D858D1 /* ScoreData.h in Headers */,
+ 1E2CCFF515939E5D00D858D1 /* ScoreDataIterator.h in Headers */,
+ 1E2CCFF715939E5D00D858D1 /* Scorer.h in Headers */,
+ 1E2CCFF915939E5D00D858D1 /* ScorerFactory.h in Headers */,
+ 1E2CCFFB15939E5D00D858D1 /* ScoreStats.h in Headers */,
+ 1E2CCFFD15939E5D00D858D1 /* SemposOverlapping.h in Headers */,
+ 1E2CCFFF15939E5D00D858D1 /* SemposScorer.h in Headers */,
+ 1E2CD00015939E5D00D858D1 /* Singleton.h in Headers */,
+ 1E2CD00315939E5D00D858D1 /* alignmentStruct.h in Headers */,
+ 1E2CD00415939E5D00D858D1 /* bestShiftStruct.h in Headers */,
+ 1E2CD00615939E5D00D858D1 /* hashMap.h in Headers */,
+ 1E2CD00815939E5D00D858D1 /* hashMapInfos.h in Headers */,
+ 1E2CD00A15939E5D00D858D1 /* hashMapStringInfos.h in Headers */,
+ 1E2CD00C15939E5D00D858D1 /* infosHasher.h in Headers */,
+ 1E2CD00E15939E5D00D858D1 /* stringHasher.h in Headers */,
+ 1E2CD01015939E5D00D858D1 /* stringInfosHasher.h in Headers */,
+ 1E2CD01215939E5D00D858D1 /* terAlignment.h in Headers */,
+ 1E2CD01415939E5D00D858D1 /* tercalc.h in Headers */,
+ 1E2CD01615939E5D00D858D1 /* terShift.h in Headers */,
+ 1E2CD01815939E5D00D858D1 /* tools.h in Headers */,
+ 1E2CD01A15939E5D00D858D1 /* TerScorer.h in Headers */,
+ 1E2CD01D15939E5D00D858D1 /* Timer.h in Headers */,
+ 1E2CD01F15939E5D00D858D1 /* Types.h in Headers */,
+ 1E2CD02115939E5D00D858D1 /* Util.h in Headers */,
+ 1E2CD02415939E5D00D858D1 /* Vocabulary.h in Headers */,
+ 1E3962211594CFF9006FE978 /* Permutation.h in Headers */,
+ 1E3962251594D12C006FE978 /* SentenceLevelScorer.h in Headers */,
+ 1EE52B561596B3E4006DC938 /* StatisticsBasedScorer.h in Headers */,
+ 1E43CA3415973474000E29D3 /* PermutationScorer.h in Headers */,
+ 1E689F22159A529C00DD995A /* ThreadPool.h in Headers */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXHeadersBuildPhase section */
+
+/* Begin PBXNativeTarget section */
+ 1E2CCF3215939E2D00D858D1 /* mert_lib */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1E2CCF3715939E2D00D858D1 /* Build configuration list for PBXNativeTarget "mert_lib" */;
+ buildPhases = (
+ 1E2CCF2F15939E2D00D858D1 /* Sources */,
+ 1E2CCF3015939E2D00D858D1 /* Frameworks */,
+ 1E2CCF3115939E2D00D858D1 /* Headers */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ );
+ name = mert_lib;
+ productName = mert_lib;
+ productReference = 1E2CCF3315939E2D00D858D1 /* libmert_lib.a */;
+ productType = "com.apple.product-type.library.static";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 1E2CCF2A15939E2D00D858D1 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1E2CCF2D15939E2D00D858D1 /* Build configuration list for PBXProject "mert_lib" */;
+ compatibilityVersion = "Xcode 3.2";
+ developmentRegion = English;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ );
+ mainGroup = 1E2CCF2815939E2D00D858D1;
+ productRefGroup = 1E2CCF3415939E2D00D858D1 /* Products */;
+ projectDirPath = "";
+ projectRoot = "";
+ targets = (
+ 1E2CCF3215939E2D00D858D1 /* mert_lib */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 1E2CCF2F15939E2D00D858D1 /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E2CCFB915939E5D00D858D1 /* BleuScorer.cpp in Sources */,
+ 1E2CCFBC15939E5D00D858D1 /* CderScorer.cpp in Sources */,
+ 1E2CCFBE15939E5D00D858D1 /* Data.cpp in Sources */,
+ 1E2CCFC415939E5D00D858D1 /* FeatureArray.cpp in Sources */,
+ 1E2CCFC615939E5D00D858D1 /* FeatureData.cpp in Sources */,
+ 1E2CCFC815939E5D00D858D1 /* FeatureDataIterator.cpp in Sources */,
+ 1E2CCFCB15939E5D00D858D1 /* FeatureStats.cpp in Sources */,
+ 1E2CCFCD15939E5D00D858D1 /* FileStream.cpp in Sources */,
+ 1E2CCFCF15939E5D00D858D1 /* GzFileBuf.cpp in Sources */,
+ 1E2CCFD115939E5D00D858D1 /* HypPackEnumerator.cpp in Sources */,
+ 1E2CCFD315939E5D00D858D1 /* InterpolatedScorer.cpp in Sources */,
+ 1E2CCFD715939E5D00D858D1 /* MergeScorer.cpp in Sources */,
+ 1E2CCFD915939E5D00D858D1 /* mert.cpp in Sources */,
+ 1E2CCFDA15939E5D00D858D1 /* MiraFeatureVector.cpp in Sources */,
+ 1E2CCFDC15939E5D00D858D1 /* MiraWeightVector.cpp in Sources */,
+ 1E2CCFE015939E5D00D858D1 /* Optimizer.cpp in Sources */,
+ 1E2CCFE215939E5D00D858D1 /* OptimizerFactory.cpp in Sources */,
+ 1E2CCFE515939E5D00D858D1 /* PerScorer.cpp in Sources */,
+ 1E2CCFE715939E5D00D858D1 /* Point.cpp in Sources */,
+ 1E2CCFEA15939E5D00D858D1 /* PreProcessFilter.cpp in Sources */,
+ 1E2CCFF015939E5D00D858D1 /* ScoreArray.cpp in Sources */,
+ 1E2CCFF215939E5D00D858D1 /* ScoreData.cpp in Sources */,
+ 1E2CCFF415939E5D00D858D1 /* ScoreDataIterator.cpp in Sources */,
+ 1E2CCFF615939E5D00D858D1 /* Scorer.cpp in Sources */,
+ 1E2CCFF815939E5D00D858D1 /* ScorerFactory.cpp in Sources */,
+ 1E2CCFFA15939E5D00D858D1 /* ScoreStats.cpp in Sources */,
+ 1E2CCFFC15939E5D00D858D1 /* SemposOverlapping.cpp in Sources */,
+ 1E2CCFFE15939E5D00D858D1 /* SemposScorer.cpp in Sources */,
+ 1E2CD00215939E5D00D858D1 /* alignmentStruct.cpp in Sources */,
+ 1E2CD00515939E5D00D858D1 /* hashMap.cpp in Sources */,
+ 1E2CD00715939E5D00D858D1 /* hashMapInfos.cpp in Sources */,
+ 1E2CD00915939E5D00D858D1 /* hashMapStringInfos.cpp in Sources */,
+ 1E2CD00B15939E5D00D858D1 /* infosHasher.cpp in Sources */,
+ 1E2CD00D15939E5D00D858D1 /* stringHasher.cpp in Sources */,
+ 1E2CD00F15939E5D00D858D1 /* stringInfosHasher.cpp in Sources */,
+ 1E2CD01115939E5D00D858D1 /* terAlignment.cpp in Sources */,
+ 1E2CD01315939E5D00D858D1 /* tercalc.cpp in Sources */,
+ 1E2CD01515939E5D00D858D1 /* terShift.cpp in Sources */,
+ 1E2CD01715939E5D00D858D1 /* tools.cpp in Sources */,
+ 1E2CD01915939E5D00D858D1 /* TerScorer.cpp in Sources */,
+ 1E2CD01C15939E5D00D858D1 /* Timer.cpp in Sources */,
+ 1E2CD02015939E5D00D858D1 /* Util.cpp in Sources */,
+ 1E2CD02315939E5D00D858D1 /* Vocabulary.cpp in Sources */,
+ 1E39621B1594CFD1006FE978 /* PermutationScorer.cpp in Sources */,
+ 1E3962201594CFF9006FE978 /* Permutation.cpp in Sources */,
+ 1E3962231594D0FF006FE978 /* SentenceLevelScorer.cpp in Sources */,
+ 1EE52B591596B3FC006DC938 /* StatisticsBasedScorer.cpp in Sources */,
+ 1E689F21159A529C00DD995A /* ThreadPool.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin XCBuildConfiguration section */
+ 1E2CCF3515939E2D00D858D1 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = NO;
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = macosx;
+ };
+ name = Debug;
+ };
+ 1E2CCF3615939E2D00D858D1 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = YES;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ SDKROOT = macosx;
+ };
+ name = Release;
+ };
+ 1E2CCF3815939E2D00D858D1 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ EXECUTABLE_PREFIX = lib;
+ "GCC_PREPROCESSOR_DEFINITIONS[arch=*]" = (
+ "DEBUG=1",
+ "$(inherited)",
+ WITH_THREADS,
+ );
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1E2CCF3915939E2D00D858D1 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ EXECUTABLE_PREFIX = lib;
+ GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
+ HEADER_SEARCH_PATHS = (
+ ../..,
+ /opt/local/include,
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1E2CCF2D15939E2D00D858D1 /* Build configuration list for PBXProject "mert_lib" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E2CCF3515939E2D00D858D1 /* Debug */,
+ 1E2CCF3615939E2D00D858D1 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1E2CCF3715939E2D00D858D1 /* Build configuration list for PBXNativeTarget "mert_lib" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E2CCF3815939E2D00D858D1 /* Debug */,
+ 1E2CCF3915939E2D00D858D1 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 1E2CCF2A15939E2D00D858D1 /* Project object */;
+}
diff --git a/contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert_lib.xcscheme b/contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert_lib.xcscheme
new file mode 100644
index 000000000..70f1edce0
--- /dev/null
+++ b/contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/mert_lib.xcscheme
@@ -0,0 +1,54 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "1E2CCF3215939E2D00D858D1"
+ BuildableName = "libmert_lib.a"
+ BlueprintName = "mert_lib"
+ ReferencedContainer = "container:mert_lib.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Debug">
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist b/contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 000000000..fcf1c691d
--- /dev/null
+++ b/contrib/other-builds/mert_lib.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>SchemeUserState</key>
+ <dict>
+ <key>mert_lib.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>0</integer>
+ </dict>
+ </dict>
+ <key>SuppressBuildableAutocreation</key>
+ <dict>
+ <key>1E2CCF3215939E2D00D858D1</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ </dict>
+</dict>
+</plist>
diff --git a/contrib/other-builds/moses-chart-cmd.vcxproj b/contrib/other-builds/moses-chart-cmd.vcxproj
index 27b260f6f..25fe74588 100644
--- a/contrib/other-builds/moses-chart-cmd.vcxproj
+++ b/contrib/other-builds/moses-chart-cmd.vcxproj
@@ -93,13 +93,11 @@
<ClCompile Include="src\IOWrapper.cpp" />
<ClCompile Include="src\Main.cpp" />
<ClCompile Include="src\mbr.cpp" />
- <ClCompile Include="src\TranslationAnalysis.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="src\IOWrapper.h" />
<ClInclude Include="src\Main.h" />
<ClInclude Include="src\mbr.h" />
- <ClInclude Include="src\TranslationAnalysis.h" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\moses\moses.vcxproj">
diff --git a/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj b/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj
index 82fe6607c..775795dee 100644
--- a/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/moses-chart-cmd.xcodeproj/project.pbxproj
@@ -308,6 +308,7 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
+ ../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lz",
@@ -318,6 +319,9 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
+ "-lboost_filesystem-mt",
+ "-lboost_system-mt",
+ "-lcmph",
);
PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
@@ -341,6 +345,7 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
+ ../../cmph/lib,
);
OTHER_LDFLAGS = (
"-lz",
@@ -351,6 +356,9 @@
"-lflm",
"-llattice",
"-lboost_thread-mt",
+ "-lboost_filesystem-mt",
+ "-lboost_system-mt",
+ "-lcmph",
);
PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
diff --git a/contrib/other-builds/moses-cmd.vcxproj b/contrib/other-builds/moses-cmd.vcxproj
index 524502ecf..3f24ebbdf 100644
--- a/contrib/other-builds/moses-cmd.vcxproj
+++ b/contrib/other-builds/moses-cmd.vcxproj
@@ -43,12 +43,16 @@
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47\lib;$(LibraryPath)</LibraryPath>
+ <LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47\lib;$(LibraryPath)</LibraryPath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@@ -58,19 +62,20 @@
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
</ClCompile>
<Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention>
</DataExecutionPrevention>
<TargetMachine>MachineX86</TargetMachine>
+ <AdditionalLibraryDirectories>C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@@ -78,7 +83,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
- <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
@@ -87,6 +92,7 @@
<DataExecutionPrevention>
</DataExecutionPrevention>
<TargetMachine>MachineX86</TargetMachine>
+ <AdditionalLibraryDirectories>C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
diff --git a/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj b/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj
index 619ecf76c..aac225ced 100644
--- a/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/moses-cmd.xcodeproj/project.pbxproj
@@ -326,15 +326,20 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
+ ../../cmph/lib,
);
OTHER_LDFLAGS = (
- "-lflm",
- "-lmisc",
- "-loolm",
- "-ldstruct",
"-lz",
"-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
"-lboost_thread-mt",
+ "-lboost_filesystem-mt",
+ "-lboost_system-mt",
+ "-lcmph",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
@@ -369,15 +374,20 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
+ ../../cmph/lib,
);
OTHER_LDFLAGS = (
- "-lflm",
- "-lmisc",
- "-loolm",
- "-ldstruct",
"-lz",
"-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
"-lboost_thread-mt",
+ "-lboost_filesystem-mt",
+ "-lboost_system-mt",
+ "-lcmph",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
@@ -409,15 +419,20 @@
../../irstlm/lib,
../../srilm/lib/macosx,
/opt/local/lib,
+ ../../cmph/lib,
);
OTHER_LDFLAGS = (
- "-lflm",
- "-lmisc",
- "-loolm",
- "-ldstruct",
"-lz",
"-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
"-lboost_thread-mt",
+ "-lboost_filesystem-mt",
+ "-lboost_system-mt",
+ "-lcmph",
);
PREBINDING = NO;
PRODUCT_NAME = "moses-cmd";
diff --git a/contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/moses-cmd.xcscheme b/contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/moses-cmd.xcscheme
new file mode 100644
index 000000000..80894ecca
--- /dev/null
+++ b/contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/moses-cmd.xcscheme
@@ -0,0 +1,72 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Scheme
+ version = "1.3">
+ <BuildAction
+ parallelizeBuildables = "YES"
+ buildImplicitDependencies = "YES">
+ <BuildActionEntries>
+ <BuildActionEntry
+ buildForTesting = "YES"
+ buildForRunning = "YES"
+ buildForProfiling = "YES"
+ buildForArchiving = "YES"
+ buildForAnalyzing = "YES">
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "8DD76F620486A84900D96B5E"
+ BuildableName = "moses-cmd"
+ BlueprintName = "moses-cmd"
+ ReferencedContainer = "container:moses-cmd.xcodeproj">
+ </BuildableReference>
+ </BuildActionEntry>
+ </BuildActionEntries>
+ </BuildAction>
+ <TestAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ buildConfiguration = "Debug">
+ <Testables>
+ </Testables>
+ </TestAction>
+ <LaunchAction
+ selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
+ selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
+ launchStyle = "0"
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Debug">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "8DD76F620486A84900D96B5E"
+ BuildableName = "moses-cmd"
+ BlueprintName = "moses-cmd"
+ ReferencedContainer = "container:moses-cmd.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ <AdditionalOptions>
+ </AdditionalOptions>
+ </LaunchAction>
+ <ProfileAction
+ shouldUseLaunchSchemeArgsEnv = "YES"
+ savedToolIdentifier = ""
+ useCustomWorkingDirectory = "NO"
+ buildConfiguration = "Release">
+ <BuildableProductRunnable>
+ <BuildableReference
+ BuildableIdentifier = "primary"
+ BlueprintIdentifier = "8DD76F620486A84900D96B5E"
+ BuildableName = "moses-cmd"
+ BlueprintName = "moses-cmd"
+ ReferencedContainer = "container:moses-cmd.xcodeproj">
+ </BuildableReference>
+ </BuildableProductRunnable>
+ </ProfileAction>
+ <AnalyzeAction
+ buildConfiguration = "Debug">
+ </AnalyzeAction>
+ <ArchiveAction
+ buildConfiguration = "Release"
+ revealArchiveInOrganizer = "YES">
+ </ArchiveAction>
+</Scheme>
diff --git a/contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist b/contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
new file mode 100644
index 000000000..29af8ddb4
--- /dev/null
+++ b/contrib/other-builds/moses-cmd.xcodeproj/xcuserdata/hieuhoang.xcuserdatad/xcschemes/xcschememanagement.plist
@@ -0,0 +1,22 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
+<plist version="1.0">
+<dict>
+ <key>SchemeUserState</key>
+ <dict>
+ <key>moses-cmd.xcscheme</key>
+ <dict>
+ <key>orderHint</key>
+ <integer>2</integer>
+ </dict>
+ </dict>
+ <key>SuppressBuildableAutocreation</key>
+ <dict>
+ <key>8DD76F620486A84900D96B5E</key>
+ <dict>
+ <key>primary</key>
+ <true/>
+ </dict>
+ </dict>
+</dict>
+</plist>
diff --git a/contrib/other-builds/moses-cmd/.cproject b/contrib/other-builds/moses-cmd/.cproject
index 53c112cb8..cdad4ad64 100644
--- a/contrib/other-builds/moses-cmd/.cproject
+++ b/contrib/other-builds/moses-cmd/.cproject
@@ -25,17 +25,27 @@
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.84059290" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<option id="macosx.cpp.link.option.libs.1641794848" name="Libraries (-l)" superClass="macosx.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
+ <listOptionValue builtIn="false" value="rt"/>
+ <listOptionValue builtIn="false" value="misc"/>
+ <listOptionValue builtIn="false" value="dstruct"/>
+ <listOptionValue builtIn="false" value="oolm"/>
+ <listOptionValue builtIn="false" value="flm"/>
+ <listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="irstlm"/>
+ <listOptionValue builtIn="false" value="z"/>
+ <listOptionValue builtIn="false" value="boost_system"/>
+ <listOptionValue builtIn="false" value="boost_filesystem"/>
</option>
<option id="macosx.cpp.link.option.paths.1615268628" name="Library search path (-L)" superClass="macosx.cpp.link.option.paths" valueType="libPaths">
- <listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/moses/Debug"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/OnDiskPt/Debug"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/lm/Debug"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/contrib/other-builds/util/Debug"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/workspace/github/moses-smt/irstlm/lib"/>
+ <listOptionValue builtIn="false" value="${workspace_loc:/moses}/Debug"/>
+ <listOptionValue builtIn="false" value="${workspace_loc:}/../../srilm/lib/i686-m64"/>
+ <listOptionValue builtIn="false" value="${workspace_loc:/OnDiskPt}/Debug"/>
+ <listOptionValue builtIn="false" value="${workspace_loc:/lm}/Debug"/>
+ <listOptionValue builtIn="false" value="${workspace_loc:/util}/Debug"/>
+ <listOptionValue builtIn="false" value="${workspace_loc:}/../../irstlm/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.412058804" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
@@ -51,8 +61,11 @@
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1176009559" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1024398579" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
+ </option>
+ <option id="gnu.cpp.compiler.option.preprocessor.def.491464216" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.240921565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
@@ -122,12 +135,13 @@
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses-cmd"/>
</storageModule>
+ <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
- <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.759110223;cdt.managedbuild.tool.gnu.c.compiler.input.1452105399">
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.341255150;cdt.managedbuild.config.gnu.macosx.exe.debug.341255150.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1201400609;cdt.managedbuild.tool.gnu.c.compiler.input.2031799877">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.1916112479;cdt.managedbuild.config.macosx.exe.release.1916112479.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1219375865;cdt.managedbuild.tool.gnu.cpp.compiler.input.604224475">
diff --git a/contrib/other-builds/moses.sln b/contrib/other-builds/moses.sln
index c7d4c4417..a9ea31234 100644
--- a/contrib/other-builds/moses.sln
+++ b/contrib/other-builds/moses.sln
@@ -20,6 +20,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CreateOnDisk", "CreateOnDis
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kenlm", "kenlm.vcxproj", "{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mosesserver", "mosesserver.vcxproj", "{85811FDF-8AD1-4490-A545-B2F51931A18C}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Win32 = Debug|Win32
@@ -46,6 +48,10 @@ Global
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.Build.0 = Debug|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.ActiveCfg = Release|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.Build.0 = Release|Win32
+ {85811FDF-8AD1-4490-A545-B2F51931A18C}.Debug|Win32.ActiveCfg = Debug|Win32
+ {85811FDF-8AD1-4490-A545-B2F51931A18C}.Debug|Win32.Build.0 = Debug|Win32
+ {85811FDF-8AD1-4490-A545-B2F51931A18C}.Release|Win32.ActiveCfg = Release|Win32
+ {85811FDF-8AD1-4490-A545-B2F51931A18C}.Release|Win32.Build.0 = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/contrib/other-builds/moses.vcxproj b/contrib/other-builds/moses.vcxproj
index 4743fb950..4dba07493 100644
--- a/contrib/other-builds/moses.vcxproj
+++ b/contrib/other-builds/moses.vcxproj
@@ -13,6 +13,7 @@
<ItemGroup>
<ClInclude Include="..\..\moses\src\AlignmentInfo.h" />
<ClInclude Include="..\..\moses\src\AlignmentInfoCollection.h" />
+ <ClInclude Include="..\..\moses\src\BilingualDynSuffixArray.h" />
<ClInclude Include="..\..\moses\src\BitmapContainer.h" />
<ClInclude Include="..\..\moses\src\CellCollection.h" />
<ClInclude Include="..\..\moses\src\ChartCell.h" />
@@ -22,9 +23,6 @@
<ClInclude Include="..\..\moses\src\ChartHypothesis.h" />
<ClInclude Include="..\..\moses\src\ChartHypothesisCollection.h" />
<ClInclude Include="..\..\moses\src\ChartManager.h" />
- <ClInclude Include="..\..\moses\src\ChartRuleLookupManager.h" />
- <ClInclude Include="..\..\moses\src\ChartRuleLookupManagerMemory.h" />
- <ClInclude Include="..\..\moses\src\ChartRuleLookupManagerOnDisk.h" />
<ClInclude Include="..\..\moses\src\ChartTranslationOption.h" />
<ClInclude Include="..\..\moses\src\ChartTranslationOptionCollection.h" />
<ClInclude Include="..\..\moses\src\ChartTranslationOptionList.h" />
@@ -34,16 +32,24 @@
<ClInclude Include="..\..\moses\src\ChartTrellisPath.h" />
<ClInclude Include="..\..\moses\src\ChartTrellisPathList.h" />
<ClInclude Include="..\..\moses\src\ConfusionNet.h" />
+ <ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerCYKPlus.h" />
+ <ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerMemory.h" />
+ <ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerOnDisk.h" />
+ <ClInclude Include="..\..\moses\src\CYKPlusParser\DotChart.h" />
+ <ClInclude Include="..\..\moses\src\CYKPlusParser\DotChartInMemory.h" />
+ <ClInclude Include="..\..\moses\src\CYKPlusParser\DotChartOnDisk.h" />
<ClInclude Include="..\..\moses\src\DecodeFeature.h" />
<ClInclude Include="..\..\moses\src\DecodeGraph.h" />
<ClInclude Include="..\..\moses\src\DecodeStep.h" />
<ClInclude Include="..\..\moses\src\DecodeStepGeneration.h" />
<ClInclude Include="..\..\moses\src\DecodeStepTranslation.h" />
<ClInclude Include="..\..\moses\src\Dictionary.h" />
- <ClInclude Include="..\..\moses\src\DotChart.h" />
- <ClInclude Include="..\..\moses\src\DotChartInMemory.h" />
- <ClInclude Include="..\..\moses\src\DotChartOnDisk.h" />
<ClInclude Include="..\..\moses\src\DummyScoreProducers.h" />
+ <ClInclude Include="..\..\moses\src\DynSAInclude\file.h" />
+ <ClInclude Include="..\..\moses\src\DynSAInclude\FileHandler.h" />
+ <ClInclude Include="..\..\moses\src\DynSAInclude\onlineRLM.h" />
+ <ClInclude Include="..\..\moses\src\DynSAInclude\quantizer.h" />
+ <ClInclude Include="..\..\moses\src\DynSAInclude\vocab.h" />
<ClInclude Include="..\..\moses\src\DynSuffixArray.h" />
<ClInclude Include="..\..\moses\src\Factor.h" />
<ClInclude Include="..\..\moses\src\FactorCollection.h" />
@@ -73,6 +79,7 @@
<ClInclude Include="..\..\moses\src\LM\Joint.h" />
<ClInclude Include="..\..\moses\src\LM\Ken.h" />
<ClInclude Include="..\..\moses\src\LM\MultiFactor.h" />
+ <ClInclude Include="..\..\moses\src\LM\ORLM.h" />
<ClInclude Include="..\..\moses\src\LM\SingleFactor.h" />
<ClInclude Include="..\..\moses\src\LVoc.h" />
<ClInclude Include="..\..\moses\src\Manager.h" />
@@ -85,13 +92,9 @@
<ClInclude Include="..\..\moses\src\PDTAimp.h" />
<ClInclude Include="..\..\moses\src\Phrase.h" />
<ClInclude Include="..\..\moses\src\PhraseDictionary.h" />
- <ClInclude Include="..\..\moses\src\PhraseDictionaryALSuffixArray.h" />
<ClInclude Include="..\..\moses\src\PhraseDictionaryDynSuffixArray.h" />
<ClInclude Include="..\..\moses\src\PhraseDictionaryMemory.h" />
<ClInclude Include="..\..\moses\src\PhraseDictionaryNode.h" />
- <ClInclude Include="..\..\moses\src\PhraseDictionaryNodeSCFG.h" />
- <ClInclude Include="..\..\moses\src\PhraseDictionaryOnDisk.h" />
- <ClInclude Include="..\..\moses\src\PhraseDictionarySCFG.h" />
<ClInclude Include="..\..\moses\src\PhraseDictionaryTree.h" />
<ClInclude Include="..\..\moses\src\PhraseDictionaryTreeAdaptor.h" />
<ClInclude Include="..\..\moses\src\PrefixTree.h" />
@@ -106,13 +109,29 @@
<ClInclude Include="..\..\moses\src\RuleTable\LoaderFactory.h" />
<ClInclude Include="..\..\moses\src\RuleTable\LoaderHiero.h" />
<ClInclude Include="..\..\moses\src\RuleTable\LoaderStandard.h" />
+ <ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryALSuffixArray.h" />
+ <ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryNodeSCFG.h" />
+ <ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryOnDisk.h" />
+ <ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionarySCFG.h" />
<ClInclude Include="..\..\moses\src\RuleTable\Trie.h" />
+ <ClInclude Include="..\..\moses\src\RuleTable\UTrie.h" />
+ <ClInclude Include="..\..\moses\src\RuleTable\UTrieNode.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\ApplicableRuleTrie.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\IntermediateVarSpanNode.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\Parser.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\SentenceMap.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\StackLattice.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\StackLatticeBuilder.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\StackLatticeSearcher.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\VarSpanNode.h" />
+ <ClInclude Include="..\..\moses\src\Scope3Parser\VarSpanTrieBuilder.h" />
<ClInclude Include="..\..\moses\src\ScoreComponentCollection.h" />
<ClInclude Include="..\..\moses\src\ScoreIndexManager.h" />
<ClInclude Include="..\..\moses\src\ScoreProducer.h" />
<ClInclude Include="..\..\moses\src\Search.h" />
<ClInclude Include="..\..\moses\src\SearchCubePruning.h" />
<ClInclude Include="..\..\moses\src\SearchNormal.h" />
+ <ClInclude Include="..\..\moses\src\SearchNormalBatch.h" />
<ClInclude Include="..\..\moses\src\Sentence.h" />
<ClInclude Include="..\..\moses\src\SentenceStats.h" />
<ClInclude Include="..\..\moses\src\SquareMatrix.h" />
@@ -144,15 +163,13 @@
<ItemGroup>
<ClCompile Include="..\..\moses\src\AlignmentInfo.cpp" />
<ClCompile Include="..\..\moses\src\AlignmentInfoCollection.cpp" />
+ <ClCompile Include="..\..\moses\src\BilingualDynSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\BitmapContainer.cpp" />
<ClCompile Include="..\..\moses\src\ChartCell.cpp" />
<ClCompile Include="..\..\moses\src\ChartCellCollection.cpp" />
<ClCompile Include="..\..\moses\src\ChartHypothesis.cpp" />
<ClCompile Include="..\..\moses\src\ChartHypothesisCollection.cpp" />
<ClCompile Include="..\..\moses\src\ChartManager.cpp" />
- <ClCompile Include="..\..\moses\src\ChartRuleLookupManager.cpp" />
- <ClCompile Include="..\..\moses\src\ChartRuleLookupManagerMemory.cpp" />
- <ClCompile Include="..\..\moses\src\ChartRuleLookupManagerOnDisk.cpp" />
<ClCompile Include="..\..\moses\src\ChartTranslationOption.cpp" />
<ClCompile Include="..\..\moses\src\ChartTranslationOptionCollection.cpp" />
<ClCompile Include="..\..\moses\src\ChartTranslationOptionList.cpp" />
@@ -161,16 +178,20 @@
<ClCompile Include="..\..\moses\src\ChartTrellisNode.cpp" />
<ClCompile Include="..\..\moses\src\ChartTrellisPath.cpp" />
<ClCompile Include="..\..\moses\src\ConfusionNet.cpp" />
+ <ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerCYKPlus.cpp" />
+ <ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerMemory.cpp" />
+ <ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerOnDisk.cpp" />
+ <ClCompile Include="..\..\moses\src\CYKPlusParser\DotChartInMemory.cpp" />
+ <ClCompile Include="..\..\moses\src\CYKPlusParser\DotChartOnDisk.cpp" />
<ClCompile Include="..\..\moses\src\DecodeFeature.cpp" />
<ClCompile Include="..\..\moses\src\DecodeGraph.cpp" />
<ClCompile Include="..\..\moses\src\DecodeStep.cpp" />
<ClCompile Include="..\..\moses\src\DecodeStepGeneration.cpp" />
<ClCompile Include="..\..\moses\src\DecodeStepTranslation.cpp" />
<ClCompile Include="..\..\moses\src\Dictionary.cpp" />
- <ClCompile Include="..\..\moses\src\DotChart.cpp" />
- <ClCompile Include="..\..\moses\src\DotChartInMemory.cpp" />
- <ClCompile Include="..\..\moses\src\DotChartOnDisk.cpp" />
<ClCompile Include="..\..\moses\src\DummyScoreProducers.cpp" />
+ <ClCompile Include="..\..\moses\src\DynSAInclude\FileHandler.cpp" />
+ <ClCompile Include="..\..\moses\src\DynSAInclude\vocab.cpp" />
<ClCompile Include="..\..\moses\src\DynSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\Factor.cpp" />
<ClCompile Include="..\..\moses\src\FactorCollection.cpp" />
@@ -198,6 +219,7 @@
<ClCompile Include="..\..\moses\src\LM\Joint.cpp" />
<ClCompile Include="..\..\moses\src\LM\Ken.cpp" />
<ClCompile Include="..\..\moses\src\LM\MultiFactor.cpp" />
+ <ClCompile Include="..\..\moses\src\LM\ORLM.cpp" />
<ClCompile Include="..\..\moses\src\LM\SingleFactor.cpp" />
<ClCompile Include="..\..\moses\src\LVoc.cpp" />
<ClCompile Include="..\..\moses\src\Manager.cpp" />
@@ -207,13 +229,9 @@
<ClCompile Include="..\..\moses\src\PCNTools.cpp" />
<ClCompile Include="..\..\moses\src\Phrase.cpp" />
<ClCompile Include="..\..\moses\src\PhraseDictionary.cpp" />
- <ClCompile Include="..\..\moses\src\PhraseDictionaryALSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\PhraseDictionaryDynSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\PhraseDictionaryMemory.cpp" />
<ClCompile Include="..\..\moses\src\PhraseDictionaryNode.cpp" />
- <ClCompile Include="..\..\moses\src\PhraseDictionaryNodeSCFG.cpp" />
- <ClCompile Include="..\..\moses\src\PhraseDictionaryOnDisk.cpp" />
- <ClCompile Include="..\..\moses\src\PhraseDictionarySCFG.cpp" />
<ClCompile Include="..\..\moses\src\PhraseDictionaryTree.cpp" />
<ClCompile Include="..\..\moses\src\PhraseDictionaryTreeAdaptor.cpp" />
<ClCompile Include="..\..\moses\src\PrefixTreeMap.cpp" />
@@ -226,13 +244,24 @@
<ClCompile Include="..\..\moses\src\RuleTable\LoaderFactory.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\LoaderHiero.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\LoaderStandard.cpp" />
+ <ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryALSuffixArray.cpp" />
+ <ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryNodeSCFG.cpp" />
+ <ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryOnDisk.cpp" />
+ <ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionarySCFG.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\Trie.cpp" />
+ <ClCompile Include="..\..\moses\src\RuleTable\UTrie.cpp" />
+ <ClCompile Include="..\..\moses\src\RuleTable\UTrieNode.cpp" />
+ <ClCompile Include="..\..\moses\src\Scope3Parser\ApplicableRuleTrie.cpp" />
+ <ClCompile Include="..\..\moses\src\Scope3Parser\Parser.cpp" />
+ <ClCompile Include="..\..\moses\src\Scope3Parser\StackLatticeBuilder.cpp" />
+ <ClCompile Include="..\..\moses\src\Scope3Parser\VarSpanTrieBuilder.cpp" />
<ClCompile Include="..\..\moses\src\ScoreComponentCollection.cpp" />
<ClCompile Include="..\..\moses\src\ScoreIndexManager.cpp" />
<ClCompile Include="..\..\moses\src\ScoreProducer.cpp" />
<ClCompile Include="..\..\moses\src\Search.cpp" />
<ClCompile Include="..\..\moses\src\SearchCubePruning.cpp" />
<ClCompile Include="..\..\moses\src\SearchNormal.cpp" />
+ <ClCompile Include="..\..\moses\src\SearchNormalBatch.cpp" />
<ClCompile Include="..\..\moses\src\Sentence.cpp" />
<ClCompile Include="..\..\moses\src\SentenceStats.cpp" />
<ClCompile Include="..\..\moses\src\SquareMatrix.cpp" />
@@ -258,6 +287,9 @@
<ClCompile Include="..\..\moses\src\WordsRange.cpp" />
<ClCompile Include="..\..\moses\src\XmlOption.cpp" />
</ItemGroup>
+ <ItemGroup>
+ <None Include="..\..\util\file.hh" />
+ </ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{8122157A-0DE5-44FF-8E5B-024ED6ACE7AF}</ProjectGuid>
<RootNamespace>moses</RootNamespace>
@@ -289,17 +321,17 @@
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
- <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
- <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;C:\GnuWin32\include;$(IncludePath)</IncludePath>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;C:\GnuWin32\include;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
- <RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
@@ -314,9 +346,9 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
- <AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
- <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
- <RuntimeLibrary>MultiThreaded</RuntimeLibrary>
+ <AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
diff --git a/contrib/other-builds/moses.xcodeproj/project.pbxproj b/contrib/other-builds/moses.xcodeproj/project.pbxproj
index b870a74c9..2864615c6 100644
--- a/contrib/other-builds/moses.xcodeproj/project.pbxproj
+++ b/contrib/other-builds/moses.xcodeproj/project.pbxproj
@@ -7,6 +7,40 @@
objects = {
/* Begin PBXBuildFile section */
+ 1E0BA41815B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */; };
+ 1E0BA41915B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */; };
+ 1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */; };
+ 1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E1D823F15AC29BB00FE42E9 /* FileHandler.h */; };
+ 1E365EEA16120F4600BA335B /* ChartTranslationOptions.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E365EE816120F4600BA335B /* ChartTranslationOptions.cpp */; };
+ 1E365EEB16120F4600BA335B /* ChartTranslationOptions.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E365EE916120F4600BA335B /* ChartTranslationOptions.h */; };
+ 1E619EA115B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */; };
+ 1E619EA215B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */; };
+ 1E6D9FD615D027560064D436 /* BlockHashIndex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */; };
+ 1E6D9FD715D027560064D436 /* BlockHashIndex.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FBE15D027560064D436 /* BlockHashIndex.h */; };
+ 1E6D9FD815D027560064D436 /* CanonicalHuffman.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */; };
+ 1E6D9FD915D027560064D436 /* CmphStringVectorAdapter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */; };
+ 1E6D9FDA15D027560064D436 /* CmphStringVectorAdapter.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */; };
+ 1E6D9FDB15D027560064D436 /* ConsistantPhrases.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC215D027560064D436 /* ConsistantPhrases.h */; };
+ 1E6D9FDD15D027560064D436 /* LexicalReorderingTableCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */; };
+ 1E6D9FDE15D027560064D436 /* LexicalReorderingTableCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */; };
+ 1E6D9FDF15D027560064D436 /* LexicalReorderingTableCreator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */; };
+ 1E6D9FE015D027560064D436 /* LexicalReorderingTableCreator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */; };
+ 1E6D9FE115D027560064D436 /* ListCoders.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC815D027560064D436 /* ListCoders.h */; };
+ 1E6D9FE215D027560064D436 /* MmapAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC915D027560064D436 /* MmapAllocator.h */; };
+ 1E6D9FE315D027560064D436 /* MonotonicVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCA15D027560064D436 /* MonotonicVector.h */; };
+ 1E6D9FE415D027560064D436 /* MurmurHash3.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */; };
+ 1E6D9FE515D027560064D436 /* MurmurHash3.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCC15D027560064D436 /* MurmurHash3.h */; };
+ 1E6D9FE615D027560064D436 /* PackedArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCD15D027560064D436 /* PackedArray.h */; };
+ 1E6D9FE715D027560064D436 /* PhraseDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */; };
+ 1E6D9FE815D027560064D436 /* PhraseDecoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCF15D027560064D436 /* PhraseDecoder.h */; };
+ 1E6D9FE915D027560064D436 /* PhraseDictionaryCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */; };
+ 1E6D9FEA15D027560064D436 /* PhraseDictionaryCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */; };
+ 1E6D9FEB15D027560064D436 /* PhraseTableCreator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */; };
+ 1E6D9FEC15D027560064D436 /* PhraseTableCreator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD315D027560064D436 /* PhraseTableCreator.h */; };
+ 1E6D9FED15D027560064D436 /* StringVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD415D027560064D436 /* StringVector.h */; };
+ 1E6D9FEE15D027560064D436 /* TargetPhraseCollectionCache.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */; };
+ 1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */; };
+ 1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E879EA615A346F90051F346 /* SearchNormalBatch.h */; };
1EAC363514CDC79300DF97C3 /* Loader.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC362C14CDC79300DF97C3 /* Loader.h */; };
1EAC363614CDC79300DF97C3 /* LoaderCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAC362D14CDC79300DF97C3 /* LoaderCompact.cpp */; };
1EAC363714CDC79300DF97C3 /* LoaderCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC362E14CDC79300DF97C3 /* LoaderCompact.h */; };
@@ -16,6 +50,8 @@
1EAC363B14CDC79300DF97C3 /* LoaderHiero.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC363214CDC79300DF97C3 /* LoaderHiero.h */; };
1EAC363C14CDC79300DF97C3 /* LoaderStandard.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAC363314CDC79300DF97C3 /* LoaderStandard.cpp */; };
1EAC363D14CDC79300DF97C3 /* LoaderStandard.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC363414CDC79300DF97C3 /* LoaderStandard.h */; };
+ 1EC32DB815D2D90700A313B1 /* ThrowingFwrite.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */; };
+ 1EC32DB915D2D90700A313B1 /* ThrowingFwrite.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */; };
1EC7374614B977AB00238410 /* AlignmentInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D314B977AA00238410 /* AlignmentInfo.cpp */; };
1EC7374714B977AB00238410 /* AlignmentInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735D414B977AA00238410 /* AlignmentInfo.h */; };
1EC7374814B977AB00238410 /* AlignmentInfoCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D514B977AA00238410 /* AlignmentInfoCollection.cpp */; };
@@ -24,7 +60,6 @@
1EC7374B14B977AB00238410 /* BilingualDynSuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */; };
1EC7374C14B977AB00238410 /* BitmapContainer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D914B977AA00238410 /* BitmapContainer.cpp */; };
1EC7374D14B977AB00238410 /* BitmapContainer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DA14B977AA00238410 /* BitmapContainer.h */; };
- 1EC7374E14B977AB00238410 /* CellCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DB14B977AA00238410 /* CellCollection.h */; };
1EC7374F14B977AB00238410 /* ChartCell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735DC14B977AA00238410 /* ChartCell.cpp */; };
1EC7375014B977AB00238410 /* ChartCell.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DD14B977AA00238410 /* ChartCell.h */; };
1EC7375114B977AB00238410 /* ChartCellCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */; };
@@ -38,10 +73,6 @@
1EC7375914B977AB00238410 /* ChartManager.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735E614B977AA00238410 /* ChartManager.cpp */; };
1EC7375A14B977AB00238410 /* ChartManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735E714B977AA00238410 /* ChartManager.h */; };
1EC7375C14B977AB00238410 /* ChartRuleLookupManager.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735E914B977AA00238410 /* ChartRuleLookupManager.h */; };
- 1EC7376114B977AB00238410 /* ChartTranslationOption.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735EE14B977AA00238410 /* ChartTranslationOption.cpp */; };
- 1EC7376214B977AB00238410 /* ChartTranslationOption.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735EF14B977AA00238410 /* ChartTranslationOption.h */; };
- 1EC7376314B977AB00238410 /* ChartTranslationOptionCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735F014B977AA00238410 /* ChartTranslationOptionCollection.cpp */; };
- 1EC7376414B977AB00238410 /* ChartTranslationOptionCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735F114B977AA00238410 /* ChartTranslationOptionCollection.h */; };
1EC7376514B977AB00238410 /* ChartTranslationOptionList.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735F214B977AA00238410 /* ChartTranslationOptionList.cpp */; };
1EC7376614B977AB00238410 /* ChartTranslationOptionList.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735F314B977AA00238410 /* ChartTranslationOptionList.h */; };
1EC7376714B977AB00238410 /* ChartTrellisDetour.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735F414B977AA00238410 /* ChartTrellisDetour.cpp */; };
@@ -70,8 +101,6 @@
1EC7378414B977AB00238410 /* DummyScoreProducers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361114B977AA00238410 /* DummyScoreProducers.cpp */; };
1EC7378514B977AB00238410 /* DummyScoreProducers.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361214B977AA00238410 /* DummyScoreProducers.h */; };
1EC7378614B977AB00238410 /* fdstream.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361414B977AA00238410 /* fdstream.h */; };
- 1EC7378714B977AB00238410 /* file.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361514B977AA00238410 /* file.cpp */; };
- 1EC7378814B977AB00238410 /* file.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361614B977AA00238410 /* file.h */; };
1EC7378914B977AB00238410 /* hash.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361714B977AA00238410 /* hash.h */; };
1EC7378A14B977AB00238410 /* onlineRLM.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361914B977AA00238410 /* onlineRLM.h */; };
1EC7378B14B977AB00238410 /* params.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361B14B977AA00238410 /* params.cpp */; };
@@ -293,11 +322,55 @@
1EDA809114D19FBF003D2191 /* UTrie.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EDA808314D19FBF003D2191 /* UTrie.h */; };
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EDA808414D19FBF003D2191 /* UTrieNode.cpp */; };
1EDA809314D19FBF003D2191 /* UTrieNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EDA808514D19FBF003D2191 /* UTrieNode.h */; };
+ 1EE418ED15C7FDCB0028F9AB /* Match.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E415C7FDCB0028F9AB /* Match.h */; };
+ 1EE418EE15C7FDCB0028F9AB /* SentenceAlignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */; };
+ 1EE418EF15C7FDCB0028F9AB /* SentenceAlignment.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */; };
+ 1EE418F015C7FDCB0028F9AB /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */; };
+ 1EE418F115C7FDCB0028F9AB /* SuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E815C7FDCB0028F9AB /* SuffixArray.h */; };
+ 1EE418F215C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */; };
+ 1EE418F315C7FDCB0028F9AB /* FuzzyMatchWrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */; };
+ 1EE418F415C7FDCB0028F9AB /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */; };
+ 1EE418F515C7FDCB0028F9AB /* Vocabulary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */; };
1EF0709314B9EFCC0052152A /* ParallelBackoff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0709114B9EFCC0052152A /* ParallelBackoff.cpp */; };
1EF0709414B9EFCC0052152A /* ParallelBackoff.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EF0709214B9EFCC0052152A /* ParallelBackoff.h */; };
+ 1EF8F2C4159A61970047B613 /* HypoList.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EF8F2C3159A61970047B613 /* HypoList.h */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
+ 1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryFuzzyMatch.cpp; path = ../../moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp; sourceTree = "<group>"; };
+ 1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryFuzzyMatch.h; path = ../../moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h; sourceTree = "<group>"; };
+ 1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FileHandler.cpp; sourceTree = "<group>"; };
+ 1E1D823F15AC29BB00FE42E9 /* FileHandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FileHandler.h; sourceTree = "<group>"; };
+ 1E365EE816120F4600BA335B /* ChartTranslationOptions.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOptions.cpp; path = ../../moses/src/ChartTranslationOptions.cpp; sourceTree = "<group>"; };
+ 1E365EE916120F4600BA335B /* ChartTranslationOptions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOptions.h; path = ../../moses/src/ChartTranslationOptions.h; sourceTree = "<group>"; };
+ 1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartRuleLookupManagerMemoryPerSentence.cpp; path = ../../moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp; sourceTree = "<group>"; };
+ 1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManagerMemoryPerSentence.h; path = ../../moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h; sourceTree = "<group>"; };
+ 1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BlockHashIndex.cpp; path = ../../moses/src/CompactPT/BlockHashIndex.cpp; sourceTree = "<group>"; };
+ 1E6D9FBE15D027560064D436 /* BlockHashIndex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BlockHashIndex.h; path = ../../moses/src/CompactPT/BlockHashIndex.h; sourceTree = "<group>"; };
+ 1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CanonicalHuffman.h; path = ../../moses/src/CompactPT/CanonicalHuffman.h; sourceTree = "<group>"; };
+ 1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CmphStringVectorAdapter.cpp; path = ../../moses/src/CompactPT/CmphStringVectorAdapter.cpp; sourceTree = "<group>"; };
+ 1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CmphStringVectorAdapter.h; path = ../../moses/src/CompactPT/CmphStringVectorAdapter.h; sourceTree = "<group>"; };
+ 1E6D9FC215D027560064D436 /* ConsistantPhrases.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ConsistantPhrases.h; path = ../../moses/src/CompactPT/ConsistantPhrases.h; sourceTree = "<group>"; };
+ 1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTableCompact.cpp; path = ../../moses/src/CompactPT/LexicalReorderingTableCompact.cpp; sourceTree = "<group>"; };
+ 1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTableCompact.h; path = ../../moses/src/CompactPT/LexicalReorderingTableCompact.h; sourceTree = "<group>"; };
+ 1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTableCreator.cpp; path = ../../moses/src/CompactPT/LexicalReorderingTableCreator.cpp; sourceTree = "<group>"; };
+ 1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTableCreator.h; path = ../../moses/src/CompactPT/LexicalReorderingTableCreator.h; sourceTree = "<group>"; };
+ 1E6D9FC815D027560064D436 /* ListCoders.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ListCoders.h; path = ../../moses/src/CompactPT/ListCoders.h; sourceTree = "<group>"; };
+ 1E6D9FC915D027560064D436 /* MmapAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MmapAllocator.h; path = ../../moses/src/CompactPT/MmapAllocator.h; sourceTree = "<group>"; };
+ 1E6D9FCA15D027560064D436 /* MonotonicVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MonotonicVector.h; path = ../../moses/src/CompactPT/MonotonicVector.h; sourceTree = "<group>"; };
+ 1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = MurmurHash3.cpp; path = ../../moses/src/CompactPT/MurmurHash3.cpp; sourceTree = "<group>"; };
+ 1E6D9FCC15D027560064D436 /* MurmurHash3.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MurmurHash3.h; path = ../../moses/src/CompactPT/MurmurHash3.h; sourceTree = "<group>"; };
+ 1E6D9FCD15D027560064D436 /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PackedArray.h; path = ../../moses/src/CompactPT/PackedArray.h; sourceTree = "<group>"; };
+ 1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDecoder.cpp; path = ../../moses/src/CompactPT/PhraseDecoder.cpp; sourceTree = "<group>"; };
+ 1E6D9FCF15D027560064D436 /* PhraseDecoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDecoder.h; path = ../../moses/src/CompactPT/PhraseDecoder.h; sourceTree = "<group>"; };
+ 1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryCompact.cpp; path = ../../moses/src/CompactPT/PhraseDictionaryCompact.cpp; sourceTree = "<group>"; };
+ 1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryCompact.h; path = ../../moses/src/CompactPT/PhraseDictionaryCompact.h; sourceTree = "<group>"; };
+ 1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseTableCreator.cpp; path = ../../moses/src/CompactPT/PhraseTableCreator.cpp; sourceTree = "<group>"; };
+ 1E6D9FD315D027560064D436 /* PhraseTableCreator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseTableCreator.h; path = ../../moses/src/CompactPT/PhraseTableCreator.h; sourceTree = "<group>"; };
+ 1E6D9FD415D027560064D436 /* StringVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = StringVector.h; path = ../../moses/src/CompactPT/StringVector.h; sourceTree = "<group>"; };
+ 1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhraseCollectionCache.h; path = ../../moses/src/CompactPT/TargetPhraseCollectionCache.h; sourceTree = "<group>"; };
+ 1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SearchNormalBatch.cpp; path = ../../moses/src/SearchNormalBatch.cpp; sourceTree = "<group>"; };
+ 1E879EA615A346F90051F346 /* SearchNormalBatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SearchNormalBatch.h; path = ../../moses/src/SearchNormalBatch.h; sourceTree = "<group>"; };
1EAC362C14CDC79300DF97C3 /* Loader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Loader.h; path = ../../moses/src/RuleTable/Loader.h; sourceTree = "<group>"; };
1EAC362D14CDC79300DF97C3 /* LoaderCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LoaderCompact.cpp; path = ../../moses/src/RuleTable/LoaderCompact.cpp; sourceTree = "<group>"; };
1EAC362E14CDC79300DF97C3 /* LoaderCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderCompact.h; path = ../../moses/src/RuleTable/LoaderCompact.h; sourceTree = "<group>"; };
@@ -307,6 +380,8 @@
1EAC363214CDC79300DF97C3 /* LoaderHiero.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderHiero.h; path = ../../moses/src/RuleTable/LoaderHiero.h; sourceTree = "<group>"; };
1EAC363314CDC79300DF97C3 /* LoaderStandard.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LoaderStandard.cpp; path = ../../moses/src/RuleTable/LoaderStandard.cpp; sourceTree = "<group>"; };
1EAC363414CDC79300DF97C3 /* LoaderStandard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderStandard.h; path = ../../moses/src/RuleTable/LoaderStandard.h; sourceTree = "<group>"; };
+ 1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ThrowingFwrite.cpp; path = ../../moses/src/CompactPT/ThrowingFwrite.cpp; sourceTree = "<group>"; };
+ 1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ThrowingFwrite.h; path = ../../moses/src/CompactPT/ThrowingFwrite.h; sourceTree = "<group>"; };
1EC735D314B977AA00238410 /* AlignmentInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfo.cpp; path = ../../moses/src/AlignmentInfo.cpp; sourceTree = "<group>"; };
1EC735D414B977AA00238410 /* AlignmentInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AlignmentInfo.h; path = ../../moses/src/AlignmentInfo.h; sourceTree = "<group>"; };
1EC735D514B977AA00238410 /* AlignmentInfoCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfoCollection.cpp; path = ../../moses/src/AlignmentInfoCollection.cpp; sourceTree = "<group>"; };
@@ -315,7 +390,6 @@
1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BilingualDynSuffixArray.h; path = ../../moses/src/BilingualDynSuffixArray.h; sourceTree = "<group>"; };
1EC735D914B977AA00238410 /* BitmapContainer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BitmapContainer.cpp; path = ../../moses/src/BitmapContainer.cpp; sourceTree = "<group>"; };
1EC735DA14B977AA00238410 /* BitmapContainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BitmapContainer.h; path = ../../moses/src/BitmapContainer.h; sourceTree = "<group>"; };
- 1EC735DB14B977AA00238410 /* CellCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CellCollection.h; path = ../../moses/src/CellCollection.h; sourceTree = "<group>"; };
1EC735DC14B977AA00238410 /* ChartCell.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCell.cpp; path = ../../moses/src/ChartCell.cpp; sourceTree = "<group>"; };
1EC735DD14B977AA00238410 /* ChartCell.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartCell.h; path = ../../moses/src/ChartCell.h; sourceTree = "<group>"; };
1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCellCollection.cpp; path = ../../moses/src/ChartCellCollection.cpp; sourceTree = "<group>"; };
@@ -329,10 +403,6 @@
1EC735E614B977AA00238410 /* ChartManager.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartManager.cpp; path = ../../moses/src/ChartManager.cpp; sourceTree = "<group>"; };
1EC735E714B977AA00238410 /* ChartManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartManager.h; path = ../../moses/src/ChartManager.h; sourceTree = "<group>"; };
1EC735E914B977AA00238410 /* ChartRuleLookupManager.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManager.h; path = ../../moses/src/ChartRuleLookupManager.h; sourceTree = "<group>"; };
- 1EC735EE14B977AA00238410 /* ChartTranslationOption.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOption.cpp; path = ../../moses/src/ChartTranslationOption.cpp; sourceTree = "<group>"; };
- 1EC735EF14B977AA00238410 /* ChartTranslationOption.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOption.h; path = ../../moses/src/ChartTranslationOption.h; sourceTree = "<group>"; };
- 1EC735F014B977AA00238410 /* ChartTranslationOptionCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOptionCollection.cpp; path = ../../moses/src/ChartTranslationOptionCollection.cpp; sourceTree = "<group>"; };
- 1EC735F114B977AA00238410 /* ChartTranslationOptionCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOptionCollection.h; path = ../../moses/src/ChartTranslationOptionCollection.h; sourceTree = "<group>"; };
1EC735F214B977AA00238410 /* ChartTranslationOptionList.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTranslationOptionList.cpp; path = ../../moses/src/ChartTranslationOptionList.cpp; sourceTree = "<group>"; };
1EC735F314B977AA00238410 /* ChartTranslationOptionList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartTranslationOptionList.h; path = ../../moses/src/ChartTranslationOptionList.h; sourceTree = "<group>"; };
1EC735F414B977AA00238410 /* ChartTrellisDetour.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartTrellisDetour.cpp; path = ../../moses/src/ChartTrellisDetour.cpp; sourceTree = "<group>"; };
@@ -361,28 +431,17 @@
1EC7361114B977AA00238410 /* DummyScoreProducers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DummyScoreProducers.cpp; path = ../../moses/src/DummyScoreProducers.cpp; sourceTree = "<group>"; };
1EC7361214B977AA00238410 /* DummyScoreProducers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DummyScoreProducers.h; path = ../../moses/src/DummyScoreProducers.h; sourceTree = "<group>"; };
1EC7361414B977AA00238410 /* fdstream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fdstream.h; sourceTree = "<group>"; };
- 1EC7361514B977AA00238410 /* file.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = file.cpp; sourceTree = "<group>"; };
- 1EC7361614B977AA00238410 /* file.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = file.h; sourceTree = "<group>"; };
1EC7361714B977AA00238410 /* hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hash.h; sourceTree = "<group>"; };
- 1EC7361814B977AA00238410 /* hash.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = hash.h.orig; sourceTree = "<group>"; };
1EC7361914B977AA00238410 /* onlineRLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = onlineRLM.h; sourceTree = "<group>"; };
- 1EC7361A14B977AA00238410 /* onlineRLM.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = onlineRLM.h.orig; sourceTree = "<group>"; };
1EC7361B14B977AA00238410 /* params.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = params.cpp; sourceTree = "<group>"; };
- 1EC7361C14B977AA00238410 /* params.cpp.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = params.cpp.orig; sourceTree = "<group>"; };
1EC7361D14B977AA00238410 /* params.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = params.h; sourceTree = "<group>"; };
- 1EC7361E14B977AA00238410 /* params.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = params.h.orig; sourceTree = "<group>"; };
1EC7361F14B977AA00238410 /* perfectHash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = perfectHash.h; sourceTree = "<group>"; };
- 1EC7362014B977AA00238410 /* perfectHash.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = perfectHash.h.orig; sourceTree = "<group>"; };
1EC7362114B977AA00238410 /* quantizer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = quantizer.h; sourceTree = "<group>"; };
- 1EC7362214B977AA00238410 /* quantizer.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = quantizer.h.orig; sourceTree = "<group>"; };
1EC7362314B977AA00238410 /* RandLMCache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RandLMCache.h; sourceTree = "<group>"; };
- 1EC7362414B977AA00238410 /* RandLMCache.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = RandLMCache.h.orig; sourceTree = "<group>"; };
1EC7362514B977AA00238410 /* RandLMFilter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = RandLMFilter.h; sourceTree = "<group>"; };
- 1EC7362614B977AA00238410 /* RandLMFilter.h.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = RandLMFilter.h.orig; sourceTree = "<group>"; };
1EC7362714B977AA00238410 /* types.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = types.h; sourceTree = "<group>"; };
1EC7362814B977AA00238410 /* utils.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = utils.h; sourceTree = "<group>"; };
1EC7362914B977AA00238410 /* vocab.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vocab.cpp; sourceTree = "<group>"; };
- 1EC7362A14B977AA00238410 /* vocab.cpp.orig */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = vocab.cpp.orig; sourceTree = "<group>"; };
1EC7362B14B977AA00238410 /* vocab.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = vocab.h; sourceTree = "<group>"; };
1EC7362C14B977AA00238410 /* DynSuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DynSuffixArray.cpp; path = ../../moses/src/DynSuffixArray.cpp; sourceTree = "<group>"; };
1EC7362D14B977AA00238410 /* DynSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DynSuffixArray.h; path = ../../moses/src/DynSuffixArray.h; sourceTree = "<group>"; };
@@ -595,8 +654,18 @@
1EDA808314D19FBF003D2191 /* UTrie.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UTrie.h; path = ../../moses/src/RuleTable/UTrie.h; sourceTree = "<group>"; };
1EDA808414D19FBF003D2191 /* UTrieNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = UTrieNode.cpp; path = ../../moses/src/RuleTable/UTrieNode.cpp; sourceTree = "<group>"; };
1EDA808514D19FBF003D2191 /* UTrieNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UTrieNode.h; path = ../../moses/src/RuleTable/UTrieNode.h; sourceTree = "<group>"; };
+ 1EE418E415C7FDCB0028F9AB /* Match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Match.h; path = "../../moses/src/fuzzy-match/Match.h"; sourceTree = "<group>"; };
+ 1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SentenceAlignment.cpp; path = "../../moses/src/fuzzy-match/SentenceAlignment.cpp"; sourceTree = "<group>"; };
+ 1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceAlignment.h; path = "../../moses/src/fuzzy-match/SentenceAlignment.h"; sourceTree = "<group>"; };
+ 1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SuffixArray.cpp; path = "../../moses/src/fuzzy-match/SuffixArray.cpp"; sourceTree = "<group>"; };
+ 1EE418E815C7FDCB0028F9AB /* SuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SuffixArray.h; path = "../../moses/src/fuzzy-match/SuffixArray.h"; sourceTree = "<group>"; };
+ 1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FuzzyMatchWrapper.cpp; path = "../../moses/src/fuzzy-match/FuzzyMatchWrapper.cpp"; sourceTree = "<group>"; };
+ 1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FuzzyMatchWrapper.h; path = "../../moses/src/fuzzy-match/FuzzyMatchWrapper.h"; sourceTree = "<group>"; };
+ 1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = "../../moses/src/fuzzy-match/Vocabulary.cpp"; sourceTree = "<group>"; };
+ 1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = "../../moses/src/fuzzy-match/Vocabulary.h"; sourceTree = "<group>"; };
1EF0709114B9EFCC0052152A /* ParallelBackoff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParallelBackoff.cpp; sourceTree = "<group>"; };
1EF0709214B9EFCC0052152A /* ParallelBackoff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParallelBackoff.h; sourceTree = "<group>"; };
+ 1EF8F2C3159A61970047B613 /* HypoList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HypoList.h; path = ../../moses/src/HypoList.h; sourceTree = "<group>"; };
D2AAC046055464E500DB518D /* libmoses.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libmoses.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
@@ -624,6 +693,8 @@
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
+ 1E6D9FF015D027680064D436 /* CompactPT */,
+ 1ECF13DE15C1A82400EA1DCE /* fuzzy-match */,
1EDA803514D19ECD003D2191 /* Scope3Parser */,
1EDA803414D19EB8003D2191 /* CYKPlusParser */,
1EC7365B14B977AA00238410 /* LM */,
@@ -637,7 +708,6 @@
1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */,
1EC735D914B977AA00238410 /* BitmapContainer.cpp */,
1EC735DA14B977AA00238410 /* BitmapContainer.h */,
- 1EC735DB14B977AA00238410 /* CellCollection.h */,
1EC735DC14B977AA00238410 /* ChartCell.cpp */,
1EC735DD14B977AA00238410 /* ChartCell.h */,
1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */,
@@ -651,10 +721,8 @@
1EC735E614B977AA00238410 /* ChartManager.cpp */,
1EC735E714B977AA00238410 /* ChartManager.h */,
1EC735E914B977AA00238410 /* ChartRuleLookupManager.h */,
- 1EC735EE14B977AA00238410 /* ChartTranslationOption.cpp */,
- 1EC735EF14B977AA00238410 /* ChartTranslationOption.h */,
- 1EC735F014B977AA00238410 /* ChartTranslationOptionCollection.cpp */,
- 1EC735F114B977AA00238410 /* ChartTranslationOptionCollection.h */,
+ 1E365EE816120F4600BA335B /* ChartTranslationOptions.cpp */,
+ 1E365EE916120F4600BA335B /* ChartTranslationOptions.h */,
1EC735F214B977AA00238410 /* ChartTranslationOptionList.cpp */,
1EC735F314B977AA00238410 /* ChartTranslationOptionList.h */,
1EC735F414B977AA00238410 /* ChartTrellisDetour.cpp */,
@@ -707,6 +775,7 @@
1EC7364214B977AA00238410 /* hash.cpp */,
1EC7364314B977AA00238410 /* hash.h */,
1EC7364414B977AA00238410 /* hypergraph.proto */,
+ 1EF8F2C3159A61970047B613 /* HypoList.h */,
1EC7364514B977AA00238410 /* Hypothesis.cpp */,
1EC7364614B977AA00238410 /* Hypothesis.h */,
1EC7364714B977AA00238410 /* HypothesisStack.cpp */,
@@ -782,6 +851,8 @@
1EC736F414B977AB00238410 /* SearchCubePruning.h */,
1EC736F514B977AB00238410 /* SearchNormal.cpp */,
1EC736F614B977AB00238410 /* SearchNormal.h */,
+ 1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */,
+ 1E879EA615A346F90051F346 /* SearchNormalBatch.h */,
1EC736F714B977AB00238410 /* Sentence.cpp */,
1EC736F814B977AB00238410 /* Sentence.h */,
1EC736F914B977AB00238410 /* SentenceStats.cpp */,
@@ -845,6 +916,39 @@
name = Products;
sourceTree = "<group>";
};
+ 1E6D9FF015D027680064D436 /* CompactPT */ = {
+ isa = PBXGroup;
+ children = (
+ 1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */,
+ 1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */,
+ 1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */,
+ 1E6D9FBE15D027560064D436 /* BlockHashIndex.h */,
+ 1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */,
+ 1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */,
+ 1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */,
+ 1E6D9FC215D027560064D436 /* ConsistantPhrases.h */,
+ 1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */,
+ 1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */,
+ 1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */,
+ 1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */,
+ 1E6D9FC815D027560064D436 /* ListCoders.h */,
+ 1E6D9FC915D027560064D436 /* MmapAllocator.h */,
+ 1E6D9FCA15D027560064D436 /* MonotonicVector.h */,
+ 1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */,
+ 1E6D9FCC15D027560064D436 /* MurmurHash3.h */,
+ 1E6D9FCD15D027560064D436 /* PackedArray.h */,
+ 1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */,
+ 1E6D9FCF15D027560064D436 /* PhraseDecoder.h */,
+ 1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */,
+ 1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */,
+ 1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */,
+ 1E6D9FD315D027560064D436 /* PhraseTableCreator.h */,
+ 1E6D9FD415D027560064D436 /* StringVector.h */,
+ 1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */,
+ );
+ name = CompactPT;
+ sourceTree = "<group>";
+ };
1EAC362B14CDC76200DF97C3 /* RuleTable */ = {
isa = PBXGroup;
children = (
@@ -856,6 +960,8 @@
1EDA807D14D19FBF003D2191 /* PhraseDictionaryOnDisk.h */,
1EDA807E14D19FBF003D2191 /* PhraseDictionarySCFG.cpp */,
1EDA807F14D19FBF003D2191 /* PhraseDictionarySCFG.h */,
+ 1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */,
+ 1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */,
1EDA808014D19FBF003D2191 /* Trie.cpp */,
1EDA808114D19FBF003D2191 /* Trie.h */,
1EDA808214D19FBF003D2191 /* UTrie.cpp */,
@@ -878,29 +984,20 @@
1EC7361314B977AA00238410 /* DynSAInclude */ = {
isa = PBXGroup;
children = (
+ 1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */,
+ 1E1D823F15AC29BB00FE42E9 /* FileHandler.h */,
1EC7361414B977AA00238410 /* fdstream.h */,
- 1EC7361514B977AA00238410 /* file.cpp */,
- 1EC7361614B977AA00238410 /* file.h */,
1EC7361714B977AA00238410 /* hash.h */,
- 1EC7361814B977AA00238410 /* hash.h.orig */,
1EC7361914B977AA00238410 /* onlineRLM.h */,
- 1EC7361A14B977AA00238410 /* onlineRLM.h.orig */,
1EC7361B14B977AA00238410 /* params.cpp */,
- 1EC7361C14B977AA00238410 /* params.cpp.orig */,
1EC7361D14B977AA00238410 /* params.h */,
- 1EC7361E14B977AA00238410 /* params.h.orig */,
1EC7361F14B977AA00238410 /* perfectHash.h */,
- 1EC7362014B977AA00238410 /* perfectHash.h.orig */,
1EC7362114B977AA00238410 /* quantizer.h */,
- 1EC7362214B977AA00238410 /* quantizer.h.orig */,
1EC7362314B977AA00238410 /* RandLMCache.h */,
- 1EC7362414B977AA00238410 /* RandLMCache.h.orig */,
1EC7362514B977AA00238410 /* RandLMFilter.h */,
- 1EC7362614B977AA00238410 /* RandLMFilter.h.orig */,
1EC7362714B977AA00238410 /* types.h */,
1EC7362814B977AA00238410 /* utils.h */,
1EC7362914B977AA00238410 /* vocab.cpp */,
- 1EC7362A14B977AA00238410 /* vocab.cpp.orig */,
1EC7362B14B977AA00238410 /* vocab.h */,
);
name = DynSAInclude;
@@ -939,9 +1036,27 @@
path = ../../moses/src/LM;
sourceTree = "<group>";
};
+ 1ECF13DE15C1A82400EA1DCE /* fuzzy-match */ = {
+ isa = PBXGroup;
+ children = (
+ 1EE418E415C7FDCB0028F9AB /* Match.h */,
+ 1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */,
+ 1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */,
+ 1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */,
+ 1EE418E815C7FDCB0028F9AB /* SuffixArray.h */,
+ 1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */,
+ 1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */,
+ 1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */,
+ 1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */,
+ );
+ name = "fuzzy-match";
+ sourceTree = "<group>";
+ };
1EDA803414D19EB8003D2191 /* CYKPlusParser */ = {
isa = PBXGroup;
children = (
+ 1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */,
+ 1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */,
1EDA806214D19F12003D2191 /* ChartRuleLookupManagerCYKPlus.cpp */,
1EDA806314D19F12003D2191 /* ChartRuleLookupManagerCYKPlus.h */,
1EDA806414D19F12003D2191 /* ChartRuleLookupManagerMemory.cpp */,
@@ -995,7 +1110,6 @@
1EC7374914B977AB00238410 /* AlignmentInfoCollection.h in Headers */,
1EC7374B14B977AB00238410 /* BilingualDynSuffixArray.h in Headers */,
1EC7374D14B977AB00238410 /* BitmapContainer.h in Headers */,
- 1EC7374E14B977AB00238410 /* CellCollection.h in Headers */,
1EC7375014B977AB00238410 /* ChartCell.h in Headers */,
1EC7375214B977AB00238410 /* ChartCellCollection.h in Headers */,
1EC7375314B977AB00238410 /* ChartCellLabel.h in Headers */,
@@ -1004,8 +1118,6 @@
1EC7375814B977AB00238410 /* ChartHypothesisCollection.h in Headers */,
1EC7375A14B977AB00238410 /* ChartManager.h in Headers */,
1EC7375C14B977AB00238410 /* ChartRuleLookupManager.h in Headers */,
- 1EC7376214B977AB00238410 /* ChartTranslationOption.h in Headers */,
- 1EC7376414B977AB00238410 /* ChartTranslationOptionCollection.h in Headers */,
1EC7376614B977AB00238410 /* ChartTranslationOptionList.h in Headers */,
1EC7376814B977AB00238410 /* ChartTrellisDetour.h in Headers */,
1EC7376A14B977AB00238410 /* ChartTrellisDetourQueue.h in Headers */,
@@ -1021,7 +1133,6 @@
1EC7377D14B977AB00238410 /* Dictionary.h in Headers */,
1EC7378514B977AB00238410 /* DummyScoreProducers.h in Headers */,
1EC7378614B977AB00238410 /* fdstream.h in Headers */,
- 1EC7378814B977AB00238410 /* file.h in Headers */,
1EC7378914B977AB00238410 /* hash.h in Headers */,
1EC7378A14B977AB00238410 /* onlineRLM.h in Headers */,
1EC7378C14B977AB00238410 /* params.h in Headers */,
@@ -1150,6 +1261,34 @@
1EDA808F14D19FBF003D2191 /* Trie.h in Headers */,
1EDA809114D19FBF003D2191 /* UTrie.h in Headers */,
1EDA809314D19FBF003D2191 /* UTrieNode.h in Headers */,
+ 1EF8F2C4159A61970047B613 /* HypoList.h in Headers */,
+ 1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */,
+ 1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */,
+ 1E0BA41915B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h in Headers */,
+ 1E619EA215B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h in Headers */,
+ 1EE418ED15C7FDCB0028F9AB /* Match.h in Headers */,
+ 1EE418EF15C7FDCB0028F9AB /* SentenceAlignment.h in Headers */,
+ 1EE418F115C7FDCB0028F9AB /* SuffixArray.h in Headers */,
+ 1EE418F315C7FDCB0028F9AB /* FuzzyMatchWrapper.h in Headers */,
+ 1EE418F515C7FDCB0028F9AB /* Vocabulary.h in Headers */,
+ 1E6D9FD715D027560064D436 /* BlockHashIndex.h in Headers */,
+ 1E6D9FD815D027560064D436 /* CanonicalHuffman.h in Headers */,
+ 1E6D9FDA15D027560064D436 /* CmphStringVectorAdapter.h in Headers */,
+ 1E6D9FDB15D027560064D436 /* ConsistantPhrases.h in Headers */,
+ 1E6D9FDE15D027560064D436 /* LexicalReorderingTableCompact.h in Headers */,
+ 1E6D9FE015D027560064D436 /* LexicalReorderingTableCreator.h in Headers */,
+ 1E6D9FE115D027560064D436 /* ListCoders.h in Headers */,
+ 1E6D9FE215D027560064D436 /* MmapAllocator.h in Headers */,
+ 1E6D9FE315D027560064D436 /* MonotonicVector.h in Headers */,
+ 1E6D9FE515D027560064D436 /* MurmurHash3.h in Headers */,
+ 1E6D9FE615D027560064D436 /* PackedArray.h in Headers */,
+ 1E6D9FE815D027560064D436 /* PhraseDecoder.h in Headers */,
+ 1E6D9FEA15D027560064D436 /* PhraseDictionaryCompact.h in Headers */,
+ 1E6D9FEC15D027560064D436 /* PhraseTableCreator.h in Headers */,
+ 1E6D9FED15D027560064D436 /* StringVector.h in Headers */,
+ 1E6D9FEE15D027560064D436 /* TargetPhraseCollectionCache.h in Headers */,
+ 1EC32DB915D2D90700A313B1 /* ThrowingFwrite.h in Headers */,
+ 1E365EEB16120F4600BA335B /* ChartTranslationOptions.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -1179,7 +1318,7 @@
08FB7793FE84155DC02AAC07 /* Project object */ = {
isa = PBXProject;
attributes = {
- LastUpgradeCheck = 0410;
+ LastUpgradeCheck = 0420;
};
buildConfigurationList = 1DEB91EF08733DB70010E9CD /* Build configuration list for PBXProject "moses" */;
compatibilityVersion = "Xcode 3.2";
@@ -1214,8 +1353,6 @@
1EC7375514B977AB00238410 /* ChartHypothesis.cpp in Sources */,
1EC7375714B977AB00238410 /* ChartHypothesisCollection.cpp in Sources */,
1EC7375914B977AB00238410 /* ChartManager.cpp in Sources */,
- 1EC7376114B977AB00238410 /* ChartTranslationOption.cpp in Sources */,
- 1EC7376314B977AB00238410 /* ChartTranslationOptionCollection.cpp in Sources */,
1EC7376514B977AB00238410 /* ChartTranslationOptionList.cpp in Sources */,
1EC7376714B977AB00238410 /* ChartTrellisDetour.cpp in Sources */,
1EC7376914B977AB00238410 /* ChartTrellisDetourQueue.cpp in Sources */,
@@ -1229,7 +1366,6 @@
1EC7377A14B977AB00238410 /* DecodeStepTranslation.cpp in Sources */,
1EC7377C14B977AB00238410 /* Dictionary.cpp in Sources */,
1EC7378414B977AB00238410 /* DummyScoreProducers.cpp in Sources */,
- 1EC7378714B977AB00238410 /* file.cpp in Sources */,
1EC7378B14B977AB00238410 /* params.cpp in Sources */,
1EC7379314B977AB00238410 /* vocab.cpp in Sources */,
1EC7379514B977AB00238410 /* DynSuffixArray.cpp in Sources */,
@@ -1334,6 +1470,24 @@
1EDA808E14D19FBF003D2191 /* Trie.cpp in Sources */,
1EDA809014D19FBF003D2191 /* UTrie.cpp in Sources */,
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */,
+ 1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */,
+ 1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */,
+ 1E0BA41815B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp in Sources */,
+ 1E619EA115B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp in Sources */,
+ 1EE418EE15C7FDCB0028F9AB /* SentenceAlignment.cpp in Sources */,
+ 1EE418F015C7FDCB0028F9AB /* SuffixArray.cpp in Sources */,
+ 1EE418F215C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp in Sources */,
+ 1EE418F415C7FDCB0028F9AB /* Vocabulary.cpp in Sources */,
+ 1E6D9FD615D027560064D436 /* BlockHashIndex.cpp in Sources */,
+ 1E6D9FD915D027560064D436 /* CmphStringVectorAdapter.cpp in Sources */,
+ 1E6D9FDD15D027560064D436 /* LexicalReorderingTableCompact.cpp in Sources */,
+ 1E6D9FDF15D027560064D436 /* LexicalReorderingTableCreator.cpp in Sources */,
+ 1E6D9FE415D027560064D436 /* MurmurHash3.cpp in Sources */,
+ 1E6D9FE715D027560064D436 /* PhraseDecoder.cpp in Sources */,
+ 1E6D9FE915D027560064D436 /* PhraseDictionaryCompact.cpp in Sources */,
+ 1E6D9FEB15D027560064D436 /* PhraseTableCreator.cpp in Sources */,
+ 1EC32DB815D2D90700A313B1 /* ThrowingFwrite.cpp in Sources */,
+ 1E365EEA16120F4600BA335B /* ChartTranslationOptions.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -1344,6 +1498,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_DYNAMIC_NO_PIC = NO;
GCC_MODEL_TUNING = G5;
@@ -1358,6 +1513,9 @@
"_FILE_OFFSET_BITS=64",
_LARGE_FILES,
WITH_THREADS,
+ IS_XCODE,
+ HAVE_CMPH,
+ "KENLM_MAX_ORDER=7",
);
HEADER_SEARCH_PATHS = (
../..,
@@ -1382,6 +1540,7 @@
"\"$(SRCROOT)/../../moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
);
PRODUCT_NAME = moses;
+ USER_HEADER_SEARCH_PATHS = "../.. ../../moses/src ../../irstlm/include ../../srilm/include ../../kenlm ../../randlm/include /opt/local/include ../../synlm/hhmm/wsjparse/include ../../synlm/hhmm/rvtl/include/ ../.. ../../cmph/include";
};
name = Debug;
};
@@ -1389,6 +1548,7 @@
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_MODEL_TUNING = G5;
GCC_PREPROCESSOR_DEFINITIONS = (
@@ -1401,6 +1561,9 @@
"_FILE_OFFSET_BITS=64",
_LARGE_FILES,
WITH_THREADS,
+ IS_XCODE,
+ HAVE_CMPH,
+ "KENLM_MAX_ORDER=7",
);
HEADER_SEARCH_PATHS = (
../..,
@@ -1425,6 +1588,7 @@
"\"$(SRCROOT)/../../moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
);
PRODUCT_NAME = moses;
+ USER_HEADER_SEARCH_PATHS = "../.. ../../moses/src ../../irstlm/include ../../srilm/include ../../kenlm ../../randlm/include /opt/local/include ../../synlm/hhmm/wsjparse/include ../../synlm/hhmm/rvtl/include/ ../.. ../../cmph/include";
};
name = Release;
};
diff --git a/contrib/other-builds/moses/.cproject b/contrib/other-builds/moses/.cproject
index 2995d5eae..0148cc6f2 100644
--- a/contrib/other-builds/moses/.cproject
+++ b/contrib/other-builds/moses/.cproject
@@ -3,8 +3,8 @@
<cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
- <cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" moduleId="org.eclipse.cdt.core.settings" name="Debug">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.656913512">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.656913512" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses"/>
@@ -13,7 +13,7 @@
</externalSetting>
</externalSettings>
<extensions>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -21,65 +21,70 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
- <folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.497902212" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
- <targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1820609450" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
- <builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1998579330" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
- <tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1330311562" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
- <tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1226580551" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
- <inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.102127808" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
- <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
- <additionalInput kind="additionalinput" paths="$(LIBS)"/>
- </inputType>
- </tool>
- <tool command="as" commandLinePattern="${COMMAND} ${FLAGS} ${OUTPUT_FLAG} ${OUTPUT_PREFIX}${OUTPUT} ${INPUTS}" id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.1556759720" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.897776351" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.1820797229" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
- <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
- <option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.1898625650" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.806998992" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.include.paths.1819917957" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/opt/local/include"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
+ <configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.656913512" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1793369992" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1051650049" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
+ <builder buildPath="${workspace_loc:/moses/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.505583888" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1976472988" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
+ <option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1759650532" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.debug.option.debugging.level.2123672332" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.option.include.paths.57896781" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
+ <listOptionValue builtIn="false" value="/opt/local/include/"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../irstlm/include"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../srilm/include"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../moses/src"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
- <option id="gnu.cpp.compiler.option.preprocessor.def.1569452418" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
- <listOptionValue builtIn="false" value="LM_SRI"/>
- <listOptionValue builtIn="false" value="LM_IRST"/>
+ <option id="gnu.cpp.compiler.option.preprocessor.def.752586397" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+ <listOptionValue builtIn="false" value="LM_IRST"/>
+ <listOptionValue builtIn="false" value="_FILE_OFFSET_BIT=64"/>
+ <listOptionValue builtIn="false" value="_LARGE_FILES"/>
</option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.401409202" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
- <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.753046525" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.1396911098" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1919272901" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
+ <option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1524900118" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.debug.option.debugging.level.581728958" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.877210753" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1168585173" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.2074660557" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.340054018" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
+ <additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
+ <additionalInput kind="additionalinput" paths="$(LIBS)"/>
+ </inputType>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.933467113" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.99047750" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1722029461" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1432960145" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1906856645" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.460380900" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.1692203139" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.538301588" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.854427429" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.558758254" name="SyntacticLanguageModelState.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelState.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1930327037" name="SyntacticLanguageModelFiles.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModelFiles.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1751563578" name="PhraseTableCreator.cpp" rcbsApplicability="disable" resourcePath="CompactPT/PhraseTableCreator.cpp" toolsToInvoke="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1652631861">
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327.1652631861" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327"/>
+ </fileInfo>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1174630266" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.707830535" name="SRI.h" rcbsApplicability="disable" resourcePath="LM/SRI.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.160366559" name="LDHT.h" rcbsApplicability="disable" resourcePath="LM/LDHT.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.622077510" name="ParallelBackoff.h" rcbsApplicability="disable" resourcePath="LM/ParallelBackoff.h" toolsToInvoke=""/>
+ <fileInfo id="cdt.managedbuild.config.gnu.exe.debug.656913512.1084194539" name="SyntacticLanguageModel.h" rcbsApplicability="disable" resourcePath="SyntacticLanguageModel.h" toolsToInvoke=""/>
<sourceEntries>
- <entry excluding="SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/LDHT.cpp|LM/LDHT.h|LM/Remote.h|LM/Remote.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
+ <entry excluding="CompactPT/PhraseTableCreator.cpp|CompactPT/LexicalReorderingTableCreator.cpp|LM/SRI.h|LM/SRI.cpp|SyntacticLanguageModelState.h|SyntacticLanguageModelFiles.h|SyntacticLanguageModel.h|SyntacticLanguageModel.cpp|LM/ParallelBackoff.h|LM/ParallelBackoff.cpp|LM/Rand.h|LM/Rand.cpp|LM/LDHT.h|LM/LDHT.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
- <cconfiguration id="cdt.managedbuild.config.macosx.exe.release.722580523">
- <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.722580523" moduleId="org.eclipse.cdt.core.settings" name="Release">
+ <cconfiguration id="cdt.managedbuild.config.gnu.exe.release.401150096">
+ <storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.401150096" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
- <extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
+ <extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@@ -88,59 +93,41 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.722580523" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
- <folderInfo id="cdt.managedbuild.config.macosx.exe.release.722580523." name="/" resourcePath="">
- <toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.2070671582" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
- <targetPlatform id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.503591386" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
- <builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.108117223" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
- <tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1203406445" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
- <tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.1539915639" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
- <inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1333560300" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
+ <configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.401150096" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
+ <folderInfo id="cdt.managedbuild.config.gnu.exe.release.401150096." name="/" resourcePath="">
+ <toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.36295137" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
+ <targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.538725710" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
+ <builder buildPath="${workspace_loc:/moses/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1875953334" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.archiver.base.1633496039" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2060881562" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
+ <option id="gnu.cpp.compiler.exe.release.option.optimization.level.1375372870" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
+ <option id="gnu.cpp.compiler.exe.release.option.debugging.level.815283803" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1020483420" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.85324871" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
+ <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1137534635" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
+ <option id="gnu.c.compiler.exe.release.option.debugging.level.143589037" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
+ <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.304912704" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ </tool>
+ <tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.283583965" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
+ <tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.2059280959" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2020956494" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
- <tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.1693865756" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
- <inputType id="cdt.managedbuild.tool.gnu.assembler.input.2000339940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.505919286" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
- <tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
- <option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1036481202" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.484015287" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
- <option id="gnu.cpp.compiler.option.preprocessor.def.1089615214" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
- <listOptionValue builtIn="false" value="LM_SRI"/>
- <listOptionValue builtIn="false" value="LM_IRST"/>
- <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
- </option>
- <option id="gnu.cpp.compiler.option.include.paths.1722702487" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/opt/local/include"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/moses/src"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/srilm/include"/>
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt/irstlm/include"/>
- </option>
- <inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
- </tool>
- <tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
- <option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1487222992" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
- <option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1171203697" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
- <inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1172147378" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
+ <tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.782286837" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
+ <inputType id="cdt.managedbuild.tool.gnu.assembler.input.1766138143" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
- <fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1831545277" name="Rand.h" rcbsApplicability="disable" resourcePath="LM/Rand.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1743378025" name="ORLM.h" rcbsApplicability="disable" resourcePath="LM/ORLM.h" toolsToInvoke=""/>
- <fileInfo id="cdt.managedbuild.config.macosx.exe.release.722580523.1490362543" name="Remote.h" rcbsApplicability="disable" resourcePath="LM/Remote.h" toolsToInvoke=""/>
- <sourceEntries>
- <entry excluding="LM/LDHT.cpp|LM/Rand.h|LM/Rand.cpp|LM/ORLM.h|LM/ORLM.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
- </sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
- <project id="moses.cdt.managedbuild.target.macosx.exe.1209017164" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
+ <project id="moses.cdt.managedbuild.target.gnu.exe.1375079569" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
@@ -150,12 +137,24 @@
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1404156839;cdt.managedbuild.tool.gnu.c.compiler.input.1172147378">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.401150096;cdt.managedbuild.config.gnu.exe.release.401150096.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.85324871;cdt.managedbuild.tool.gnu.c.compiler.input.304912704">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.656913512;cdt.managedbuild.config.gnu.exe.debug.656913512.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1774992327;cdt.managedbuild.tool.gnu.cpp.compiler.input.1905116220">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ </scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426;cdt.managedbuild.config.gnu.macosx.exe.debug.1895695426.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1867588805;cdt.managedbuild.tool.gnu.cpp.compiler.input.1110302565">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.722580523;cdt.managedbuild.config.macosx.exe.release.722580523.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.1662892925;cdt.managedbuild.tool.gnu.cpp.compiler.input.936283391">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.656913512;cdt.managedbuild.config.gnu.exe.debug.656913512.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.2126314903;cdt.managedbuild.tool.gnu.c.compiler.input.877210753">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
+ </scannerConfigBuildInfo>
+ <scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.401150096;cdt.managedbuild.config.gnu.exe.release.401150096.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2060881562;cdt.managedbuild.tool.gnu.cpp.compiler.input.1020483420">
+ <autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
+ </scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/moses"/>
diff --git a/contrib/other-builds/moses/.project b/contrib/other-builds/moses/.project
index 8d534dbd4..31c11819a 100644
--- a/contrib/other-builds/moses/.project
+++ b/contrib/other-builds/moses/.project
@@ -102,6 +102,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/AlignmentInfoCollection.h</locationURI>
</link>
<link>
+ <name>ApplicableRuleTrie.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.cpp</locationURI>
+ </link>
+ <link>
+ <name>ApplicableRuleTrie.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.h</locationURI>
+ </link>
+ <link>
<name>BilingualDynSuffixArray.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/BilingualDynSuffixArray.cpp</locationURI>
@@ -272,6 +282,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/ChartTrellisPathList.h</locationURI>
</link>
<link>
+ <name>CompactPT</name>
+ <type>2</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CompactPT</locationURI>
+ </link>
+ <link>
<name>ConfusionNet.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/ConfusionNet.cpp</locationURI>
@@ -442,6 +457,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/FloydWarshall.h</locationURI>
</link>
<link>
+ <name>FuzzyMatchWrapper.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/FuzzyMatchWrapper.cpp</locationURI>
+ </link>
+ <link>
+ <name>FuzzyMatchWrapper.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/FuzzyMatchWrapper.h</locationURI>
+ </link>
+ <link>
<name>GenerationDictionary.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/GenerationDictionary.cpp</locationURI>
@@ -537,6 +562,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/InputType.h</locationURI>
</link>
<link>
+ <name>IntermediateVarSpanNode.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/IntermediateVarSpanNode.h</locationURI>
+ </link>
+ <link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Jamfile</locationURI>
@@ -607,6 +637,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Manager.h</locationURI>
</link>
<link>
+ <name>Match.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/Match.h</locationURI>
+ </link>
+ <link>
<name>NonTerminal.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/NonTerminal.cpp</locationURI>
@@ -662,6 +697,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Parameter.h</locationURI>
</link>
<link>
+ <name>Parser.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.cpp</locationURI>
+ </link>
+ <link>
+ <name>Parser.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.h</locationURI>
+ </link>
+ <link>
<name>PartialTranslOptColl.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/PartialTranslOptColl.cpp</locationURI>
@@ -809,7 +854,7 @@
<link>
<name>RuleTable</name>
<type>2</type>
- <locationURI>virtual:/virtual</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable</locationURI>
</link>
<link>
<name>SRI.lo</name>
@@ -822,11 +867,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SRI.o</locationURI>
</link>
<link>
- <name>Scope3Parser</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
<name>ScoreComponentCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/ScoreComponentCollection.cpp</locationURI>
@@ -887,6 +927,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SearchNormal.h</locationURI>
</link>
<link>
+ <name>SearchNormalBatch.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-1-ECLIPSE_HOME/workspace/github/hieuhoang/moses/src/SearchNormalBatch.cpp</locationURI>
+ </link>
+ <link>
+ <name>SearchNormalBatch.h</name>
+ <type>1</type>
+ <locationURI>PARENT-1-ECLIPSE_HOME/workspace/github/hieuhoang/moses/src/SearchNormalBatch.h</locationURI>
+ </link>
+ <link>
<name>Sentence.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Sentence.cpp</locationURI>
@@ -897,6 +947,21 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Sentence.h</locationURI>
</link>
<link>
+ <name>SentenceAlignment.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SentenceAlignment.cpp</locationURI>
+ </link>
+ <link>
+ <name>SentenceAlignment.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SentenceAlignment.h</locationURI>
+ </link>
+ <link>
+ <name>SentenceMap.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/SentenceMap.h</locationURI>
+ </link>
+ <link>
<name>SentenceStats.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SentenceStats.cpp</locationURI>
@@ -917,6 +982,26 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SquareMatrix.h</locationURI>
</link>
<link>
+ <name>StackLattice.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLattice.h</locationURI>
+ </link>
+ <link>
+ <name>StackLatticeBuilder.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.cpp</locationURI>
+ </link>
+ <link>
+ <name>StackLatticeBuilder.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.h</locationURI>
+ </link>
+ <link>
+ <name>StackLatticeSearcher.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeSearcher.h</locationURI>
+ </link>
+ <link>
<name>StackVec.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/StackVec.h</locationURI>
@@ -942,6 +1027,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/StaticData.o</locationURI>
</link>
<link>
+ <name>SuffixArray.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SuffixArray.cpp</locationURI>
+ </link>
+ <link>
+ <name>SuffixArray.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/SuffixArray.h</locationURI>
+ </link>
+ <link>
<name>SyntacticLanguageModel.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/SyntacticLanguageModel.cpp</locationURI>
@@ -1182,6 +1277,31 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Util.o</locationURI>
</link>
<link>
+ <name>VarSpanNode.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanNode.h</locationURI>
+ </link>
+ <link>
+ <name>VarSpanTrieBuilder.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.cpp</locationURI>
+ </link>
+ <link>
+ <name>VarSpanTrieBuilder.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
+ </link>
+ <link>
+ <name>Vocabulary.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/Vocabulary.cpp</locationURI>
+ </link>
+ <link>
+ <name>Vocabulary.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/Vocabulary.h</locationURI>
+ </link>
+ <link>
<name>Word.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/Word.cpp</locationURI>
@@ -1337,6 +1457,16 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h</locationURI>
</link>
<link>
+ <name>CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp</locationURI>
+ </link>
+ <link>
+ <name>CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h</locationURI>
+ </link>
+ <link>
<name>CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp</locationURI>
@@ -1382,6 +1512,16 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
+ <name>DynSAInclude/FileHandler.cpp</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/FileHandler.cpp</locationURI>
+ </link>
+ <link>
+ <name>DynSAInclude/FileHandler.h</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/FileHandler.h</locationURI>
+ </link>
+ <link>
<name>DynSAInclude/Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/Jamfile</locationURI>
@@ -1397,26 +1537,11 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/RandLMFilter.h</locationURI>
</link>
<link>
- <name>DynSAInclude/bin</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
<name>DynSAInclude/fdstream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/fdstream.h</locationURI>
</link>
<link>
- <name>DynSAInclude/file.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/file.cpp</locationURI>
- </link>
- <link>
- <name>DynSAInclude/file.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/file.h</locationURI>
- </link>
- <link>
<name>DynSAInclude/hash.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/hash.h</locationURI>
@@ -1617,207 +1742,12 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>RuleTable/Jamfile</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Jamfile</locationURI>
- </link>
- <link>
- <name>RuleTable/Loader.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Loader.h</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderCompact.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderCompact.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderCompact.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderCompact.h</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderFactory.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderFactory.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderFactory.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderFactory.h</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderHiero.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderHiero.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderHiero.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderHiero.h</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderStandard.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderStandard.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/LoaderStandard.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/LoaderStandard.h</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionaryALSuffixArray.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionaryALSuffixArray.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionaryNodeSCFG.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryNodeSCFG.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionaryNodeSCFG.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionaryOnDisk.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionaryOnDisk.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionaryOnDisk.h</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionarySCFG.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionarySCFG.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/PhraseDictionarySCFG.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/PhraseDictionarySCFG.h</locationURI>
- </link>
- <link>
- <name>RuleTable/Trie.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Trie.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/Trie.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/Trie.h</locationURI>
- </link>
- <link>
- <name>RuleTable/UTrie.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrie.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/UTrie.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrie.h</locationURI>
- </link>
- <link>
- <name>RuleTable/UTrieNode.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrieNode.cpp</locationURI>
- </link>
- <link>
- <name>RuleTable/UTrieNode.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/UTrieNode.h</locationURI>
- </link>
- <link>
- <name>RuleTable/bin</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>Scope3Parser/ApplicableRuleTrie.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.cpp</locationURI>
- </link>
- <link>
- <name>Scope3Parser/ApplicableRuleTrie.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/ApplicableRuleTrie.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/IntermediateVarSpanNode.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/IntermediateVarSpanNode.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/Jamfile</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Jamfile</locationURI>
- </link>
- <link>
- <name>Scope3Parser/Parser.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.cpp</locationURI>
- </link>
- <link>
- <name>Scope3Parser/Parser.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/Parser.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/SentenceMap.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/SentenceMap.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/StackLattice.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLattice.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/StackLatticeBuilder.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.cpp</locationURI>
- </link>
- <link>
- <name>Scope3Parser/StackLatticeBuilder.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeBuilder.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/StackLatticeSearcher.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/StackLatticeSearcher.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/VarSpanNode.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanNode.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/VarSpanTrieBuilder.cpp</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.cpp</locationURI>
- </link>
- <link>
- <name>Scope3Parser/VarSpanTrieBuilder.h</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/VarSpanTrieBuilder.h</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin</name>
+ <name>bin/darwin-4.2.1</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>bin/darwin-4.2.1</name>
+ <name>bin/gcc-4.6</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1832,12 +1762,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1</name>
+ <name>CYKPlusParser/bin/gcc-4.6</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1857,17 +1782,12 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/lm.log</locationURI>
</link>
<link>
- <name>RuleTable/bin/darwin-4.2.1</name>
+ <name>bin/darwin-4.2.1/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>Scope3Parser/bin/darwin-4.2.1</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>bin/darwin-4.2.1/release</name>
+ <name>bin/gcc-4.6/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1882,12 +1802,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release</name>
+ <name>CYKPlusParser/bin/gcc-4.6/release</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1902,17 +1817,12 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>RuleTable/bin/darwin-4.2.1/release</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release</name>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <name>bin/gcc-4.6/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1927,12 +1837,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1952,17 +1857,12 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on</name>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -1982,12 +1882,7 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -2012,27 +1907,12 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi</name>
+ <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -2072,12 +1952,7 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/DotChartOnDisk.o</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi</name>
<type>2</type>
<locationURI>virtual:/virtual</locationURI>
</link>
@@ -2192,91 +2067,6 @@
<locationURI>virtual:/virtual</locationURI>
</link>
<link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderCompact.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderCompact.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderFactory.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderFactory.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderHiero.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderHiero.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderStandard.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/LoaderStandard.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryALSuffixArray.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryALSuffixArray.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryNodeSCFG.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryNodeSCFG.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryOnDisk.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionaryOnDisk.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionarySCFG.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/PhraseDictionarySCFG.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Trie.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Trie.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrie.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrie.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrieNode.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/UTrieNode.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ApplicableRuleTrie.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/ApplicableRuleTrie.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Parser.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/Parser.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/StackLatticeBuilder.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/StackLatticeBuilder.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/VarSpanTrieBuilder.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/threading-multi/VarSpanTrieBuilder.o</locationURI>
- </link>
- <link>
<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfo.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/AlignmentInfo.o</locationURI>
@@ -2752,6 +2542,56 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libmoses_internal.a</locationURI>
</link>
<link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/FuzzyMatchWrapper.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/FuzzyMatchWrapper.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Parser.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Parser.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SentenceAlignment.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SentenceAlignment.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SuffixArray.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/SuffixArray.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Vocabulary.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/Vocabulary.o</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</locationURI>
+ </link>
+ <link>
+ <name>bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libfuzzy-match.a</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/fuzzy-match/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libfuzzy-match.a</locationURI>
+ </link>
+ <link>
<name>CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</locationURI>
@@ -2787,24 +2627,39 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerCYKPlus.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerCYKPlus.o</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</name>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemory.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemory.o</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude</name>
- <type>2</type>
- <locationURI>virtual:/virtual</locationURI>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemoryPerSentence.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerMemoryPerSentence.o</locationURI>
+ </link>
+ <link>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerOnDisk.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/ChartRuleLookupManagerOnDisk.o</locationURI>
+ </link>
+ <link>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartInMemory.o</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartInMemory.o</locationURI>
</link>
<link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</name>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</name>
<type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libdynsa.a</locationURI>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/DotChartOnDisk.o</locationURI>
+ </link>
+ <link>
+ <name>CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</name>
+ <type>1</type>
+ <locationURI>PARENT-3-PROJECT_LOC/moses/src/CYKPlusParser/bin/gcc-4.6/release/debug-symbols-on/link-static/threading-multi/libCYKPlusParser.a</locationURI>
</link>
<link>
<name>LM/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Base.o</name>
@@ -2922,91 +2777,6 @@
<locationURI>PARENT-3-PROJECT_LOC/moses/src/LM/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi/libLM.a</locationURI>
</link>
<link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderCompact.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderCompact.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderFactory.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderFactory.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderHiero.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderHiero.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderStandard.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/LoaderStandard.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryALSuffixArray.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryALSuffixArray.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNodeSCFG.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryNodeSCFG.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryOnDisk.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionaryOnDisk.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionarySCFG.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/PhraseDictionarySCFG.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Trie.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Trie.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrie.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrie.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrieNode.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/UTrieNode.o</locationURI>
- </link>
- <link>
- <name>RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libRuleTable.a</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/RuleTable/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libRuleTable.a</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/ApplicableRuleTrie.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parser.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/Parser.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/StackLatticeBuilder.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/VarSpanTrieBuilder.o</locationURI>
- </link>
- <link>
- <name>Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/libScope3Parser.a</locationURI>
- </link>
- <link>
<name>bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
@@ -3021,35 +2791,5 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/src/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
</link>
- <link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/clang-darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/file.o</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/params.o</locationURI>
- </link>
- <link>
- <name>DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</name>
- <type>1</type>
- <locationURI>PARENT-3-PROJECT_LOC/moses/src/DynSAInclude/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi/DynSAInclude/vocab.o</locationURI>
- </link>
</linkedResources>
</projectDescription>
diff --git a/contrib/other-builds/mosesserver.vcxproj b/contrib/other-builds/mosesserver.vcxproj
new file mode 100644
index 000000000..6d7470eec
--- /dev/null
+++ b/contrib/other-builds/mosesserver.vcxproj
@@ -0,0 +1,102 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{85811FDF-8AD1-4490-A545-B2F51931A18C}</ProjectGuid>
+ <RootNamespace>mosescmd</RootNamespace>
+ <Keyword>Win32Proj</Keyword>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <CharacterSet>Unicode</CharacterSet>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup>
+ <_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
+ <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
+ <OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
+ <IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
+ <LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ <IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <Optimization>Disabled</Optimization>
+ <AdditionalIncludeDirectories>C:\xmlrpc-c\include;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <MinimalRebuild>true</MinimalRebuild>
+ <BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
+ <RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>EditAndContinue</DebugInformationFormat>
+ </ClCompile>
+ <Link>
+ <AdditionalDependencies>libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <SubSystem>Console</SubSystem>
+ <RandomizedBaseAddress>false</RandomizedBaseAddress>
+ <DataExecutionPrevention>
+ </DataExecutionPrevention>
+ <TargetMachine>MachineX86</TargetMachine>
+ <AdditionalLibraryDirectories>C:\xmlrpc-c\bin\Debug-Static-Win32;C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <AdditionalIncludeDirectories>C:\xmlrpc-c\include;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
+ <PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
+ </ClCompile>
+ <Link>
+ <AdditionalDependencies>libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <SubSystem>Console</SubSystem>
+ <OptimizeReferences>true</OptimizeReferences>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <RandomizedBaseAddress>false</RandomizedBaseAddress>
+ <DataExecutionPrevention>
+ </DataExecutionPrevention>
+ <TargetMachine>MachineX86</TargetMachine>
+ <AdditionalLibraryDirectories>C:\xmlrpc-c\bin\Release-Static-Win32;C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemGroup>
+ <ClCompile Include="..\server\mosesserver.cpp" />
+ </ItemGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/contrib/other-builds/processLexicalTableMin.xcodeproj/project.pbxproj b/contrib/other-builds/processLexicalTableMin.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..113d9723d
--- /dev/null
+++ b/contrib/other-builds/processLexicalTableMin.xcodeproj/project.pbxproj
@@ -0,0 +1,297 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 46;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 1E6D9FF115D027F00064D436 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EB3EBD515D0269B006B9CF1 /* libmoses.a */; };
+ 1EB3EBB315D024C7006B9CF1 /* processLexicalTableMin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+ 1E6D9FF215D0292D0064D436 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
+ proxyType = 1;
+ remoteGlobalIDString = D2AAC045055464E500DB518D;
+ remoteInfo = moses;
+ };
+ 1EB3EBD415D0269B006B9CF1 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
+ proxyType = 2;
+ remoteGlobalIDString = D2AAC046055464E500DB518D;
+ remoteInfo = moses;
+ };
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 1E3A0AEA15D0242A003EF9B4 /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+ 1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processLexicalTableMin; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = processLexicalTableMin.cpp; path = ../../misc/processLexicalTableMin.cpp; sourceTree = "<group>"; };
+ 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ 1E3A0AE915D0242A003EF9B4 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1E6D9FF115D027F00064D436 /* libmoses.a in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 1E3A0AE115D02427003EF9B4 = {
+ isa = PBXGroup;
+ children = (
+ 1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */,
+ 1E3A0AED15D0242A003EF9B4 /* Products */,
+ 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */,
+ );
+ sourceTree = "<group>";
+ };
+ 1E3A0AED15D0242A003EF9B4 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ 1EB3EBD115D0269B006B9CF1 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1EB3EBD515D0269B006B9CF1 /* libmoses.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+ 1E3A0AEB15D0242A003EF9B4 /* processLexicalTableMin */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1E3A0AF615D0242B003EF9B4 /* Build configuration list for PBXNativeTarget "processLexicalTableMin" */;
+ buildPhases = (
+ 1E3A0AE815D0242A003EF9B4 /* Sources */,
+ 1E3A0AE915D0242A003EF9B4 /* Frameworks */,
+ 1E3A0AEA15D0242A003EF9B4 /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ 1E6D9FF315D0292D0064D436 /* PBXTargetDependency */,
+ );
+ name = processLexicalTableMin;
+ productName = processLexicalTableMin;
+ productReference = 1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */;
+ productType = "com.apple.product-type.tool";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 1E3A0AE315D02427003EF9B4 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1E3A0AE615D02427003EF9B4 /* Build configuration list for PBXProject "processLexicalTableMin" */;
+ compatibilityVersion = "Xcode 3.2";
+ developmentRegion = English;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ );
+ mainGroup = 1E3A0AE115D02427003EF9B4;
+ productRefGroup = 1E3A0AED15D0242A003EF9B4 /* Products */;
+ projectDirPath = "";
+ projectReferences = (
+ {
+ ProductGroup = 1EB3EBD115D0269B006B9CF1 /* Products */;
+ ProjectRef = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
+ },
+ );
+ projectRoot = "";
+ targets = (
+ 1E3A0AEB15D0242A003EF9B4 /* processLexicalTableMin */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXReferenceProxy section */
+ 1EB3EBD515D0269B006B9CF1 /* libmoses.a */ = {
+ isa = PBXReferenceProxy;
+ fileType = archive.ar;
+ path = libmoses.a;
+ remoteRef = 1EB3EBD415D0269B006B9CF1 /* PBXContainerItemProxy */;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+/* End PBXReferenceProxy section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 1E3A0AE815D0242A003EF9B4 /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1EB3EBB315D024C7006B9CF1 /* processLexicalTableMin.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+ 1E6D9FF315D0292D0064D436 /* PBXTargetDependency */ = {
+ isa = PBXTargetDependency;
+ name = moses;
+ targetProxy = 1E6D9FF215D0292D0064D436 /* PBXContainerItemProxy */;
+ };
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+ 1E3A0AF415D0242B003EF9B4 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = NO;
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = (
+ ../../,
+ ../../irstlm/include,
+ /opt/local/include,
+ );
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = macosx;
+ USER_HEADER_SEARCH_PATHS = "../../ ../../irstlm/include /opt/local/include ../../moses/src";
+ };
+ name = Debug;
+ };
+ 1E3A0AF515D0242B003EF9B4 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = YES;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ HEADER_SEARCH_PATHS = (
+ ../../,
+ ../../irstlm/include,
+ /opt/local/include,
+ );
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ SDKROOT = macosx;
+ USER_HEADER_SEARCH_PATHS = "../../ ../../irstlm/include /opt/local/include ../../moses/src";
+ };
+ name = Release;
+ };
+ 1E3A0AF715D0242B003EF9B4 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
+ "GCC_PREPROCESSOR_DEFINITIONS[arch=*]" = WITH_THREADS;
+ LIBRARY_SEARCH_PATHS = (
+ ../../irstlm/lib,
+ ../../srilm/lib/macosx,
+ ../../randlm/lib,
+ /opt/local/lib,
+ );
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
+ "-lrandlm",
+ "-lboost_thread-mt",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1E3A0AF815D0242B003EF9B4 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
+ LIBRARY_SEARCH_PATHS = (
+ ../../irstlm/lib,
+ ../../srilm/lib/macosx,
+ ../../randlm/lib,
+ /opt/local/lib,
+ );
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
+ "-lrandlm",
+ "-lboost_thread-mt",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1E3A0AE615D02427003EF9B4 /* Build configuration list for PBXProject "processLexicalTableMin" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E3A0AF415D0242B003EF9B4 /* Debug */,
+ 1E3A0AF515D0242B003EF9B4 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1E3A0AF615D0242B003EF9B4 /* Build configuration list for PBXNativeTarget "processLexicalTableMin" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E3A0AF715D0242B003EF9B4 /* Debug */,
+ 1E3A0AF815D0242B003EF9B4 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 1E3A0AE315D02427003EF9B4 /* Project object */;
+}
diff --git a/contrib/other-builds/processPhraseTableMin.xcodeproj/project.pbxproj b/contrib/other-builds/processPhraseTableMin.xcodeproj/project.pbxproj
new file mode 100644
index 000000000..9db1d49b8
--- /dev/null
+++ b/contrib/other-builds/processPhraseTableMin.xcodeproj/project.pbxproj
@@ -0,0 +1,304 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 46;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 1EF3D68A15D02AEF00969478 /* processPhraseTableMin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */; };
+ 1EF3D6A415D02B6400969478 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF3D69915D02B4400969478 /* libmoses.a */; };
+/* End PBXBuildFile section */
+
+/* Begin PBXContainerItemProxy section */
+ 1EF3D69815D02B4400969478 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
+ proxyType = 2;
+ remoteGlobalIDString = D2AAC046055464E500DB518D;
+ remoteInfo = moses;
+ };
+ 1EF3D6A515D02B6B00969478 /* PBXContainerItemProxy */ = {
+ isa = PBXContainerItemProxy;
+ containerPortal = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
+ proxyType = 1;
+ remoteGlobalIDString = D2AAC045055464E500DB518D;
+ remoteInfo = moses;
+ };
+/* End PBXContainerItemProxy section */
+
+/* Begin PBXCopyFilesBuildPhase section */
+ 1E6D9FFD15D02A8D0064D436 /* CopyFiles */ = {
+ isa = PBXCopyFilesBuildPhase;
+ buildActionMask = 2147483647;
+ dstPath = /usr/share/man/man1/;
+ dstSubfolderSpec = 0;
+ files = (
+ );
+ runOnlyForDeploymentPostprocessing = 1;
+ };
+/* End PBXCopyFilesBuildPhase section */
+
+/* Begin PBXFileReference section */
+ 1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processPhraseTableMin; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = processPhraseTableMin.cpp; path = ../../misc/processPhraseTableMin.cpp; sourceTree = "<group>"; };
+ 1EF3D69415D02B4400969478 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
+/* End PBXFileReference section */
+
+/* Begin PBXFrameworksBuildPhase section */
+ 1E6D9FFC15D02A8D0064D436 /* Frameworks */ = {
+ isa = PBXFrameworksBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1EF3D6A415D02B6400969478 /* libmoses.a in Frameworks */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXFrameworksBuildPhase section */
+
+/* Begin PBXGroup section */
+ 1E6D9FF415D02A8C0064D436 = {
+ isa = PBXGroup;
+ children = (
+ 1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */,
+ 1E6DA00015D02A8D0064D436 /* Products */,
+ 1EF3D69415D02B4400969478 /* moses.xcodeproj */,
+ );
+ sourceTree = "<group>";
+ };
+ 1E6DA00015D02A8D0064D436 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+ 1EF3D69515D02B4400969478 /* Products */ = {
+ isa = PBXGroup;
+ children = (
+ 1EF3D69915D02B4400969478 /* libmoses.a */,
+ );
+ name = Products;
+ sourceTree = "<group>";
+ };
+/* End PBXGroup section */
+
+/* Begin PBXNativeTarget section */
+ 1E6D9FFE15D02A8D0064D436 /* processPhraseTableMin */ = {
+ isa = PBXNativeTarget;
+ buildConfigurationList = 1E6DA00915D02A8D0064D436 /* Build configuration list for PBXNativeTarget "processPhraseTableMin" */;
+ buildPhases = (
+ 1E6D9FFB15D02A8D0064D436 /* Sources */,
+ 1E6D9FFC15D02A8D0064D436 /* Frameworks */,
+ 1E6D9FFD15D02A8D0064D436 /* CopyFiles */,
+ );
+ buildRules = (
+ );
+ dependencies = (
+ 1EF3D6A615D02B6B00969478 /* PBXTargetDependency */,
+ );
+ name = processPhraseTableMin;
+ productName = processPhraseTableMin;
+ productReference = 1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */;
+ productType = "com.apple.product-type.tool";
+ };
+/* End PBXNativeTarget section */
+
+/* Begin PBXProject section */
+ 1E6D9FF615D02A8C0064D436 /* Project object */ = {
+ isa = PBXProject;
+ buildConfigurationList = 1E6D9FF915D02A8C0064D436 /* Build configuration list for PBXProject "processPhraseTableMin" */;
+ compatibilityVersion = "Xcode 3.2";
+ developmentRegion = English;
+ hasScannedForEncodings = 0;
+ knownRegions = (
+ en,
+ );
+ mainGroup = 1E6D9FF415D02A8C0064D436;
+ productRefGroup = 1E6DA00015D02A8D0064D436 /* Products */;
+ projectDirPath = "";
+ projectReferences = (
+ {
+ ProductGroup = 1EF3D69515D02B4400969478 /* Products */;
+ ProjectRef = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
+ },
+ );
+ projectRoot = "";
+ targets = (
+ 1E6D9FFE15D02A8D0064D436 /* processPhraseTableMin */,
+ );
+ };
+/* End PBXProject section */
+
+/* Begin PBXReferenceProxy section */
+ 1EF3D69915D02B4400969478 /* libmoses.a */ = {
+ isa = PBXReferenceProxy;
+ fileType = archive.ar;
+ path = libmoses.a;
+ remoteRef = 1EF3D69815D02B4400969478 /* PBXContainerItemProxy */;
+ sourceTree = BUILT_PRODUCTS_DIR;
+ };
+/* End PBXReferenceProxy section */
+
+/* Begin PBXSourcesBuildPhase section */
+ 1E6D9FFB15D02A8D0064D436 /* Sources */ = {
+ isa = PBXSourcesBuildPhase;
+ buildActionMask = 2147483647;
+ files = (
+ 1EF3D68A15D02AEF00969478 /* processPhraseTableMin.cpp in Sources */,
+ );
+ runOnlyForDeploymentPostprocessing = 0;
+ };
+/* End PBXSourcesBuildPhase section */
+
+/* Begin PBXTargetDependency section */
+ 1EF3D6A615D02B6B00969478 /* PBXTargetDependency */ = {
+ isa = PBXTargetDependency;
+ name = moses;
+ targetProxy = 1EF3D6A515D02B6B00969478 /* PBXContainerItemProxy */;
+ };
+/* End PBXTargetDependency section */
+
+/* Begin XCBuildConfiguration section */
+ 1E6DA00715D02A8D0064D436 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = NO;
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_DYNAMIC_NO_PIC = NO;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_OPTIMIZATION_LEVEL = 0;
+ GCC_PREPROCESSOR_DEFINITIONS = (
+ "DEBUG=1",
+ "$(inherited)",
+ );
+ GCC_SYMBOLS_PRIVATE_EXTERN = NO;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ LIBRARY_SEARCH_PATHS = "";
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ ONLY_ACTIVE_ARCH = YES;
+ SDKROOT = macosx;
+ };
+ name = Debug;
+ };
+ 1E6DA00815D02A8D0064D436 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ ALWAYS_SEARCH_USER_PATHS = NO;
+ ARCHS = "$(ARCHS_STANDARD_64_BIT)";
+ COPY_PHASE_STRIP = YES;
+ DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
+ GCC_C_LANGUAGE_STANDARD = gnu99;
+ GCC_ENABLE_OBJC_EXCEPTIONS = YES;
+ GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
+ GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
+ GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
+ GCC_WARN_ABOUT_RETURN_TYPE = YES;
+ GCC_WARN_UNUSED_VARIABLE = YES;
+ LIBRARY_SEARCH_PATHS = "";
+ MACOSX_DEPLOYMENT_TARGET = 10.7;
+ SDKROOT = macosx;
+ };
+ name = Release;
+ };
+ 1E6DA00A15D02A8D0064D436 /* Debug */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
+ HEADER_SEARCH_PATHS = (
+ ../../,
+ ../../irstlm/include,
+ /opt/local/include,
+ ../../moses/src,
+ ../../cmph/include,
+ );
+ LIBRARY_SEARCH_PATHS = (
+ ../../irstlm/lib,
+ ../../srilm/lib/macosx,
+ ../../randlm/lib,
+ /opt/local/lib,
+ ../../cmph/lib,
+ );
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
+ "-lrandlm",
+ "-lboost_thread-mt",
+ "-lcmph",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Debug;
+ };
+ 1E6DA00B15D02A8D0064D436 /* Release */ = {
+ isa = XCBuildConfiguration;
+ buildSettings = {
+ GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
+ HEADER_SEARCH_PATHS = (
+ ../../,
+ ../../irstlm/include,
+ /opt/local/include,
+ ../../moses/src,
+ ../../cmph/include,
+ );
+ LIBRARY_SEARCH_PATHS = (
+ ../../irstlm/lib,
+ ../../srilm/lib/macosx,
+ ../../randlm/lib,
+ /opt/local/lib,
+ ../../cmph/lib,
+ );
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lirstlm",
+ "-lmisc",
+ "-ldstruct",
+ "-loolm",
+ "-lflm",
+ "-llattice",
+ "-lrandlm",
+ "-lboost_thread-mt",
+ "-lcmph",
+ );
+ PRODUCT_NAME = "$(TARGET_NAME)";
+ };
+ name = Release;
+ };
+/* End XCBuildConfiguration section */
+
+/* Begin XCConfigurationList section */
+ 1E6D9FF915D02A8C0064D436 /* Build configuration list for PBXProject "processPhraseTableMin" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E6DA00715D02A8D0064D436 /* Debug */,
+ 1E6DA00815D02A8D0064D436 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+ 1E6DA00915D02A8D0064D436 /* Build configuration list for PBXNativeTarget "processPhraseTableMin" */ = {
+ isa = XCConfigurationList;
+ buildConfigurations = (
+ 1E6DA00A15D02A8D0064D436 /* Debug */,
+ 1E6DA00B15D02A8D0064D436 /* Release */,
+ );
+ defaultConfigurationIsVisible = 0;
+ defaultConfigurationName = Release;
+ };
+/* End XCConfigurationList section */
+ };
+ rootObject = 1E6D9FF615D02A8C0064D436 /* Project object */;
+}
diff --git a/contrib/other-builds/query.sln b/contrib/other-builds/query.sln
new file mode 100755
index 000000000..8e40beace
--- /dev/null
+++ b/contrib/other-builds/query.sln
@@ -0,0 +1,29 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual C++ Express 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "query", "query.vcxproj", "{0A9F6EFE-21FD-4252-841A-599B8661A62B}"
+ ProjectSection(ProjectDependencies) = postProject
+ {A5402E0B-6ED7-465C-9669-E4124A0CDDCB} = {A5402E0B-6ED7-465C-9669-E4124A0CDDCB}
+ EndProjectSection
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kenlm", "kenlm.vcxproj", "{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {0A9F6EFE-21FD-4252-841A-599B8661A62B}.Debug|Win32.ActiveCfg = Debug|Win32
+ {0A9F6EFE-21FD-4252-841A-599B8661A62B}.Debug|Win32.Build.0 = Debug|Win32
+ {0A9F6EFE-21FD-4252-841A-599B8661A62B}.Release|Win32.ActiveCfg = Release|Win32
+ {0A9F6EFE-21FD-4252-841A-599B8661A62B}.Release|Win32.Build.0 = Release|Win32
+ {A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.ActiveCfg = Debug|Win32
+ {A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.Build.0 = Debug|Win32
+ {A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.ActiveCfg = Release|Win32
+ {A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/contrib/other-builds/query.vcxproj b/contrib/other-builds/query.vcxproj
new file mode 100755
index 000000000..b70d5002d
--- /dev/null
+++ b/contrib/other-builds/query.vcxproj
@@ -0,0 +1,89 @@
+<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+ <ItemGroup Label="ProjectConfigurations">
+ <ProjectConfiguration Include="Debug|Win32">
+ <Configuration>Debug</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ <ProjectConfiguration Include="Release|Win32">
+ <Configuration>Release</Configuration>
+ <Platform>Win32</Platform>
+ </ProjectConfiguration>
+ </ItemGroup>
+ <ItemGroup>
+ <ClCompile Include="..\..\lm\ngram_query.cc" />
+ </ItemGroup>
+ <ItemGroup>
+ <None Include="..\..\lm\ngram_query.hh" />
+ </ItemGroup>
+ <PropertyGroup Label="Globals">
+ <ProjectGuid>{0A9F6EFE-21FD-4252-841A-599B8661A62B}</ProjectGuid>
+ <Keyword>Win32Proj</Keyword>
+ <RootNamespace>query</RootNamespace>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>true</UseDebugLibraries>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+ <ConfigurationType>Application</ConfigurationType>
+ <UseDebugLibraries>false</UseDebugLibraries>
+ <WholeProgramOptimization>true</WholeProgramOptimization>
+ <CharacterSet>Unicode</CharacterSet>
+ </PropertyGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+ <ImportGroup Label="ExtensionSettings">
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+ </ImportGroup>
+ <PropertyGroup Label="UserMacros" />
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <LinkIncremental>true</LinkIncremental>
+ </PropertyGroup>
+ <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <LinkIncremental>false</LinkIncremental>
+ </PropertyGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+ <ClCompile>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <WarningLevel>Level3</WarningLevel>
+ <Optimization>Disabled</Optimization>
+ <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>../../</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\kenlm.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+ <ClCompile>
+ <WarningLevel>Level3</WarningLevel>
+ <PrecompiledHeader>
+ </PrecompiledHeader>
+ <Optimization>MaxSpeed</Optimization>
+ <FunctionLevelLinking>true</FunctionLevelLinking>
+ <IntrinsicFunctions>true</IntrinsicFunctions>
+ <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+ <AdditionalIncludeDirectories>../../</AdditionalIncludeDirectories>
+ </ClCompile>
+ <Link>
+ <SubSystem>Console</SubSystem>
+ <GenerateDebugInformation>true</GenerateDebugInformation>
+ <EnableCOMDATFolding>true</EnableCOMDATFolding>
+ <OptimizeReferences>true</OptimizeReferences>
+ <AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
+ </Link>
+ </ItemDefinitionGroup>
+ <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+ <ImportGroup Label="ExtensionTargets">
+ </ImportGroup>
+</Project> \ No newline at end of file
diff --git a/contrib/other-builds/util/.cproject b/contrib/other-builds/util/.cproject
index 46e9a02b6..8ea5ab73b 100644
--- a/contrib/other-builds/util/.cproject
+++ b/contrib/other-builds/util/.cproject
@@ -41,9 +41,12 @@
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.623959371" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.892917290" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1401298824" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
- <listOptionValue builtIn="false" value="/Users/hieuhoang/unison/workspace/github/moses-smt"/>
+ <listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
<listOptionValue builtIn="false" value="/opt/local/include"/>
</option>
+ <option id="gnu.cpp.compiler.option.preprocessor.def.1952961175" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
+ <listOptionValue builtIn="false" value="TRACE_ENABLE"/>
+ </option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1420621104" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1724141901" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
@@ -130,4 +133,5 @@
<storageModule moduleId="refreshScope" versionNumber="1">
<resource resourceType="PROJECT" workspacePath="/util"/>
</storageModule>
+ <storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>
diff --git a/contrib/python/README.md b/contrib/python/README.md
new file mode 100644
index 000000000..fa7d270c8
--- /dev/null
+++ b/contrib/python/README.md
@@ -0,0 +1,28 @@
+# Python interface to Moses
+
+The idea is to have some of Moses' internals exposed to Python (inspired on pycdec).
+
+## What's been interfaced?
+
+* Binary phrase table:
+
+ Moses::PhraseDictionaryTree.h
+
+## Building
+
+1. Build the python extension
+
+ python setup.py build_ext -i [--with-cmph]
+
+3. Check the example code
+
+ echo "casa" | python example.py examples/phrase-table 5 1
+ echo "essa casa" | python example.py examples/phrase-table 5 1
+
+## Changing the code
+
+If you want to add your changes you are going to have to recompile the cython code.
+
+1. Compile the cython code (use Cython 0.16): this will generate binpt/binpt.cpp
+
+ cython --cplus binpt/binpt.pyx
diff --git a/contrib/python/binpt/binpt.cpp b/contrib/python/binpt/binpt.cpp
new file mode 100644
index 000000000..7de3058fc
--- /dev/null
+++ b/contrib/python/binpt/binpt.cpp
@@ -0,0 +1,5648 @@
+/* Generated by Cython 0.16 on Tue Sep 18 11:36:58 2012 */
+
+#define PY_SSIZE_T_CLEAN
+#include "Python.h"
+#ifndef Py_PYTHON_H
+ #error Python headers needed to compile C extensions, please install development version of Python.
+#elif PY_VERSION_HEX < 0x02040000
+ #error Cython requires Python 2.4+.
+#else
+#include <stddef.h> /* For offsetof */
+#ifndef offsetof
+#define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
+#endif
+
+#if !defined(WIN32) && !defined(MS_WINDOWS)
+ #ifndef __stdcall
+ #define __stdcall
+ #endif
+ #ifndef __cdecl
+ #define __cdecl
+ #endif
+ #ifndef __fastcall
+ #define __fastcall
+ #endif
+#endif
+
+#ifndef DL_IMPORT
+ #define DL_IMPORT(t) t
+#endif
+#ifndef DL_EXPORT
+ #define DL_EXPORT(t) t
+#endif
+
+#ifndef PY_LONG_LONG
+ #define PY_LONG_LONG LONG_LONG
+#endif
+
+#ifndef Py_HUGE_VAL
+ #define Py_HUGE_VAL HUGE_VAL
+#endif
+
+#ifdef PYPY_VERSION
+#define CYTHON_COMPILING_IN_PYPY 1
+#define CYTHON_COMPILING_IN_CPYTHON 0
+#else
+#define CYTHON_COMPILING_IN_PYPY 0
+#define CYTHON_COMPILING_IN_CPYTHON 1
+#endif
+
+#if CYTHON_COMPILING_IN_PYPY
+ #define __Pyx_PyCFunction_Call PyObject_Call
+#else
+ #define __Pyx_PyCFunction_Call PyCFunction_Call
+#endif
+
+#if PY_VERSION_HEX < 0x02050000
+ typedef int Py_ssize_t;
+ #define PY_SSIZE_T_MAX INT_MAX
+ #define PY_SSIZE_T_MIN INT_MIN
+ #define PY_FORMAT_SIZE_T ""
+ #define PyInt_FromSsize_t(z) PyInt_FromLong(z)
+ #define PyInt_AsSsize_t(o) __Pyx_PyInt_AsInt(o)
+ #define PyNumber_Index(o) PyNumber_Int(o)
+ #define PyIndex_Check(o) PyNumber_Check(o)
+ #define PyErr_WarnEx(category, message, stacklevel) PyErr_Warn(category, message)
+ #define __PYX_BUILD_PY_SSIZE_T "i"
+#else
+ #define __PYX_BUILD_PY_SSIZE_T "n"
+#endif
+
+#if PY_VERSION_HEX < 0x02060000
+ #define Py_REFCNT(ob) (((PyObject*)(ob))->ob_refcnt)
+ #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
+ #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
+ #define PyVarObject_HEAD_INIT(type, size) \
+ PyObject_HEAD_INIT(type) size,
+ #define PyType_Modified(t)
+
+ typedef struct {
+ void *buf;
+ PyObject *obj;
+ Py_ssize_t len;
+ Py_ssize_t itemsize;
+ int readonly;
+ int ndim;
+ char *format;
+ Py_ssize_t *shape;
+ Py_ssize_t *strides;
+ Py_ssize_t *suboffsets;
+ void *internal;
+ } Py_buffer;
+
+ #define PyBUF_SIMPLE 0
+ #define PyBUF_WRITABLE 0x0001
+ #define PyBUF_FORMAT 0x0004
+ #define PyBUF_ND 0x0008
+ #define PyBUF_STRIDES (0x0010 | PyBUF_ND)
+ #define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES)
+ #define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES)
+ #define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES)
+ #define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES)
+ #define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_FORMAT | PyBUF_WRITABLE)
+ #define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_FORMAT | PyBUF_WRITABLE)
+
+ typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
+ typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
+#endif
+
+#if PY_MAJOR_VERSION < 3
+ #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+ PyCode_New(a, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#else
+ #define __Pyx_BUILTIN_MODULE_NAME "builtins"
+ #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) \
+ PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
+#endif
+
+#if PY_MAJOR_VERSION < 3 && PY_MINOR_VERSION < 6
+ #define PyUnicode_FromString(s) PyUnicode_Decode(s, strlen(s), "UTF-8", "strict")
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define Py_TPFLAGS_CHECKTYPES 0
+ #define Py_TPFLAGS_HAVE_INDEX 0
+#endif
+
+#if (PY_VERSION_HEX < 0x02060000) || (PY_MAJOR_VERSION >= 3)
+ #define Py_TPFLAGS_HAVE_NEWBUFFER 0
+#endif
+
+
+#if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_GET_LENGTH)
+ #define CYTHON_PEP393_ENABLED 1
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
+#else
+ #define CYTHON_PEP393_ENABLED 0
+ #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u)
+ #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define PyBaseString_Type PyUnicode_Type
+ #define PyStringObject PyUnicodeObject
+ #define PyString_Type PyUnicode_Type
+ #define PyString_Check PyUnicode_Check
+ #define PyString_CheckExact PyUnicode_CheckExact
+#endif
+
+#if PY_VERSION_HEX < 0x02060000
+ #define PyBytesObject PyStringObject
+ #define PyBytes_Type PyString_Type
+ #define PyBytes_Check PyString_Check
+ #define PyBytes_CheckExact PyString_CheckExact
+ #define PyBytes_FromString PyString_FromString
+ #define PyBytes_FromStringAndSize PyString_FromStringAndSize
+ #define PyBytes_FromFormat PyString_FromFormat
+ #define PyBytes_DecodeEscape PyString_DecodeEscape
+ #define PyBytes_AsString PyString_AsString
+ #define PyBytes_AsStringAndSize PyString_AsStringAndSize
+ #define PyBytes_Size PyString_Size
+ #define PyBytes_AS_STRING PyString_AS_STRING
+ #define PyBytes_GET_SIZE PyString_GET_SIZE
+ #define PyBytes_Repr PyString_Repr
+ #define PyBytes_Concat PyString_Concat
+ #define PyBytes_ConcatAndDel PyString_ConcatAndDel
+#endif
+
+#if PY_VERSION_HEX < 0x02060000
+ #define PySet_Check(obj) PyObject_TypeCheck(obj, &PySet_Type)
+ #define PyFrozenSet_Check(obj) PyObject_TypeCheck(obj, &PyFrozenSet_Type)
+#endif
+#ifndef PySet_CheckExact
+ #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type)
+#endif
+
+#define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
+
+#if PY_MAJOR_VERSION >= 3
+ #define PyIntObject PyLongObject
+ #define PyInt_Type PyLong_Type
+ #define PyInt_Check(op) PyLong_Check(op)
+ #define PyInt_CheckExact(op) PyLong_CheckExact(op)
+ #define PyInt_FromString PyLong_FromString
+ #define PyInt_FromUnicode PyLong_FromUnicode
+ #define PyInt_FromLong PyLong_FromLong
+ #define PyInt_FromSize_t PyLong_FromSize_t
+ #define PyInt_FromSsize_t PyLong_FromSsize_t
+ #define PyInt_AsLong PyLong_AsLong
+ #define PyInt_AS_LONG PyLong_AS_LONG
+ #define PyInt_AsSsize_t PyLong_AsSsize_t
+ #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask
+ #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define PyBoolObject PyLongObject
+#endif
+
+#if PY_VERSION_HEX < 0x03020000
+ typedef long Py_hash_t;
+ #define __Pyx_PyInt_FromHash_t PyInt_FromLong
+ #define __Pyx_PyInt_AsHash_t PyInt_AsLong
+#else
+ #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
+ #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t
+#endif
+
+#if (PY_MAJOR_VERSION < 3) || (PY_VERSION_HEX >= 0x03010300)
+ #define __Pyx_PySequence_GetSlice(obj, a, b) PySequence_GetSlice(obj, a, b)
+ #define __Pyx_PySequence_SetSlice(obj, a, b, value) PySequence_SetSlice(obj, a, b, value)
+ #define __Pyx_PySequence_DelSlice(obj, a, b) PySequence_DelSlice(obj, a, b)
+#else
+ #define __Pyx_PySequence_GetSlice(obj, a, b) (unlikely(!(obj)) ? \
+ (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), (PyObject*)0) : \
+ (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_GetSlice(obj, a, b)) : \
+ (PyErr_Format(PyExc_TypeError, "'%.200s' object is unsliceable", (obj)->ob_type->tp_name), (PyObject*)0)))
+ #define __Pyx_PySequence_SetSlice(obj, a, b, value) (unlikely(!(obj)) ? \
+ (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
+ (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_SetSlice(obj, a, b, value)) : \
+ (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice assignment", (obj)->ob_type->tp_name), -1)))
+ #define __Pyx_PySequence_DelSlice(obj, a, b) (unlikely(!(obj)) ? \
+ (PyErr_SetString(PyExc_SystemError, "null argument to internal routine"), -1) : \
+ (likely((obj)->ob_type->tp_as_mapping) ? (PySequence_DelSlice(obj, a, b)) : \
+ (PyErr_Format(PyExc_TypeError, "'%.200s' object doesn't support slice deletion", (obj)->ob_type->tp_name), -1)))
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
+#endif
+
+#if PY_VERSION_HEX < 0x02050000
+ #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),((char *)(n)))
+ #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),((char *)(n)),(a))
+ #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),((char *)(n)))
+#else
+ #define __Pyx_GetAttrString(o,n) PyObject_GetAttrString((o),(n))
+ #define __Pyx_SetAttrString(o,n,a) PyObject_SetAttrString((o),(n),(a))
+ #define __Pyx_DelAttrString(o,n) PyObject_DelAttrString((o),(n))
+#endif
+
+#if PY_VERSION_HEX < 0x02050000
+ #define __Pyx_NAMESTR(n) ((char *)(n))
+ #define __Pyx_DOCSTR(n) ((char *)(n))
+#else
+ #define __Pyx_NAMESTR(n) (n)
+ #define __Pyx_DOCSTR(n) (n)
+#endif
+
+#if PY_MAJOR_VERSION >= 3
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y)
+#else
+ #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y)
+ #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y)
+#endif
+
+#ifndef __PYX_EXTERN_C
+ #ifdef __cplusplus
+ #define __PYX_EXTERN_C extern "C"
+ #else
+ #define __PYX_EXTERN_C extern
+ #endif
+#endif
+
+#if defined(WIN32) || defined(MS_WINDOWS)
+#define _USE_MATH_DEFINES
+#endif
+#include <math.h>
+#define __PYX_HAVE__binpt
+#define __PYX_HAVE_API__binpt
+#include <string>
+#include <vector>
+#include <utility>
+#include "TypeDef.h"
+#include "PhraseDictionaryTree.h"
+#include "Util.h"
+#ifdef _OPENMP
+#include <omp.h>
+#endif /* _OPENMP */
+
+#ifdef PYREX_WITHOUT_ASSERTIONS
+#define CYTHON_WITHOUT_ASSERTIONS
+#endif
+
+
+/* inline attribute */
+#ifndef CYTHON_INLINE
+ #if defined(__GNUC__)
+ #define CYTHON_INLINE __inline__
+ #elif defined(_MSC_VER)
+ #define CYTHON_INLINE __inline
+ #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
+ #define CYTHON_INLINE inline
+ #else
+ #define CYTHON_INLINE
+ #endif
+#endif
+
+/* unused attribute */
+#ifndef CYTHON_UNUSED
+# if defined(__GNUC__)
+# if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+# elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
+# define CYTHON_UNUSED __attribute__ ((__unused__))
+# else
+# define CYTHON_UNUSED
+# endif
+#endif
+
+typedef struct {PyObject **p; char *s; const long n; const char* encoding; const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry; /*proto*/
+
+
+/* Type Conversion Predeclarations */
+
+#define __Pyx_PyBytes_FromUString(s) PyBytes_FromString((char*)s)
+#define __Pyx_PyBytes_AsUString(s) ((unsigned char*) PyBytes_AsString(s))
+
+#define __Pyx_Owned_Py_None(b) (Py_INCREF(Py_None), Py_None)
+#define __Pyx_PyBool_FromLong(b) ((b) ? (Py_INCREF(Py_True), Py_True) : (Py_INCREF(Py_False), Py_False))
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
+
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject*);
+
+#define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
+#define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
+
+#ifdef __GNUC__
+ /* Test for GCC > 2.95 */
+ #if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))
+ #define likely(x) __builtin_expect(!!(x), 1)
+ #define unlikely(x) __builtin_expect(!!(x), 0)
+ #else /* __GNUC__ > 2 ... */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+ #endif /* __GNUC__ > 2 ... */
+#else /* __GNUC__ */
+ #define likely(x) (x)
+ #define unlikely(x) (x)
+#endif /* __GNUC__ */
+
+static PyObject *__pyx_m;
+static PyObject *__pyx_b;
+static PyObject *__pyx_empty_tuple;
+static PyObject *__pyx_empty_bytes;
+static int __pyx_lineno;
+static int __pyx_clineno = 0;
+static const char * __pyx_cfilenm= __FILE__;
+static const char *__pyx_filename;
+
+
+static const char *__pyx_f[] = {
+ "binpt.pyx",
+};
+
+/*--- Type declarations ---*/
+struct __pyx_obj_5binpt_QueryResult;
+struct __pyx_obj_5binpt_BinaryPhraseTable;
+struct __pyx_opt_args_5binpt_get_query_result;
+
+/* "binpt.pxd":5
+ * from libcpp.pair cimport pair
+ *
+ * ctypedef string* str_pointer # <<<<<<<<<<<<<<
+ *
+ * cdef extern from 'TypeDef.h' namespace 'Moses':
+ */
+typedef std::string *__pyx_t_5binpt_str_pointer;
+
+/* "binpt.pyx":71
+ * return repr((repr(self._words), repr(self._scores), repr(self._wa)))
+ *
+ * cdef QueryResult get_query_result(StringTgtCand& cand, object wa = None): # <<<<<<<<<<<<<<
+ * '''Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string)
+ * to a QueryResult (python object).'''
+ */
+struct __pyx_opt_args_5binpt_get_query_result {
+ int __pyx_n;
+ PyObject *wa;
+};
+
+/* "binpt.pyx":17
+ * raise TypeError('Cannot convert %s to string' % type(data))
+ *
+ * cdef class QueryResult(object): # <<<<<<<<<<<<<<
+ * '''This class represents a query result, that is,
+ * a target phrase (tuple of words/strings),
+ */
+struct __pyx_obj_5binpt_QueryResult {
+ PyObject_HEAD
+ PyObject *_words;
+ PyObject *_scores;
+ PyObject *_wa;
+};
+
+
+/* "binpt.pyx":78
+ * return QueryResult(words, scores, wa)
+ *
+ * cdef class BinaryPhraseTable(object): # <<<<<<<<<<<<<<
+ * '''This class encapsulates a Moses::PhraseDictionaryTree for operations over
+ * binary phrase tables.'''
+ */
+struct __pyx_obj_5binpt_BinaryPhraseTable {
+ PyObject_HEAD
+ Moses::PhraseDictionaryTree *__pyx___tree;
+ PyObject *_path;
+ unsigned int _nscores;
+ int _wa;
+ PyObject *_delimiters;
+};
+
+#ifndef CYTHON_REFNANNY
+ #define CYTHON_REFNANNY 0
+#endif
+#if CYTHON_REFNANNY
+ typedef struct {
+ void (*INCREF)(void*, PyObject*, int);
+ void (*DECREF)(void*, PyObject*, int);
+ void (*GOTREF)(void*, PyObject*, int);
+ void (*GIVEREF)(void*, PyObject*, int);
+ void* (*SetupContext)(const char*, int, const char*);
+ void (*FinishContext)(void**);
+ } __Pyx_RefNannyAPIStruct;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
+ static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); /*proto*/
+ #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
+#ifdef WITH_THREAD
+ #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+ if (acquire_gil) { \
+ PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure(); \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+ PyGILState_Release(__pyx_gilstate_save); \
+ } else { \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__); \
+ }
+#else
+ #define __Pyx_RefNannySetupContext(name, acquire_gil) \
+ __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
+#endif
+ #define __Pyx_RefNannyFinishContext() \
+ __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
+ #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
+ #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
+ #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
+ #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
+ #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
+#else
+ #define __Pyx_RefNannyDeclarations
+ #define __Pyx_RefNannySetupContext(name, acquire_gil)
+ #define __Pyx_RefNannyFinishContext()
+ #define __Pyx_INCREF(r) Py_INCREF(r)
+ #define __Pyx_DECREF(r) Py_DECREF(r)
+ #define __Pyx_GOTREF(r)
+ #define __Pyx_GIVEREF(r)
+ #define __Pyx_XINCREF(r) Py_XINCREF(r)
+ #define __Pyx_XDECREF(r) Py_XDECREF(r)
+ #define __Pyx_XGOTREF(r)
+ #define __Pyx_XGIVEREF(r)
+#endif /* CYTHON_REFNANNY */
+#define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
+#define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
+
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name); /*proto*/
+
+static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb); /*proto*/
+static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb); /*proto*/
+
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause); /*proto*/
+
+static void __Pyx_RaiseArgtupleInvalid(const char* func_name, int exact,
+ Py_ssize_t num_min, Py_ssize_t num_max, Py_ssize_t num_found); /*proto*/
+
+static void __Pyx_RaiseDoubleKeywordsError(const char* func_name, PyObject* kw_name); /*proto*/
+
+static int __Pyx_ParseOptionalKeywords(PyObject *kwds, PyObject **argnames[], \
+ PyObject *kwds2, PyObject *values[], Py_ssize_t num_pos_args, \
+ const char* function_name); /*proto*/
+
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Generic(PyObject *o, PyObject* j) {
+ PyObject *r;
+ if (!j) return NULL;
+ r = PyObject_GetItem(o, j);
+ Py_DECREF(j);
+ return r;
+}
+#define __Pyx_GetItemInt_List(o, i, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? \
+ __Pyx_GetItemInt_List_Fast(o, i) : \
+ __Pyx_GetItemInt_Generic(o, to_py_func(i)))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_List_Fast(PyObject *o, Py_ssize_t i) {
+ if (likely(o != Py_None)) {
+ if (likely((0 <= i) & (i < PyList_GET_SIZE(o)))) {
+ PyObject *r = PyList_GET_ITEM(o, i);
+ Py_INCREF(r);
+ return r;
+ }
+ else if ((-PyList_GET_SIZE(o) <= i) & (i < 0)) {
+ PyObject *r = PyList_GET_ITEM(o, PyList_GET_SIZE(o) + i);
+ Py_INCREF(r);
+ return r;
+ }
+ }
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+}
+#define __Pyx_GetItemInt_Tuple(o, i, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? \
+ __Pyx_GetItemInt_Tuple_Fast(o, i) : \
+ __Pyx_GetItemInt_Generic(o, to_py_func(i)))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Tuple_Fast(PyObject *o, Py_ssize_t i) {
+ if (likely(o != Py_None)) {
+ if (likely((0 <= i) & (i < PyTuple_GET_SIZE(o)))) {
+ PyObject *r = PyTuple_GET_ITEM(o, i);
+ Py_INCREF(r);
+ return r;
+ }
+ else if ((-PyTuple_GET_SIZE(o) <= i) & (i < 0)) {
+ PyObject *r = PyTuple_GET_ITEM(o, PyTuple_GET_SIZE(o) + i);
+ Py_INCREF(r);
+ return r;
+ }
+ }
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+}
+#define __Pyx_GetItemInt(o, i, size, to_py_func) (((size) <= sizeof(Py_ssize_t)) ? \
+ __Pyx_GetItemInt_Fast(o, i) : \
+ __Pyx_GetItemInt_Generic(o, to_py_func(i)))
+static CYTHON_INLINE PyObject *__Pyx_GetItemInt_Fast(PyObject *o, Py_ssize_t i) {
+ if (PyList_CheckExact(o)) {
+ Py_ssize_t n = (likely(i >= 0)) ? i : i + PyList_GET_SIZE(o);
+ if (likely((n >= 0) & (n < PyList_GET_SIZE(o)))) {
+ PyObject *r = PyList_GET_ITEM(o, n);
+ Py_INCREF(r);
+ return r;
+ }
+ }
+ else if (PyTuple_CheckExact(o)) {
+ Py_ssize_t n = (likely(i >= 0)) ? i : i + PyTuple_GET_SIZE(o);
+ if (likely((n >= 0) & (n < PyTuple_GET_SIZE(o)))) {
+ PyObject *r = PyTuple_GET_ITEM(o, n);
+ Py_INCREF(r);
+ return r;
+ }
+ }
+ else if (likely(i >= 0)) {
+ PySequenceMethods *m = Py_TYPE(o)->tp_as_sequence;
+ if (likely(m && m->sq_item)) {
+ return m->sq_item(o, i);
+ }
+ }
+ return __Pyx_GetItemInt_Generic(o, PyInt_FromSsize_t(i));
+}
+
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
+ const char *name, int exact); /*proto*/
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level); /*proto*/
+
+#define __Pyx_CyFunction_USED 1
+#include <structmember.h>
+#define __Pyx_CYFUNCTION_STATICMETHOD 0x01
+#define __Pyx_CYFUNCTION_CLASSMETHOD 0x02
+#define __Pyx_CYFUNCTION_CCLASS 0x04
+#define __Pyx_CyFunction_GetClosure(f) \
+ (((__pyx_CyFunctionObject *) (f))->func_closure)
+#define __Pyx_CyFunction_GetClassObj(f) \
+ (((__pyx_CyFunctionObject *) (f))->func_classobj)
+#define __Pyx_CyFunction_Defaults(type, f) \
+ ((type *)(((__pyx_CyFunctionObject *) (f))->defaults))
+#define __Pyx_CyFunction_SetDefaultsGetter(f, g) \
+ ((__pyx_CyFunctionObject *) (f))->defaults_getter = (g)
+typedef struct {
+ PyCFunctionObject func;
+ int flags;
+ PyObject *func_dict;
+ PyObject *func_weakreflist;
+ PyObject *func_name;
+ PyObject *func_doc;
+ PyObject *func_code;
+ PyObject *func_closure;
+ PyObject *func_classobj; /* No-args super() class cell */
+ void *defaults;
+ int defaults_pyobjects;
+ PyObject *defaults_tuple; /* Const defaults tuple */
+ PyObject *(*defaults_getter)(PyObject *);
+} __pyx_CyFunctionObject;
+static PyTypeObject *__pyx_CyFunctionType = 0;
+#define __Pyx_CyFunction_NewEx(ml, flags, self, module, code) \
+ __Pyx_CyFunction_New(__pyx_CyFunctionType, ml, flags, self, module, code)
+static PyObject *__Pyx_CyFunction_New(PyTypeObject *,
+ PyMethodDef *ml, int flags,
+ PyObject *self, PyObject *module,
+ PyObject* code);
+static CYTHON_INLINE void *__Pyx_CyFunction_InitDefaults(PyObject *m,
+ size_t size,
+ int pyobjects);
+static CYTHON_INLINE void __Pyx_CyFunction_SetDefaultsTuple(PyObject *m,
+ PyObject *tuple);
+static int __Pyx_CyFunction_init(void);
+
+static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject *);
+
+static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject *);
+
+static CYTHON_INLINE unsigned int __Pyx_PyInt_AsUnsignedInt(PyObject *);
+
+static CYTHON_INLINE char __Pyx_PyInt_AsChar(PyObject *);
+
+static CYTHON_INLINE short __Pyx_PyInt_AsShort(PyObject *);
+
+static CYTHON_INLINE int __Pyx_PyInt_AsInt(PyObject *);
+
+static CYTHON_INLINE signed char __Pyx_PyInt_AsSignedChar(PyObject *);
+
+static CYTHON_INLINE signed short __Pyx_PyInt_AsSignedShort(PyObject *);
+
+static CYTHON_INLINE signed int __Pyx_PyInt_AsSignedInt(PyObject *);
+
+static CYTHON_INLINE int __Pyx_PyInt_AsLongDouble(PyObject *);
+
+static CYTHON_INLINE unsigned long __Pyx_PyInt_AsUnsignedLong(PyObject *);
+
+static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_AsUnsignedLongLong(PyObject *);
+
+static CYTHON_INLINE long __Pyx_PyInt_AsLong(PyObject *);
+
+static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_AsLongLong(PyObject *);
+
+static CYTHON_INLINE signed long __Pyx_PyInt_AsSignedLong(PyObject *);
+
+static CYTHON_INLINE signed PY_LONG_LONG __Pyx_PyInt_AsSignedLongLong(PyObject *);
+
+static int __Pyx_check_binary_version(void);
+
+typedef struct {
+ int code_line;
+ PyCodeObject* code_object;
+} __Pyx_CodeObjectCacheEntry;
+struct __Pyx_CodeObjectCache {
+ int count;
+ int max_count;
+ __Pyx_CodeObjectCacheEntry* entries;
+};
+static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
+static PyCodeObject *__pyx_find_code_object(int code_line);
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
+
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename); /*proto*/
+
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); /*proto*/
+
+
+/* Module declarations from 'libcpp.string' */
+
+/* Module declarations from 'libcpp.vector' */
+
+/* Module declarations from 'libcpp.utility' */
+
+/* Module declarations from 'libcpp.pair' */
+
+/* Module declarations from 'cython' */
+
+/* Module declarations from 'binpt' */
+static PyTypeObject *__pyx_ptype_5binpt_QueryResult = 0;
+static PyTypeObject *__pyx_ptype_5binpt_BinaryPhraseTable = 0;
+static int __pyx_f_5binpt_fsign(float, int __pyx_skip_dispatch); /*proto*/
+static PyObject *__pyx_f_5binpt_as_str(PyObject *); /*proto*/
+static struct __pyx_obj_5binpt_QueryResult *__pyx_f_5binpt_get_query_result(Moses::StringTgtCand &, struct __pyx_opt_args_5binpt_get_query_result *__pyx_optional_args); /*proto*/
+#define __Pyx_MODULE_NAME "binpt"
+int __pyx_module_is_main_binpt = 0;
+
+/* Implementation of 'binpt' */
+static PyObject *__pyx_builtin_property;
+static PyObject *__pyx_builtin_staticmethod;
+static PyObject *__pyx_builtin_TypeError;
+static PyObject *__pyx_builtin_range;
+static PyObject *__pyx_builtin_ValueError;
+static PyObject *__pyx_pf_5binpt_fsign(CYTHON_UNUSED PyObject *__pyx_self, float __pyx_v_x); /* proto */
+static int __pyx_pf_5binpt_11QueryResult___cinit__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self, PyObject *__pyx_v_words, PyObject *__pyx_v_scores, PyObject *__pyx_v_wa); /* proto */
+static PyObject *__pyx_pf_5binpt_11QueryResult_2words(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_11QueryResult_4scores(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_11QueryResult_6wa(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
+static PyObject *__pyx_lambda_funcdef_lambda1(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_r); /* proto */
+static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObject *__pyx_v_y, PyObject *__pyx_v_keys); /* proto */
+static PyObject *__pyx_pf_5binpt_11QueryResult_10__str__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_11QueryResult_12__repr__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self); /* proto */
+static int __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, PyObject *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa, PyObject *__pyx_v_delimiters); /* proto */
+static void __pyx_pf_5binpt_17BinaryPhraseTable_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_4isValidBinaryTable(PyObject *__pyx_v_stem, int __pyx_v_wa); /* proto */
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_6path(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_8nscores(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_10wa(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_12delimiters(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self); /* proto */
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_14query(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, PyObject *__pyx_v_line, PyObject *__pyx_v_cmp, PyObject *__pyx_v_top); /* proto */
+static char __pyx_k_1[] = "UTF-8";
+static char __pyx_k_3[] = "Cannot convert %s to string";
+static char __pyx_k_5[] = " ||| ";
+static char __pyx_k_6[] = " ";
+static char __pyx_k_7[] = " \t";
+static char __pyx_k_8[] = "'%s' doesn't seem a valid binary table.";
+static char __pyx_k_9[] = ".binphr.idx";
+static char __pyx_k_10[] = ".binphr.srctree.wa";
+static char __pyx_k_11[] = ".binphr.srcvoc";
+static char __pyx_k_12[] = ".binphr.tgtdata.wa";
+static char __pyx_k_13[] = ".binphr.tgtvoc";
+static char __pyx_k_14[] = ".binphr.srctree";
+static char __pyx_k_15[] = ".binphr.tgtdata";
+static char __pyx_k_18[] = "/media/Data/tools/moses/mosesdecoder/contrib/python/binpt/binpt.pyx";
+static char __pyx_k__x[] = "x";
+static char __pyx_k__y[] = "y";
+static char __pyx_k__os[] = "os";
+static char __pyx_k__wa[] = "wa";
+static char __pyx_k__cmp[] = "cmp";
+static char __pyx_k__top[] = "top";
+static char __pyx_k__desc[] = "desc";
+static char __pyx_k__join[] = "join";
+static char __pyx_k__keys[] = "keys";
+static char __pyx_k__line[] = "line";
+static char __pyx_k__path[] = "path";
+static char __pyx_k__sort[] = "sort";
+static char __pyx_k__stem[] = "stem";
+static char __pyx_k__binpt[] = "binpt";
+static char __pyx_k__range[] = "range";
+static char __pyx_k__words[] = "words";
+static char __pyx_k__encode[] = "encode";
+static char __pyx_k__isfile[] = "isfile";
+static char __pyx_k__scores[] = "scores";
+static char __pyx_k__nscores[] = "nscores";
+static char __pyx_k____main__[] = "__main__";
+static char __pyx_k____test__[] = "__test__";
+static char __pyx_k__property[] = "property";
+static char __pyx_k__TypeError[] = "TypeError";
+static char __pyx_k__ValueError[] = "ValueError";
+static char __pyx_k__delimiters[] = "delimiters";
+static char __pyx_k__staticmethod[] = "staticmethod";
+static char __pyx_k__isValidBinaryTable[] = "isValidBinaryTable";
+static PyObject *__pyx_kp_s_1;
+static PyObject *__pyx_kp_s_10;
+static PyObject *__pyx_kp_s_11;
+static PyObject *__pyx_kp_s_12;
+static PyObject *__pyx_kp_s_13;
+static PyObject *__pyx_kp_s_14;
+static PyObject *__pyx_kp_s_15;
+static PyObject *__pyx_kp_s_18;
+static PyObject *__pyx_kp_s_3;
+static PyObject *__pyx_kp_s_5;
+static PyObject *__pyx_kp_s_6;
+static PyObject *__pyx_kp_s_7;
+static PyObject *__pyx_kp_s_8;
+static PyObject *__pyx_kp_s_9;
+static PyObject *__pyx_n_s__TypeError;
+static PyObject *__pyx_n_s__ValueError;
+static PyObject *__pyx_n_s____main__;
+static PyObject *__pyx_n_s____test__;
+static PyObject *__pyx_n_s__binpt;
+static PyObject *__pyx_n_s__cmp;
+static PyObject *__pyx_n_s__delimiters;
+static PyObject *__pyx_n_s__desc;
+static PyObject *__pyx_n_s__encode;
+static PyObject *__pyx_n_s__isValidBinaryTable;
+static PyObject *__pyx_n_s__isfile;
+static PyObject *__pyx_n_s__join;
+static PyObject *__pyx_n_s__keys;
+static PyObject *__pyx_n_s__line;
+static PyObject *__pyx_n_s__nscores;
+static PyObject *__pyx_n_s__os;
+static PyObject *__pyx_n_s__path;
+static PyObject *__pyx_n_s__property;
+static PyObject *__pyx_n_s__range;
+static PyObject *__pyx_n_s__scores;
+static PyObject *__pyx_n_s__sort;
+static PyObject *__pyx_n_s__staticmethod;
+static PyObject *__pyx_n_s__stem;
+static PyObject *__pyx_n_s__top;
+static PyObject *__pyx_n_s__wa;
+static PyObject *__pyx_n_s__words;
+static PyObject *__pyx_n_s__x;
+static PyObject *__pyx_n_s__y;
+static PyObject *__pyx_int_0;
+static PyObject *__pyx_k_4;
+static PyObject *__pyx_k_tuple_2;
+static PyObject *__pyx_k_tuple_16;
+static PyObject *__pyx_k_tuple_19;
+static PyObject *__pyx_k_codeobj_17;
+static PyObject *__pyx_k_codeobj_20;
+
+/* "binpt.pyx":6
+ * import cython
+ *
+ * cpdef int fsign(float x): # <<<<<<<<<<<<<<
+ * '''Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing'''
+ * return 1 if x >= 0 else -1
+ */
+
+static PyObject *__pyx_pw_5binpt_1fsign(PyObject *__pyx_self, PyObject *__pyx_arg_x); /*proto*/
+static int __pyx_f_5binpt_fsign(float __pyx_v_x, CYTHON_UNUSED int __pyx_skip_dispatch) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ long __pyx_t_1;
+ __Pyx_RefNannySetupContext("fsign", 0);
+
+ /* "binpt.pyx":8
+ * cpdef int fsign(float x):
+ * '''Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing'''
+ * return 1 if x >= 0 else -1 # <<<<<<<<<<<<<<
+ *
+ * cdef bytes as_str(data):
+ */
+ if ((__pyx_v_x >= 0.0)) {
+ __pyx_t_1 = 1;
+ } else {
+ __pyx_t_1 = -1;
+ }
+ __pyx_r = __pyx_t_1;
+ goto __pyx_L0;
+
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_1fsign(PyObject *__pyx_self, PyObject *__pyx_arg_x); /*proto*/
+static char __pyx_doc_5binpt_fsign[] = "Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing";
+static PyObject *__pyx_pw_5binpt_1fsign(PyObject *__pyx_self, PyObject *__pyx_arg_x) {
+ float __pyx_v_x;
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("fsign (wrapper)", 0);
+ __pyx_self = __pyx_self;
+ assert(__pyx_arg_x); {
+ __pyx_v_x = __pyx_PyFloat_AsFloat(__pyx_arg_x); if (unlikely((__pyx_v_x == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("binpt.fsign", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5binpt_fsign(__pyx_self, ((float)__pyx_v_x));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":6
+ * import cython
+ *
+ * cpdef int fsign(float x): # <<<<<<<<<<<<<<
+ * '''Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing'''
+ * return 1 if x >= 0 else -1
+ */
+
+static PyObject *__pyx_pf_5binpt_fsign(CYTHON_UNUSED PyObject *__pyx_self, float __pyx_v_x) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("fsign", 0);
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyInt_FromLong(__pyx_f_5binpt_fsign(__pyx_v_x, 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 6; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("binpt.fsign", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":10
+ * return 1 if x >= 0 else -1
+ *
+ * cdef bytes as_str(data): # <<<<<<<<<<<<<<
+ * if isinstance(data, bytes):
+ * return data
+ */
+
+static PyObject *__pyx_f_5binpt_as_str(PyObject *__pyx_v_data) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_t_2;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("as_str", 0);
+
+ /* "binpt.pyx":11
+ *
+ * cdef bytes as_str(data):
+ * if isinstance(data, bytes): # <<<<<<<<<<<<<<
+ * return data
+ * elif isinstance(data, unicode):
+ */
+ __pyx_t_1 = ((PyObject *)((PyObject*)(&PyBytes_Type)));
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_data, __pyx_t_1);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (__pyx_t_2) {
+
+ /* "binpt.pyx":12
+ * cdef bytes as_str(data):
+ * if isinstance(data, bytes):
+ * return data # <<<<<<<<<<<<<<
+ * elif isinstance(data, unicode):
+ * return data.encode('UTF-8')
+ */
+ __Pyx_XDECREF(((PyObject *)__pyx_r));
+ if (!(likely(PyBytes_CheckExact(__pyx_v_data))||((__pyx_v_data) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected bytes, got %.200s", Py_TYPE(__pyx_v_data)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 12; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_INCREF(__pyx_v_data);
+ __pyx_r = ((PyObject*)__pyx_v_data);
+ goto __pyx_L0;
+ goto __pyx_L3;
+ }
+
+ /* "binpt.pyx":13
+ * if isinstance(data, bytes):
+ * return data
+ * elif isinstance(data, unicode): # <<<<<<<<<<<<<<
+ * return data.encode('UTF-8')
+ * raise TypeError('Cannot convert %s to string' % type(data))
+ */
+ __pyx_t_1 = ((PyObject *)((PyObject*)(&PyUnicode_Type)));
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_TypeCheck(__pyx_v_data, __pyx_t_1);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (__pyx_t_2) {
+
+ /* "binpt.pyx":14
+ * return data
+ * elif isinstance(data, unicode):
+ * return data.encode('UTF-8') # <<<<<<<<<<<<<<
+ * raise TypeError('Cannot convert %s to string' % type(data))
+ *
+ */
+ __Pyx_XDECREF(((PyObject *)__pyx_r));
+ __pyx_t_1 = PyObject_GetAttr(__pyx_v_data, __pyx_n_s__encode); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_3 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_k_tuple_2), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ if (!(likely(PyBytes_CheckExact(__pyx_t_3))||((__pyx_t_3) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected bytes, got %.200s", Py_TYPE(__pyx_t_3)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = ((PyObject*)__pyx_t_3);
+ __pyx_t_3 = 0;
+ goto __pyx_L0;
+ goto __pyx_L3;
+ }
+ __pyx_L3:;
+
+ /* "binpt.pyx":15
+ * elif isinstance(data, unicode):
+ * return data.encode('UTF-8')
+ * raise TypeError('Cannot convert %s to string' % type(data)) # <<<<<<<<<<<<<<
+ *
+ * cdef class QueryResult(object):
+ */
+ __pyx_t_3 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_3), ((PyObject *)Py_TYPE(__pyx_v_data))); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_3));
+ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_t_3));
+ __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_Call(__pyx_builtin_TypeError, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ __Pyx_Raise(__pyx_t_3, 0, 0, 0);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+ __pyx_r = ((PyObject*)Py_None); __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("binpt.as_str", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_5binpt_11QueryResult_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_pw_5binpt_11QueryResult_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_words = 0;
+ PyObject *__pyx_v_scores = 0;
+ PyObject *__pyx_v_wa = 0;
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__words,&__pyx_n_s__scores,&__pyx_n_s__wa,0};
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
+ {
+ PyObject* values[3] = {0,0,0};
+
+ /* "binpt.pyx":29
+ * cdef bytes _wa
+ *
+ * def __cinit__(self, words, scores, wa = None): # <<<<<<<<<<<<<<
+ * '''Requires a tuple of words (as strings) and a tuple of scores (as floats).
+ * Word-alignment info (as string) may be provided'''
+ */
+ values[2] = ((PyObject *)Py_None);
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__words);
+ if (likely(values[0])) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__scores);
+ if (likely(values[1])) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__wa);
+ if (value) { values[2] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_words = values[0];
+ __pyx_v_scores = values[1];
+ __pyx_v_wa = values[2];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 29; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("binpt.QueryResult.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return -1;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5binpt_11QueryResult___cinit__(((struct __pyx_obj_5binpt_QueryResult *)__pyx_v_self), __pyx_v_words, __pyx_v_scores, __pyx_v_wa);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5binpt_11QueryResult___cinit__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self, PyObject *__pyx_v_words, PyObject *__pyx_v_scores, PyObject *__pyx_v_wa) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__cinit__", 0);
+
+ /* "binpt.pyx":32
+ * '''Requires a tuple of words (as strings) and a tuple of scores (as floats).
+ * Word-alignment info (as string) may be provided'''
+ * self._words = words # <<<<<<<<<<<<<<
+ * self._scores = scores
+ * self._wa = wa
+ */
+ if (!(likely(PyTuple_CheckExact(__pyx_v_words))||((__pyx_v_words) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected tuple, got %.200s", Py_TYPE(__pyx_v_words)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 32; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_INCREF(__pyx_v_words);
+ __Pyx_GIVEREF(__pyx_v_words);
+ __Pyx_GOTREF(__pyx_v_self->_words);
+ __Pyx_DECREF(((PyObject *)__pyx_v_self->_words));
+ __pyx_v_self->_words = ((PyObject*)__pyx_v_words);
+
+ /* "binpt.pyx":33
+ * Word-alignment info (as string) may be provided'''
+ * self._words = words
+ * self._scores = scores # <<<<<<<<<<<<<<
+ * self._wa = wa
+ *
+ */
+ if (!(likely(PyTuple_CheckExact(__pyx_v_scores))||((__pyx_v_scores) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected tuple, got %.200s", Py_TYPE(__pyx_v_scores)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 33; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_INCREF(__pyx_v_scores);
+ __Pyx_GIVEREF(__pyx_v_scores);
+ __Pyx_GOTREF(__pyx_v_self->_scores);
+ __Pyx_DECREF(((PyObject *)__pyx_v_self->_scores));
+ __pyx_v_self->_scores = ((PyObject*)__pyx_v_scores);
+
+ /* "binpt.pyx":34
+ * self._words = words
+ * self._scores = scores
+ * self._wa = wa # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ if (!(likely(PyBytes_CheckExact(__pyx_v_wa))||((__pyx_v_wa) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected bytes, got %.200s", Py_TYPE(__pyx_v_wa)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 34; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_INCREF(__pyx_v_wa);
+ __Pyx_GIVEREF(__pyx_v_wa);
+ __Pyx_GOTREF(__pyx_v_self->_wa);
+ __Pyx_DECREF(((PyObject *)__pyx_v_self->_wa));
+ __pyx_v_self->_wa = ((PyObject*)__pyx_v_wa);
+
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_AddTraceback("binpt.QueryResult.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_3words(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static char __pyx_doc_5binpt_11QueryResult_2words[] = "Tuple of words (as strings)";
+static PyObject *__pyx_pw_5binpt_11QueryResult_3words(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("words (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_11QueryResult_2words(((struct __pyx_obj_5binpt_QueryResult *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":37
+ *
+ * @property
+ * def words(self): # <<<<<<<<<<<<<<
+ * '''Tuple of words (as strings)'''
+ * return self._words
+ */
+
+static PyObject *__pyx_pf_5binpt_11QueryResult_2words(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("words", 0);
+
+ /* "binpt.pyx":39
+ * def words(self):
+ * '''Tuple of words (as strings)'''
+ * return self._words # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_words));
+ __pyx_r = ((PyObject *)__pyx_v_self->_words);
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_5scores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static char __pyx_doc_5binpt_11QueryResult_4scores[] = "Tuple of scores (as floats)";
+static PyObject *__pyx_pw_5binpt_11QueryResult_5scores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("scores (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_11QueryResult_4scores(((struct __pyx_obj_5binpt_QueryResult *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":42
+ *
+ * @property
+ * def scores(self): # <<<<<<<<<<<<<<
+ * '''Tuple of scores (as floats)'''
+ * return self._scores
+ */
+
+static PyObject *__pyx_pf_5binpt_11QueryResult_4scores(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("scores", 0);
+
+ /* "binpt.pyx":44
+ * def scores(self):
+ * '''Tuple of scores (as floats)'''
+ * return self._scores # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_scores));
+ __pyx_r = ((PyObject *)__pyx_v_self->_scores);
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_7wa(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static char __pyx_doc_5binpt_11QueryResult_6wa[] = "Word-alignment info (as string)";
+static PyObject *__pyx_pw_5binpt_11QueryResult_7wa(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("wa (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_11QueryResult_6wa(((struct __pyx_obj_5binpt_QueryResult *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":47
+ *
+ * @property
+ * def wa(self): # <<<<<<<<<<<<<<
+ * '''Word-alignment info (as string)'''
+ * return self._wa
+ */
+
+static PyObject *__pyx_pf_5binpt_11QueryResult_6wa(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("wa", 0);
+
+ /* "binpt.pyx":49
+ * def wa(self):
+ * '''Word-alignment info (as string)'''
+ * return self._wa # <<<<<<<<<<<<<<
+ *
+ * @staticmethod
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_wa));
+ __pyx_r = ((PyObject *)__pyx_v_self->_wa);
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5binpt_11QueryResult_8desc[] = "Returns the sign of keys(y) - keys(x).\n Can only be used if scores is not an empty vector as\n keys defaults to scores[0]";
+static PyMethodDef __pyx_mdef_5binpt_11QueryResult_9desc = {__Pyx_NAMESTR("desc"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_9desc, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5binpt_11QueryResult_8desc)};
+static PyObject *__pyx_pw_5binpt_11QueryResult_9desc(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_x = 0;
+ PyObject *__pyx_v_y = 0;
+ PyObject *__pyx_v_keys = 0;
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__x,&__pyx_n_s__y,&__pyx_n_s__keys,0};
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("desc (wrapper)", 0);
+ {
+ PyObject* values[3] = {0,0,0};
+ values[2] = __pyx_k_4;
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__x);
+ if (likely(values[0])) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ values[1] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__y);
+ if (likely(values[1])) kw_args--;
+ else {
+ __Pyx_RaiseArgtupleInvalid("desc", 0, 2, 3, 1); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__keys);
+ if (value) { values[2] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "desc") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_x = values[0];
+ __pyx_v_y = values[1];
+ __pyx_v_keys = values[2];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("desc", 0, 2, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("binpt.QueryResult.desc", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5binpt_11QueryResult_8desc(__pyx_v_x, __pyx_v_y, __pyx_v_keys);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_4desc_lambda1(PyObject *__pyx_self, PyObject *__pyx_v_r); /*proto*/
+static PyMethodDef __pyx_mdef_5binpt_11QueryResult_4desc_lambda1 = {__Pyx_NAMESTR("lambda1"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_4desc_lambda1, METH_O, __Pyx_DOCSTR(0)};
+static PyObject *__pyx_pw_5binpt_11QueryResult_4desc_lambda1(PyObject *__pyx_self, PyObject *__pyx_v_r) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("lambda1 (wrapper)", 0);
+ __pyx_self = __pyx_self;
+ __pyx_r = __pyx_lambda_funcdef_lambda1(__pyx_self, ((PyObject *)__pyx_v_r));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":52
+ *
+ * @staticmethod
+ * def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
+ * '''Returns the sign of keys(y) - keys(x).
+ * Can only be used if scores is not an empty vector as
+ */
+
+static PyObject *__pyx_lambda_funcdef_lambda1(CYTHON_UNUSED PyObject *__pyx_self, PyObject *__pyx_v_r) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("lambda1", 0);
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyObject_GetAttr(__pyx_v_r, __pyx_n_s__scores); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_GetItemInt(__pyx_t_1, 0, sizeof(long), PyInt_FromLong); if (!__pyx_t_2) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_r = __pyx_t_2;
+ __pyx_t_2 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_AddTraceback("binpt.QueryResult.desc.lambda1", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5binpt_11QueryResult_8desc(PyObject *__pyx_v_x, PyObject *__pyx_v_y, PyObject *__pyx_v_keys) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ float __pyx_t_4;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("desc", 0);
+
+ /* "binpt.pyx":56
+ * Can only be used if scores is not an empty vector as
+ * keys defaults to scores[0]'''
+ * return fsign(keys(y) - keys(x)) # <<<<<<<<<<<<<<
+ *
+ * def __str__(self):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(__pyx_v_y);
+ PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_y);
+ __Pyx_GIVEREF(__pyx_v_y);
+ __pyx_t_2 = PyObject_Call(__pyx_v_keys, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ __pyx_t_1 = PyTuple_New(1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(__pyx_v_x);
+ PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_v_x);
+ __Pyx_GIVEREF(__pyx_v_x);
+ __pyx_t_3 = PyObject_Call(__pyx_v_keys, ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ __pyx_t_1 = PyNumber_Subtract(__pyx_t_2, __pyx_t_3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_4 = __pyx_PyFloat_AsFloat(__pyx_t_1); if (unlikely((__pyx_t_4 == (float)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyInt_FromLong(__pyx_f_5binpt_fsign(__pyx_t_4, 0)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 56; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("binpt.QueryResult.desc", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_11__str__(PyObject *__pyx_v_self); /*proto*/
+static char __pyx_doc_5binpt_11QueryResult_10__str__[] = "Returns a string such as: <words> ||| <scores> [||| word-alignment info]";
+struct wrapperbase __pyx_wrapperbase_5binpt_11QueryResult_10__str__;
+static PyObject *__pyx_pw_5binpt_11QueryResult_11__str__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__str__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_11QueryResult_10__str__(((struct __pyx_obj_5binpt_QueryResult *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":58
+ * return fsign(keys(y) - keys(x))
+ *
+ * def __str__(self): # <<<<<<<<<<<<<<
+ * '''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
+ * if self._wa:
+ */
+
+static PyObject *__pyx_pf_5binpt_11QueryResult_10__str__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self) {
+ PyObject *__pyx_v_x = NULL;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ int __pyx_t_1;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ Py_ssize_t __pyx_t_7;
+ PyObject *__pyx_t_8 = NULL;
+ PyObject *__pyx_t_9 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__str__", 0);
+
+ /* "binpt.pyx":60
+ * def __str__(self):
+ * '''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
+ * if self._wa: # <<<<<<<<<<<<<<
+ * return ' ||| '.join( (' '.join(self._words),
+ * ' '.join([str(x) for x in self._scores]),
+ */
+ __pyx_t_1 = (((PyObject *)__pyx_v_self->_wa) != Py_None) && (PyBytes_GET_SIZE(((PyObject *)__pyx_v_self->_wa)) != 0);
+ if (__pyx_t_1) {
+
+ /* "binpt.pyx":61
+ * '''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
+ * if self._wa:
+ * return ' ||| '.join( (' '.join(self._words), # <<<<<<<<<<<<<<
+ * ' '.join([str(x) for x in self._scores]),
+ * self._wa) )
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_5), __pyx_n_s__join); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_6), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_words));
+ PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_v_self->_words));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_self->_words));
+ __pyx_t_5 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
+
+ /* "binpt.pyx":62
+ * if self._wa:
+ * return ' ||| '.join( (' '.join(self._words),
+ * ' '.join([str(x) for x in self._scores]), # <<<<<<<<<<<<<<
+ * self._wa) )
+ * else:
+ */
+ __pyx_t_4 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_6), __pyx_n_s__join); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ if (unlikely(((PyObject *)__pyx_v_self->_scores) == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ __pyx_t_6 = ((PyObject *)__pyx_v_self->_scores); __Pyx_INCREF(__pyx_t_6); __pyx_t_7 = 0;
+ for (;;) {
+ if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_6)) break;
+ __pyx_t_8 = PyTuple_GET_ITEM(__pyx_t_6, __pyx_t_7); __Pyx_INCREF(__pyx_t_8); __pyx_t_7++;
+ __Pyx_XDECREF(__pyx_v_x);
+ __pyx_v_x = __pyx_t_8;
+ __pyx_t_8 = 0;
+ __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_INCREF(__pyx_v_x);
+ PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_v_x);
+ __Pyx_GIVEREF(__pyx_v_x);
+ __pyx_t_9 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_8), NULL); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_9);
+ __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+ if (unlikely(PyList_Append(__pyx_t_3, (PyObject*)__pyx_t_9))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_9); __pyx_t_9 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_INCREF(((PyObject *)__pyx_t_3));
+ PyTuple_SET_ITEM(__pyx_t_6, 0, ((PyObject *)__pyx_t_3));
+ __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_Call(__pyx_t_4, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 62; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+
+ /* "binpt.pyx":63
+ * return ' ||| '.join( (' '.join(self._words),
+ * ' '.join([str(x) for x in self._scores]),
+ * self._wa) ) # <<<<<<<<<<<<<<
+ * else:
+ * return ' ||| '.join( (' '.join(self._words),
+ */
+ __pyx_t_6 = PyTuple_New(3); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_6, 1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_wa));
+ PyTuple_SET_ITEM(__pyx_t_6, 2, ((PyObject *)__pyx_v_self->_wa));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_self->_wa));
+ __pyx_t_5 = 0;
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_6));
+ __Pyx_GIVEREF(((PyObject *)__pyx_t_6));
+ __pyx_t_6 = 0;
+ __pyx_t_6 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 61; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_r = __pyx_t_6;
+ __pyx_t_6 = 0;
+ goto __pyx_L0;
+ goto __pyx_L3;
+ }
+ /*else*/ {
+
+ /* "binpt.pyx":65
+ * self._wa) )
+ * else:
+ * return ' ||| '.join( (' '.join(self._words), # <<<<<<<<<<<<<<
+ * ' '.join([str(x) for x in self._scores]) ) )
+ *
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_6 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_5), __pyx_n_s__join); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_3 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_6), __pyx_n_s__join); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_words));
+ PyTuple_SET_ITEM(__pyx_t_2, 0, ((PyObject *)__pyx_v_self->_words));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_self->_words));
+ __pyx_t_5 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+
+ /* "binpt.pyx":66
+ * else:
+ * return ' ||| '.join( (' '.join(self._words),
+ * ' '.join([str(x) for x in self._scores]) ) ) # <<<<<<<<<<<<<<
+ *
+ * def __repr__(self):
+ */
+ __pyx_t_2 = PyObject_GetAttr(((PyObject *)__pyx_kp_s_6), __pyx_n_s__join); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = PyList_New(0); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ if (unlikely(((PyObject *)__pyx_v_self->_scores) == Py_None)) {
+ PyErr_SetString(PyExc_TypeError, "'NoneType' object is not iterable"); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ }
+ __pyx_t_4 = ((PyObject *)__pyx_v_self->_scores); __Pyx_INCREF(__pyx_t_4); __pyx_t_7 = 0;
+ for (;;) {
+ if (__pyx_t_7 >= PyTuple_GET_SIZE(__pyx_t_4)) break;
+ __pyx_t_9 = PyTuple_GET_ITEM(__pyx_t_4, __pyx_t_7); __Pyx_INCREF(__pyx_t_9); __pyx_t_7++;
+ __Pyx_XDECREF(__pyx_v_x);
+ __pyx_v_x = __pyx_t_9;
+ __pyx_t_9 = 0;
+ __pyx_t_9 = PyTuple_New(1); if (unlikely(!__pyx_t_9)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_9);
+ __Pyx_INCREF(__pyx_v_x);
+ PyTuple_SET_ITEM(__pyx_t_9, 0, __pyx_v_x);
+ __Pyx_GIVEREF(__pyx_v_x);
+ __pyx_t_8 = PyObject_Call(((PyObject *)((PyObject*)(&PyString_Type))), ((PyObject *)__pyx_t_9), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_DECREF(((PyObject *)__pyx_t_9)); __pyx_t_9 = 0;
+ if (unlikely(PyList_Append(__pyx_t_3, (PyObject*)__pyx_t_8))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+ }
+ __Pyx_DECREF(__pyx_t_4); __pyx_t_4 = 0;
+ __pyx_t_4 = PyTuple_New(1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_INCREF(((PyObject *)__pyx_t_3));
+ PyTuple_SET_ITEM(__pyx_t_4, 0, ((PyObject *)__pyx_t_3));
+ __Pyx_GIVEREF(((PyObject *)__pyx_t_3));
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_Call(__pyx_t_2, ((PyObject *)__pyx_t_4), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 66; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
+ __pyx_t_4 = PyTuple_New(2); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ PyTuple_SET_ITEM(__pyx_t_4, 0, __pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_4, 1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_5 = 0;
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_t_4));
+ __Pyx_GIVEREF(((PyObject *)__pyx_t_4));
+ __pyx_t_4 = 0;
+ __pyx_t_4 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 65; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_r = __pyx_t_4;
+ __pyx_t_4 = 0;
+ goto __pyx_L0;
+ }
+ __pyx_L3:;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_8);
+ __Pyx_XDECREF(__pyx_t_9);
+ __Pyx_AddTraceback("binpt.QueryResult.__str__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_x);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_11QueryResult_13__repr__(PyObject *__pyx_v_self); /*proto*/
+static PyObject *__pyx_pw_5binpt_11QueryResult_13__repr__(PyObject *__pyx_v_self) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__repr__ (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_11QueryResult_12__repr__(((struct __pyx_obj_5binpt_QueryResult *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":68
+ * ' '.join([str(x) for x in self._scores]) ) )
+ *
+ * def __repr__(self): # <<<<<<<<<<<<<<
+ * return repr((repr(self._words), repr(self._scores), repr(self._wa)))
+ *
+ */
+
+static PyObject *__pyx_pf_5binpt_11QueryResult_12__repr__(struct __pyx_obj_5binpt_QueryResult *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ PyObject *__pyx_t_4 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__repr__", 0);
+
+ /* "binpt.pyx":69
+ *
+ * def __repr__(self):
+ * return repr((repr(self._words), repr(self._scores), repr(self._wa))) # <<<<<<<<<<<<<<
+ *
+ * cdef QueryResult get_query_result(StringTgtCand& cand, object wa = None):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = ((PyObject *)__pyx_v_self->_words);
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_2 = PyObject_Repr(__pyx_t_1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = ((PyObject *)__pyx_v_self->_scores);
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_3 = PyObject_Repr(__pyx_t_1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = ((PyObject *)__pyx_v_self->_wa);
+ __Pyx_INCREF(__pyx_t_1);
+ __pyx_t_4 = PyObject_Repr(__pyx_t_1); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ PyTuple_SET_ITEM(__pyx_t_1, 0, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_1, 1, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_t_4);
+ __Pyx_GIVEREF(__pyx_t_4);
+ __pyx_t_2 = 0;
+ __pyx_t_3 = 0;
+ __pyx_t_4 = 0;
+ __pyx_t_4 = PyObject_Repr(((PyObject *)__pyx_t_1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 69; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ __pyx_r = __pyx_t_4;
+ __pyx_t_4 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_AddTraceback("binpt.QueryResult.__repr__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":71
+ * return repr((repr(self._words), repr(self._scores), repr(self._wa)))
+ *
+ * cdef QueryResult get_query_result(StringTgtCand& cand, object wa = None): # <<<<<<<<<<<<<<
+ * '''Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string)
+ * to a QueryResult (python object).'''
+ */
+
+static struct __pyx_obj_5binpt_QueryResult *__pyx_f_5binpt_get_query_result(Moses::StringTgtCand &__pyx_v_cand, struct __pyx_opt_args_5binpt_get_query_result *__pyx_optional_args) {
+ PyObject *__pyx_v_wa = ((PyObject *)Py_None);
+ PyObject *__pyx_v_words = 0;
+ PyObject *__pyx_v_scores = 0;
+ size_t __pyx_v_i;
+ struct __pyx_obj_5binpt_QueryResult *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ size_t __pyx_t_2;
+ size_t __pyx_t_3;
+ PyObject *__pyx_t_4 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("get_query_result", 0);
+ if (__pyx_optional_args) {
+ if (__pyx_optional_args->__pyx_n > 0) {
+ __pyx_v_wa = __pyx_optional_args->wa;
+ }
+ }
+
+ /* "binpt.pyx":74
+ * '''Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string)
+ * to a QueryResult (python object).'''
+ * cdef tuple words = tuple([cand.first[i].c_str() for i in range(cand.first.size())]) # <<<<<<<<<<<<<<
+ * cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())])
+ * return QueryResult(words, scores, wa)
+ */
+ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __pyx_v_cand.first.size();
+ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
+ __pyx_v_i = __pyx_t_3;
+ __pyx_t_4 = PyBytes_FromString((__pyx_v_cand.first[__pyx_v_i])->c_str()); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_4));
+ if (unlikely(PyList_Append(__pyx_t_1, (PyObject*)__pyx_t_4))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
+ }
+ __pyx_t_4 = ((PyObject *)PyList_AsTuple(__pyx_t_1)); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_4));
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ __pyx_v_words = __pyx_t_4;
+ __pyx_t_4 = 0;
+
+ /* "binpt.pyx":75
+ * to a QueryResult (python object).'''
+ * cdef tuple words = tuple([cand.first[i].c_str() for i in range(cand.first.size())])
+ * cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())]) # <<<<<<<<<<<<<<
+ * return QueryResult(words, scores, wa)
+ *
+ */
+ __pyx_t_4 = PyList_New(0); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __pyx_t_2 = __pyx_v_cand.second.size();
+ for (__pyx_t_3 = 0; __pyx_t_3 < __pyx_t_2; __pyx_t_3+=1) {
+ __pyx_v_i = __pyx_t_3;
+ __pyx_t_1 = PyFloat_FromDouble((__pyx_v_cand.second[__pyx_v_i])); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ if (unlikely(PyList_Append(__pyx_t_4, (PyObject*)__pyx_t_1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ }
+ __pyx_t_1 = ((PyObject *)PyList_AsTuple(__pyx_t_4)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 75; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+ __Pyx_DECREF(((PyObject *)__pyx_t_4)); __pyx_t_4 = 0;
+ __pyx_v_scores = __pyx_t_1;
+ __pyx_t_1 = 0;
+
+ /* "binpt.pyx":76
+ * cdef tuple words = tuple([cand.first[i].c_str() for i in range(cand.first.size())])
+ * cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())])
+ * return QueryResult(words, scores, wa) # <<<<<<<<<<<<<<
+ *
+ * cdef class BinaryPhraseTable(object):
+ */
+ __Pyx_XDECREF(((PyObject *)__pyx_r));
+ __pyx_t_1 = PyTuple_New(3); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_INCREF(((PyObject *)__pyx_v_words));
+ PyTuple_SET_ITEM(__pyx_t_1, 0, ((PyObject *)__pyx_v_words));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_words));
+ __Pyx_INCREF(((PyObject *)__pyx_v_scores));
+ PyTuple_SET_ITEM(__pyx_t_1, 1, ((PyObject *)__pyx_v_scores));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_scores));
+ __Pyx_INCREF(__pyx_v_wa);
+ PyTuple_SET_ITEM(__pyx_t_1, 2, __pyx_v_wa);
+ __Pyx_GIVEREF(__pyx_v_wa);
+ __pyx_t_4 = PyObject_Call(((PyObject *)((PyObject*)__pyx_ptype_5binpt_QueryResult)), ((PyObject *)__pyx_t_1), NULL); if (unlikely(!__pyx_t_4)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 76; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_4);
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ __pyx_r = ((struct __pyx_obj_5binpt_QueryResult *)__pyx_t_4);
+ __pyx_t_4 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = ((struct __pyx_obj_5binpt_QueryResult *)Py_None); __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_4);
+ __Pyx_AddTraceback("binpt.get_query_result", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = 0;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_words);
+ __Pyx_XDECREF(__pyx_v_scores);
+ __Pyx_XGIVEREF((PyObject *)__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static int __pyx_pw_5binpt_17BinaryPhraseTable_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static int __pyx_pw_5binpt_17BinaryPhraseTable_1__cinit__(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_path = 0;
+ unsigned int __pyx_v_nscores;
+ int __pyx_v_wa;
+ PyObject *__pyx_v_delimiters = 0;
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__path,&__pyx_n_s__nscores,&__pyx_n_s__wa,&__pyx_n_s__delimiters,0};
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__cinit__ (wrapper)", 0);
+ {
+ PyObject* values[4] = {0,0,0,0};
+ values[3] = ((PyObject *)__pyx_kp_s_7);
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__path);
+ if (likely(values[0])) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__nscores);
+ if (value) { values[1] = value; kw_args--; }
+ }
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__wa);
+ if (value) { values[2] = value; kw_args--; }
+ }
+ case 3:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__delimiters);
+ if (value) { values[3] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "__cinit__") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ if (values[1]) {
+ } else {
+ __pyx_v_nscores = ((unsigned int)5);
+ }
+ if (values[2]) {
+ } else {
+
+ /* "binpt.pyx":88
+ * cdef bytes _delimiters
+ *
+ * def __cinit__(self, bytes path, unsigned nscores = 5, bint wa = False, delimiters = ' \t'): # <<<<<<<<<<<<<<
+ * '''It requies a path to binary phrase table (stem of the table, e.g europarl.fr-en
+ * is the stem for europar.fr-en.binphr.*).
+ */
+ __pyx_v_wa = ((int)0);
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 4: values[3] = PyTuple_GET_ITEM(__pyx_args, 3);
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_path = ((PyObject*)values[0]);
+ if (values[1]) {
+ __pyx_v_nscores = __Pyx_PyInt_AsUnsignedInt(values[1]); if (unlikely((__pyx_v_nscores == (unsigned int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ } else {
+ __pyx_v_nscores = ((unsigned int)5);
+ }
+ if (values[2]) {
+ __pyx_v_wa = __Pyx_PyObject_IsTrue(values[2]); if (unlikely((__pyx_v_wa == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ } else {
+ __pyx_v_wa = ((int)0);
+ }
+ __pyx_v_delimiters = values[3];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("__cinit__", 0, 1, 4, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return -1;
+ __pyx_L4_argument_unpacking_done:;
+ if (unlikely(!__Pyx_ArgTypeTest(((PyObject *)__pyx_v_path), (&PyBytes_Type), 1, "path", 1))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 88; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self), __pyx_v_path, __pyx_v_nscores, __pyx_v_wa, __pyx_v_delimiters);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static int __pyx_pf_5binpt_17BinaryPhraseTable___cinit__(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, PyObject *__pyx_v_path, unsigned int __pyx_v_nscores, int __pyx_v_wa, PyObject *__pyx_v_delimiters) {
+ int __pyx_r;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_t_4;
+ int __pyx_t_5;
+ char *__pyx_t_6;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("__cinit__", 0);
+
+ /* "binpt.pyx":95
+ * One can also specify the token delimiters, for Moses::Tokenize(text, delimiters), which is space or tab by default.'''
+ *
+ * if not BinaryPhraseTable.isValidBinaryTable(path, wa): # <<<<<<<<<<<<<<
+ * raise ValueError, "'%s' doesn't seem a valid binary table." % path
+ * self._path = path
+ */
+ __pyx_t_1 = PyObject_GetAttr(((PyObject *)((PyObject*)__pyx_ptype_5binpt_BinaryPhraseTable)), __pyx_n_s__isValidBinaryTable); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = __Pyx_PyBool_FromLong(__pyx_v_wa); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = PyTuple_New(2); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_INCREF(((PyObject *)__pyx_v_path));
+ PyTuple_SET_ITEM(__pyx_t_3, 0, ((PyObject *)__pyx_v_path));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_path));
+ PyTuple_SET_ITEM(__pyx_t_3, 1, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ __pyx_t_2 = 0;
+ __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 95; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+ __pyx_t_5 = (!__pyx_t_4);
+ if (__pyx_t_5) {
+
+ /* "binpt.pyx":96
+ *
+ * if not BinaryPhraseTable.isValidBinaryTable(path, wa):
+ * raise ValueError, "'%s' doesn't seem a valid binary table." % path # <<<<<<<<<<<<<<
+ * self._path = path
+ * self._nscores = nscores
+ */
+ __pyx_t_2 = PyNumber_Remainder(((PyObject *)__pyx_kp_s_8), ((PyObject *)__pyx_v_path)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_2));
+ __Pyx_Raise(__pyx_builtin_ValueError, ((PyObject *)__pyx_t_2), 0, 0);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ goto __pyx_L3;
+ }
+ __pyx_L3:;
+
+ /* "binpt.pyx":97
+ * if not BinaryPhraseTable.isValidBinaryTable(path, wa):
+ * raise ValueError, "'%s' doesn't seem a valid binary table." % path
+ * self._path = path # <<<<<<<<<<<<<<
+ * self._nscores = nscores
+ * self._wa = wa
+ */
+ __Pyx_INCREF(((PyObject *)__pyx_v_path));
+ __Pyx_GIVEREF(((PyObject *)__pyx_v_path));
+ __Pyx_GOTREF(__pyx_v_self->_path);
+ __Pyx_DECREF(((PyObject *)__pyx_v_self->_path));
+ __pyx_v_self->_path = __pyx_v_path;
+
+ /* "binpt.pyx":98
+ * raise ValueError, "'%s' doesn't seem a valid binary table." % path
+ * self._path = path
+ * self._nscores = nscores # <<<<<<<<<<<<<<
+ * self._wa = wa
+ * self._delimiters = delimiters
+ */
+ __pyx_v_self->_nscores = __pyx_v_nscores;
+
+ /* "binpt.pyx":99
+ * self._path = path
+ * self._nscores = nscores
+ * self._wa = wa # <<<<<<<<<<<<<<
+ * self._delimiters = delimiters
+ * self.__tree = new PhraseDictionaryTree(nscores)
+ */
+ __pyx_v_self->_wa = __pyx_v_wa;
+
+ /* "binpt.pyx":100
+ * self._nscores = nscores
+ * self._wa = wa
+ * self._delimiters = delimiters # <<<<<<<<<<<<<<
+ * self.__tree = new PhraseDictionaryTree(nscores)
+ * self.__tree.UseWordAlignment(wa)
+ */
+ if (!(likely(PyBytes_CheckExact(__pyx_v_delimiters))||((__pyx_v_delimiters) == Py_None)||(PyErr_Format(PyExc_TypeError, "Expected bytes, got %.200s", Py_TYPE(__pyx_v_delimiters)->tp_name), 0))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 100; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_INCREF(__pyx_v_delimiters);
+ __Pyx_GIVEREF(__pyx_v_delimiters);
+ __Pyx_GOTREF(__pyx_v_self->_delimiters);
+ __Pyx_DECREF(((PyObject *)__pyx_v_self->_delimiters));
+ __pyx_v_self->_delimiters = ((PyObject*)__pyx_v_delimiters);
+
+ /* "binpt.pyx":101
+ * self._wa = wa
+ * self._delimiters = delimiters
+ * self.__tree = new PhraseDictionaryTree(nscores) # <<<<<<<<<<<<<<
+ * self.__tree.UseWordAlignment(wa)
+ * self.__tree.Read(string(path))
+ */
+ __pyx_v_self->__pyx___tree = new Moses::PhraseDictionaryTree(__pyx_v_nscores);
+
+ /* "binpt.pyx":102
+ * self._delimiters = delimiters
+ * self.__tree = new PhraseDictionaryTree(nscores)
+ * self.__tree.UseWordAlignment(wa) # <<<<<<<<<<<<<<
+ * self.__tree.Read(string(path))
+ *
+ */
+ __pyx_v_self->__pyx___tree->UseWordAlignment(__pyx_v_wa);
+
+ /* "binpt.pyx":103
+ * self.__tree = new PhraseDictionaryTree(nscores)
+ * self.__tree.UseWordAlignment(wa)
+ * self.__tree.Read(string(path)) # <<<<<<<<<<<<<<
+ *
+ * def __dealloc__(self):
+ */
+ __pyx_t_6 = PyBytes_AsString(((PyObject *)__pyx_v_path)); if (unlikely((!__pyx_t_6) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 103; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_v_self->__pyx___tree->Read(std::string(__pyx_t_6));
+
+ __pyx_r = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.__cinit__", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = -1;
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static void __pyx_pw_5binpt_17BinaryPhraseTable_3__dealloc__(PyObject *__pyx_v_self); /*proto*/
+static void __pyx_pw_5binpt_17BinaryPhraseTable_3__dealloc__(PyObject *__pyx_v_self) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__dealloc__ (wrapper)", 0);
+ __pyx_pf_5binpt_17BinaryPhraseTable_2__dealloc__(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+}
+
+/* "binpt.pyx":105
+ * self.__tree.Read(string(path))
+ *
+ * def __dealloc__(self): # <<<<<<<<<<<<<<
+ * del self.__tree
+ *
+ */
+
+static void __pyx_pf_5binpt_17BinaryPhraseTable_2__dealloc__(CYTHON_UNUSED struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__dealloc__", 0);
+
+ /* "binpt.pyx":106
+ *
+ * def __dealloc__(self):
+ * del self.__tree # <<<<<<<<<<<<<<
+ *
+ * @staticmethod
+ */
+ delete __pyx_v_self->__pyx___tree;
+
+ __Pyx_RefNannyFinishContext();
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_5isValidBinaryTable(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5binpt_17BinaryPhraseTable_4isValidBinaryTable[] = "This sanity check was added to the constructor, but you can access it from outside this class\n to determine whether or not you are providing a valid stem to BinaryPhraseTable.";
+static PyMethodDef __pyx_mdef_5binpt_17BinaryPhraseTable_5isValidBinaryTable = {__Pyx_NAMESTR("isValidBinaryTable"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_5isValidBinaryTable, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5binpt_17BinaryPhraseTable_4isValidBinaryTable)};
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_5isValidBinaryTable(PyObject *__pyx_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_stem = 0;
+ int __pyx_v_wa;
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__stem,&__pyx_n_s__wa,0};
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("isValidBinaryTable (wrapper)", 0);
+ {
+ PyObject* values[2] = {0,0};
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__stem);
+ if (likely(values[0])) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__wa);
+ if (value) { values[1] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "isValidBinaryTable") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ if (values[1]) {
+ } else {
+
+ /* "binpt.pyx":109
+ *
+ * @staticmethod
+ * def isValidBinaryTable(stem, bint wa = False): # <<<<<<<<<<<<<<
+ * '''This sanity check was added to the constructor, but you can access it from outside this class
+ * to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ */
+ __pyx_v_wa = ((int)0);
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_stem = values[0];
+ if (values[1]) {
+ __pyx_v_wa = __Pyx_PyObject_IsTrue(values[1]); if (unlikely((__pyx_v_wa == (int)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ } else {
+ __pyx_v_wa = ((int)0);
+ }
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("isValidBinaryTable", 0, 1, 2, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.isValidBinaryTable", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_4isValidBinaryTable(__pyx_v_stem, __pyx_v_wa);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_4isValidBinaryTable(PyObject *__pyx_v_stem, int __pyx_v_wa) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ PyObject *__pyx_t_3 = NULL;
+ int __pyx_t_4;
+ PyObject *__pyx_t_5 = NULL;
+ PyObject *__pyx_t_6 = NULL;
+ PyObject *__pyx_t_7 = NULL;
+ PyObject *__pyx_t_8 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("isValidBinaryTable", 0);
+
+ /* "binpt.pyx":112
+ * '''This sanity check was added to the constructor, but you can access it from outside this class
+ * to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ * if wa: # <<<<<<<<<<<<<<
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \
+ */
+ if (__pyx_v_wa) {
+
+ /* "binpt.pyx":113
+ * to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ * if wa:
+ * return os.path.isfile(stem + ".binphr.idx") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ */
+ __Pyx_XDECREF(__pyx_r);
+
+ /* "binpt.pyx":114
+ * if wa:
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata.wa") \
+ */
+ __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__path); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__isfile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "binpt.pyx":113
+ * to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ * if wa:
+ * return os.path.isfile(stem + ".binphr.idx") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ */
+ __pyx_t_2 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_9)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ __pyx_t_2 = 0;
+ __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 113; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "binpt.pyx":114
+ * if wa:
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata.wa") \
+ */
+ __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_1 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__path); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__isfile); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_10)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 114; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "binpt.pyx":115
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.tgtdata.wa") \
+ * and os.path.isfile(stem + ".binphr.tgtvoc")
+ */
+ __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_3 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__isfile); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_11)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 115; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "binpt.pyx":116
+ * and os.path.isfile(stem + ".binphr.srctree.wa") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata.wa") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.tgtvoc")
+ * else:
+ */
+ __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_5 = PyObject_GetAttr(__pyx_t_6, __pyx_n_s__path); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __pyx_t_6 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__isfile); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_12)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ __pyx_t_5 = 0;
+ __pyx_t_5 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 116; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "binpt.pyx":117
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata.wa") \
+ * and os.path.isfile(stem + ".binphr.tgtvoc") # <<<<<<<<<<<<<<
+ * else:
+ * return os.path.isfile(stem + ".binphr.idx") \
+ */
+ __pyx_t_7 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ __pyx_t_6 = PyObject_GetAttr(__pyx_t_7, __pyx_n_s__path); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+ __pyx_t_7 = PyObject_GetAttr(__pyx_t_6, __pyx_n_s__isfile); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __pyx_t_6 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_13)); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_6);
+ __Pyx_GIVEREF(__pyx_t_6);
+ __pyx_t_6 = 0;
+ __pyx_t_6 = PyObject_Call(__pyx_t_7, ((PyObject *)__pyx_t_8), NULL); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 117; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+ __pyx_t_8 = __pyx_t_6;
+ __pyx_t_6 = 0;
+ } else {
+ __pyx_t_8 = __pyx_t_5;
+ __pyx_t_5 = 0;
+ }
+ __pyx_t_5 = __pyx_t_8;
+ __pyx_t_8 = 0;
+ } else {
+ __pyx_t_5 = __pyx_t_3;
+ __pyx_t_3 = 0;
+ }
+ __pyx_t_3 = __pyx_t_5;
+ __pyx_t_5 = 0;
+ } else {
+ __pyx_t_3 = __pyx_t_1;
+ __pyx_t_1 = 0;
+ }
+ __pyx_t_1 = __pyx_t_3;
+ __pyx_t_3 = 0;
+ } else {
+ __pyx_t_1 = __pyx_t_2;
+ __pyx_t_2 = 0;
+ }
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ goto __pyx_L3;
+ }
+ /*else*/ {
+
+ /* "binpt.pyx":119
+ * and os.path.isfile(stem + ".binphr.tgtvoc")
+ * else:
+ * return os.path.isfile(stem + ".binphr.idx") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srctree") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ */
+ __Pyx_XDECREF(__pyx_r);
+
+ /* "binpt.pyx":120
+ * else:
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata") \
+ */
+ __pyx_t_1 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__path); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_GetAttr(__pyx_t_2, __pyx_n_s__isfile); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "binpt.pyx":119
+ * and os.path.isfile(stem + ".binphr.tgtvoc")
+ * else:
+ * return os.path.isfile(stem + ".binphr.idx") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srctree") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ */
+ __pyx_t_2 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_9)); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __pyx_t_3 = PyTuple_New(1); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ PyTuple_SET_ITEM(__pyx_t_3, 0, __pyx_t_2);
+ __Pyx_GIVEREF(__pyx_t_2);
+ __pyx_t_2 = 0;
+ __pyx_t_2 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_t_3), NULL); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 119; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_3)); __pyx_t_3 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_2); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_2); __pyx_t_2 = 0;
+
+ /* "binpt.pyx":120
+ * else:
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata") \
+ */
+ __pyx_t_3 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_1 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__path); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_GetAttr(__pyx_t_1, __pyx_n_s__isfile); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __pyx_t_1 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_14)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = PyTuple_New(1); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ PyTuple_SET_ITEM(__pyx_t_5, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_t_3, ((PyObject *)__pyx_t_5), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 120; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_5)); __pyx_t_5 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_1); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "binpt.pyx":121
+ * return os.path.isfile(stem + ".binphr.idx") \
+ * and os.path.isfile(stem + ".binphr.srctree") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.tgtdata") \
+ * and os.path.isfile(stem + ".binphr.tgtvoc")
+ */
+ __pyx_t_5 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_3 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__path); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = PyObject_GetAttr(__pyx_t_3, __pyx_n_s__isfile); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+ __pyx_t_3 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_11)); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __pyx_t_8 = PyTuple_New(1); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ PyTuple_SET_ITEM(__pyx_t_8, 0, __pyx_t_3);
+ __Pyx_GIVEREF(__pyx_t_3);
+ __pyx_t_3 = 0;
+ __pyx_t_3 = PyObject_Call(__pyx_t_5, ((PyObject *)__pyx_t_8), NULL); if (unlikely(!__pyx_t_3)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 121; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_3);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_3); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_3); __pyx_t_3 = 0;
+
+ /* "binpt.pyx":122
+ * and os.path.isfile(stem + ".binphr.srctree") \
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata") \ # <<<<<<<<<<<<<<
+ * and os.path.isfile(stem + ".binphr.tgtvoc")
+ *
+ */
+ __pyx_t_8 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __pyx_t_5 = PyObject_GetAttr(__pyx_t_8, __pyx_n_s__path); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+ __pyx_t_8 = PyObject_GetAttr(__pyx_t_5, __pyx_n_s__isfile); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+ __pyx_t_5 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_15)); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __pyx_t_6 = PyTuple_New(1); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ PyTuple_SET_ITEM(__pyx_t_6, 0, __pyx_t_5);
+ __Pyx_GIVEREF(__pyx_t_5);
+ __pyx_t_5 = 0;
+ __pyx_t_5 = PyObject_Call(__pyx_t_8, ((PyObject *)__pyx_t_6), NULL); if (unlikely(!__pyx_t_5)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 122; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_5);
+ __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_6)); __pyx_t_6 = 0;
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_5); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+ __Pyx_DECREF(__pyx_t_5); __pyx_t_5 = 0;
+
+ /* "binpt.pyx":123
+ * and os.path.isfile(stem + ".binphr.srcvoc") \
+ * and os.path.isfile(stem + ".binphr.tgtdata") \
+ * and os.path.isfile(stem + ".binphr.tgtvoc") # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ __pyx_t_6 = __Pyx_GetName(__pyx_m, __pyx_n_s__os); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __pyx_t_8 = PyObject_GetAttr(__pyx_t_6, __pyx_n_s__path); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __pyx_t_6 = PyObject_GetAttr(__pyx_t_8, __pyx_n_s__isfile); if (unlikely(!__pyx_t_6)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_6);
+ __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+ __pyx_t_8 = PyNumber_Add(__pyx_v_stem, ((PyObject *)__pyx_kp_s_13)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __pyx_t_7 = PyTuple_New(1); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ PyTuple_SET_ITEM(__pyx_t_7, 0, __pyx_t_8);
+ __Pyx_GIVEREF(__pyx_t_8);
+ __pyx_t_8 = 0;
+ __pyx_t_8 = PyObject_Call(__pyx_t_6, ((PyObject *)__pyx_t_7), NULL); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 123; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_DECREF(__pyx_t_6); __pyx_t_6 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+ __pyx_t_7 = __pyx_t_8;
+ __pyx_t_8 = 0;
+ } else {
+ __pyx_t_7 = __pyx_t_5;
+ __pyx_t_5 = 0;
+ }
+ __pyx_t_5 = __pyx_t_7;
+ __pyx_t_7 = 0;
+ } else {
+ __pyx_t_5 = __pyx_t_3;
+ __pyx_t_3 = 0;
+ }
+ __pyx_t_3 = __pyx_t_5;
+ __pyx_t_5 = 0;
+ } else {
+ __pyx_t_3 = __pyx_t_1;
+ __pyx_t_1 = 0;
+ }
+ __pyx_t_1 = __pyx_t_3;
+ __pyx_t_3 = 0;
+ } else {
+ __pyx_t_1 = __pyx_t_2;
+ __pyx_t_2 = 0;
+ }
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+ }
+ __pyx_L3:;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ __Pyx_XDECREF(__pyx_t_3);
+ __Pyx_XDECREF(__pyx_t_5);
+ __Pyx_XDECREF(__pyx_t_6);
+ __Pyx_XDECREF(__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_8);
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.isValidBinaryTable", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_7path(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_7path(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("path (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_6path(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":126
+ *
+ * @property
+ * def path(self): # <<<<<<<<<<<<<<
+ * return self._path
+ *
+ */
+
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_6path(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("path", 0);
+
+ /* "binpt.pyx":127
+ * @property
+ * def path(self):
+ * return self._path # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_path));
+ __pyx_r = ((PyObject *)__pyx_v_self->_path);
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_9nscores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_9nscores(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("nscores (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_8nscores(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":130
+ *
+ * @property
+ * def nscores(self): # <<<<<<<<<<<<<<
+ * return self._nscores
+ *
+ */
+
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_8nscores(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("nscores", 0);
+
+ /* "binpt.pyx":131
+ * @property
+ * def nscores(self):
+ * return self._nscores # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = PyLong_FromUnsignedLong(__pyx_v_self->_nscores); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 131; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.nscores", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_11wa(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_11wa(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("wa (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_10wa(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":134
+ *
+ * @property
+ * def wa(self): # <<<<<<<<<<<<<<
+ * return self._wa
+ *
+ */
+
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_10wa(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("wa", 0);
+
+ /* "binpt.pyx":135
+ * @property
+ * def wa(self):
+ * return self._wa # <<<<<<<<<<<<<<
+ *
+ * @property
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_1 = __Pyx_PyBool_FromLong(__pyx_v_self->_wa); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 135; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_r = __pyx_t_1;
+ __pyx_t_1 = 0;
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.wa", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_13delimiters(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused); /*proto*/
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_13delimiters(PyObject *__pyx_v_self, CYTHON_UNUSED PyObject *unused) {
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("delimiters (wrapper)", 0);
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_12delimiters(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self));
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* "binpt.pyx":138
+ *
+ * @property
+ * def delimiters(self): # <<<<<<<<<<<<<<
+ * return self._delimiters
+ *
+ */
+
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_12delimiters(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self) {
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("delimiters", 0);
+
+ /* "binpt.pyx":139
+ * @property
+ * def delimiters(self):
+ * return self._delimiters # <<<<<<<<<<<<<<
+ *
+ * def query(self, line, cmp = None, top = 0):
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_self->_delimiters));
+ __pyx_r = ((PyObject *)__pyx_v_self->_delimiters);
+ goto __pyx_L0;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ __pyx_L0:;
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+/* Python wrapper */
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_15query(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds); /*proto*/
+static char __pyx_doc_5binpt_17BinaryPhraseTable_14query[] = "Queries the phrase table and returns a list of matches.\n Each match is a QueryResult.\n If 'cmp' is defined the return list is sorted.\n If 'top' is defined, onlye the top elements will be returned.";
+static PyObject *__pyx_pw_5binpt_17BinaryPhraseTable_15query(PyObject *__pyx_v_self, PyObject *__pyx_args, PyObject *__pyx_kwds) {
+ PyObject *__pyx_v_line = 0;
+ PyObject *__pyx_v_cmp = 0;
+ PyObject *__pyx_v_top = 0;
+ static PyObject **__pyx_pyargnames[] = {&__pyx_n_s__line,&__pyx_n_s__cmp,&__pyx_n_s__top,0};
+ PyObject *__pyx_r = 0;
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("query (wrapper)", 0);
+ {
+ PyObject* values[3] = {0,0,0};
+
+ /* "binpt.pyx":141
+ * return self._delimiters
+ *
+ * def query(self, line, cmp = None, top = 0): # <<<<<<<<<<<<<<
+ * '''Queries the phrase table and returns a list of matches.
+ * Each match is a QueryResult.
+ */
+ values[1] = ((PyObject *)Py_None);
+ values[2] = ((PyObject *)__pyx_int_0);
+ if (unlikely(__pyx_kwds)) {
+ Py_ssize_t kw_args;
+ const Py_ssize_t pos_args = PyTuple_GET_SIZE(__pyx_args);
+ switch (pos_args) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ case 0: break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ kw_args = PyDict_Size(__pyx_kwds);
+ switch (pos_args) {
+ case 0:
+ values[0] = PyDict_GetItem(__pyx_kwds, __pyx_n_s__line);
+ if (likely(values[0])) kw_args--;
+ else goto __pyx_L5_argtuple_error;
+ case 1:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__cmp);
+ if (value) { values[1] = value; kw_args--; }
+ }
+ case 2:
+ if (kw_args > 0) {
+ PyObject* value = PyDict_GetItem(__pyx_kwds, __pyx_n_s__top);
+ if (value) { values[2] = value; kw_args--; }
+ }
+ }
+ if (unlikely(kw_args > 0)) {
+ if (unlikely(__Pyx_ParseOptionalKeywords(__pyx_kwds, __pyx_pyargnames, 0, values, pos_args, "query") < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ }
+ } else {
+ switch (PyTuple_GET_SIZE(__pyx_args)) {
+ case 3: values[2] = PyTuple_GET_ITEM(__pyx_args, 2);
+ case 2: values[1] = PyTuple_GET_ITEM(__pyx_args, 1);
+ case 1: values[0] = PyTuple_GET_ITEM(__pyx_args, 0);
+ break;
+ default: goto __pyx_L5_argtuple_error;
+ }
+ }
+ __pyx_v_line = values[0];
+ __pyx_v_cmp = values[1];
+ __pyx_v_top = values[2];
+ }
+ goto __pyx_L4_argument_unpacking_done;
+ __pyx_L5_argtuple_error:;
+ __Pyx_RaiseArgtupleInvalid("query", 0, 1, 3, PyTuple_GET_SIZE(__pyx_args)); {__pyx_filename = __pyx_f[0]; __pyx_lineno = 141; __pyx_clineno = __LINE__; goto __pyx_L3_error;}
+ __pyx_L3_error:;
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.query", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __Pyx_RefNannyFinishContext();
+ return NULL;
+ __pyx_L4_argument_unpacking_done:;
+ __pyx_r = __pyx_pf_5binpt_17BinaryPhraseTable_14query(((struct __pyx_obj_5binpt_BinaryPhraseTable *)__pyx_v_self), __pyx_v_line, __pyx_v_cmp, __pyx_v_top);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_pf_5binpt_17BinaryPhraseTable_14query(struct __pyx_obj_5binpt_BinaryPhraseTable *__pyx_v_self, PyObject *__pyx_v_line, PyObject *__pyx_v_cmp, PyObject *__pyx_v_top) {
+ PyObject *__pyx_v_text = 0;
+ std::vector<std::string> __pyx_v_fphrase;
+ std::vector<Moses::StringTgtCand> *__pyx_v_rv;
+ std::vector<std::string> *__pyx_v_wa;
+ PyObject *__pyx_v_phrases = 0;
+ size_t __pyx_v_i;
+ PyObject *__pyx_r = NULL;
+ __Pyx_RefNannyDeclarations
+ PyObject *__pyx_t_1 = NULL;
+ char *__pyx_t_2;
+ char *__pyx_t_3;
+ int __pyx_t_4;
+ size_t __pyx_t_5;
+ size_t __pyx_t_6;
+ PyObject *__pyx_t_7 = NULL;
+ PyObject *__pyx_t_8 = NULL;
+ struct __pyx_opt_args_5binpt_get_query_result __pyx_t_9;
+ Py_ssize_t __pyx_t_10;
+ int __pyx_lineno = 0;
+ const char *__pyx_filename = NULL;
+ int __pyx_clineno = 0;
+ __Pyx_RefNannySetupContext("query", 0);
+
+ /* "binpt.pyx":146
+ * If 'cmp' is defined the return list is sorted.
+ * If 'top' is defined, onlye the top elements will be returned.'''
+ * cdef bytes text = as_str(line) # <<<<<<<<<<<<<<
+ * cdef vector[string] fphrase = Tokenize(string(text), string(self._delimiters))
+ * cdef vector[StringTgtCand]* rv = new vector[StringTgtCand]()
+ */
+ __pyx_t_1 = ((PyObject *)__pyx_f_5binpt_as_str(__pyx_v_line)); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 146; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_v_text = ((PyObject*)__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "binpt.pyx":147
+ * If 'top' is defined, onlye the top elements will be returned.'''
+ * cdef bytes text = as_str(line)
+ * cdef vector[string] fphrase = Tokenize(string(text), string(self._delimiters)) # <<<<<<<<<<<<<<
+ * cdef vector[StringTgtCand]* rv = new vector[StringTgtCand]()
+ * cdef vector[string]* wa = NULL
+ */
+ __pyx_t_2 = PyBytes_AsString(((PyObject *)__pyx_v_text)); if (unlikely((!__pyx_t_2) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_t_3 = PyBytes_AsString(((PyObject *)__pyx_v_self->_delimiters)); if (unlikely((!__pyx_t_3) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 147; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_v_fphrase = Moses::Tokenize(std::string(__pyx_t_2), std::string(__pyx_t_3));
+
+ /* "binpt.pyx":148
+ * cdef bytes text = as_str(line)
+ * cdef vector[string] fphrase = Tokenize(string(text), string(self._delimiters))
+ * cdef vector[StringTgtCand]* rv = new vector[StringTgtCand]() # <<<<<<<<<<<<<<
+ * cdef vector[string]* wa = NULL
+ * cdef list phrases
+ */
+ __pyx_v_rv = new std::vector<Moses::StringTgtCand>();
+
+ /* "binpt.pyx":149
+ * cdef vector[string] fphrase = Tokenize(string(text), string(self._delimiters))
+ * cdef vector[StringTgtCand]* rv = new vector[StringTgtCand]()
+ * cdef vector[string]* wa = NULL # <<<<<<<<<<<<<<
+ * cdef list phrases
+ * if not self.__tree.UseWordAlignment():
+ */
+ __pyx_v_wa = NULL;
+
+ /* "binpt.pyx":151
+ * cdef vector[string]* wa = NULL
+ * cdef list phrases
+ * if not self.__tree.UseWordAlignment(): # <<<<<<<<<<<<<<
+ * self.__tree.GetTargetCandidates(fphrase, rv[0])
+ * phrases = [get_query_result(rv[0][i]) for i in range(rv.size())]
+ */
+ __pyx_t_4 = (!__pyx_v_self->__pyx___tree->UseWordAlignment());
+ if (__pyx_t_4) {
+
+ /* "binpt.pyx":152
+ * cdef list phrases
+ * if not self.__tree.UseWordAlignment():
+ * self.__tree.GetTargetCandidates(fphrase, rv[0]) # <<<<<<<<<<<<<<
+ * phrases = [get_query_result(rv[0][i]) for i in range(rv.size())]
+ * else:
+ */
+ __pyx_v_self->__pyx___tree->GetTargetCandidates(__pyx_v_fphrase, (__pyx_v_rv[0]));
+
+ /* "binpt.pyx":153
+ * if not self.__tree.UseWordAlignment():
+ * self.__tree.GetTargetCandidates(fphrase, rv[0])
+ * phrases = [get_query_result(rv[0][i]) for i in range(rv.size())] # <<<<<<<<<<<<<<
+ * else:
+ * wa = new vector[string]()
+ */
+ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = __pyx_v_rv->size();
+ for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
+ __pyx_v_i = __pyx_t_6;
+ __pyx_t_7 = ((PyObject *)__pyx_f_5binpt_get_query_result(((__pyx_v_rv[0])[__pyx_v_i]), NULL)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ if (unlikely(PyList_Append(__pyx_t_1, (PyObject*)__pyx_t_7))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 153; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+ }
+ __Pyx_INCREF(((PyObject *)__pyx_t_1));
+ __pyx_v_phrases = __pyx_t_1;
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ goto __pyx_L3;
+ }
+ /*else*/ {
+
+ /* "binpt.pyx":155
+ * phrases = [get_query_result(rv[0][i]) for i in range(rv.size())]
+ * else:
+ * wa = new vector[string]() # <<<<<<<<<<<<<<
+ * self.__tree.GetTargetCandidates(fphrase, rv[0], wa[0])
+ * phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())]
+ */
+ __pyx_v_wa = new std::vector<std::string>();
+
+ /* "binpt.pyx":156
+ * else:
+ * wa = new vector[string]()
+ * self.__tree.GetTargetCandidates(fphrase, rv[0], wa[0]) # <<<<<<<<<<<<<<
+ * phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())]
+ * del wa
+ */
+ __pyx_v_self->__pyx___tree->GetTargetCandidates(__pyx_v_fphrase, (__pyx_v_rv[0]), (__pyx_v_wa[0]));
+
+ /* "binpt.pyx":157
+ * wa = new vector[string]()
+ * self.__tree.GetTargetCandidates(fphrase, rv[0], wa[0])
+ * phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())] # <<<<<<<<<<<<<<
+ * del wa
+ * del rv
+ */
+ __pyx_t_1 = PyList_New(0); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_5 = __pyx_v_rv->size();
+ for (__pyx_t_6 = 0; __pyx_t_6 < __pyx_t_5; __pyx_t_6+=1) {
+ __pyx_v_i = __pyx_t_6;
+ __pyx_t_7 = PyBytes_FromString(((__pyx_v_wa[0])[__pyx_v_i]).c_str()); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_7));
+ __pyx_t_9.__pyx_n = 1;
+ __pyx_t_9.wa = ((PyObject *)__pyx_t_7);
+ __pyx_t_8 = ((PyObject *)__pyx_f_5binpt_get_query_result(((__pyx_v_rv[0])[__pyx_v_i]), &__pyx_t_9)); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_8);
+ __Pyx_DECREF(((PyObject *)__pyx_t_7)); __pyx_t_7 = 0;
+ if (unlikely(PyList_Append(__pyx_t_1, (PyObject*)__pyx_t_8))) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 157; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_8); __pyx_t_8 = 0;
+ }
+ __Pyx_INCREF(((PyObject *)__pyx_t_1));
+ __pyx_v_phrases = __pyx_t_1;
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+
+ /* "binpt.pyx":158
+ * self.__tree.GetTargetCandidates(fphrase, rv[0], wa[0])
+ * phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())]
+ * del wa # <<<<<<<<<<<<<<
+ * del rv
+ * if cmp:
+ */
+ delete __pyx_v_wa;
+ }
+ __pyx_L3:;
+
+ /* "binpt.pyx":159
+ * phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())]
+ * del wa
+ * del rv # <<<<<<<<<<<<<<
+ * if cmp:
+ * phrases.sort(cmp=cmp)
+ */
+ delete __pyx_v_rv;
+
+ /* "binpt.pyx":160
+ * del wa
+ * del rv
+ * if cmp: # <<<<<<<<<<<<<<
+ * phrases.sort(cmp=cmp)
+ * if top > 0:
+ */
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_v_cmp); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 160; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_t_4) {
+
+ /* "binpt.pyx":161
+ * del rv
+ * if cmp:
+ * phrases.sort(cmp=cmp) # <<<<<<<<<<<<<<
+ * if top > 0:
+ * return phrases[0:top]
+ */
+ __pyx_t_1 = PyObject_GetAttr(((PyObject *)__pyx_v_phrases), __pyx_n_s__sort); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_8 = PyDict_New(); if (unlikely(!__pyx_t_8)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_8));
+ if (PyDict_SetItem(__pyx_t_8, ((PyObject *)__pyx_n_s__cmp), __pyx_v_cmp) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_t_7 = PyObject_Call(__pyx_t_1, ((PyObject *)__pyx_empty_tuple), ((PyObject *)__pyx_t_8)); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 161; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ __Pyx_DECREF(((PyObject *)__pyx_t_8)); __pyx_t_8 = 0;
+ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+ goto __pyx_L8;
+ }
+ __pyx_L8:;
+
+ /* "binpt.pyx":162
+ * if cmp:
+ * phrases.sort(cmp=cmp)
+ * if top > 0: # <<<<<<<<<<<<<<
+ * return phrases[0:top]
+ * else:
+ */
+ __pyx_t_7 = PyObject_RichCompare(__pyx_v_top, __pyx_int_0, Py_GT); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 162; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_7);
+ __pyx_t_4 = __Pyx_PyObject_IsTrue(__pyx_t_7); if (unlikely(__pyx_t_4 < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 162; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_7); __pyx_t_7 = 0;
+ if (__pyx_t_4) {
+
+ /* "binpt.pyx":163
+ * phrases.sort(cmp=cmp)
+ * if top > 0:
+ * return phrases[0:top] # <<<<<<<<<<<<<<
+ * else:
+ * return phrases
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __pyx_t_10 = __Pyx_PyIndex_AsSsize_t(__pyx_v_top); if (unlikely((__pyx_t_10 == (Py_ssize_t)-1) && PyErr_Occurred())) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 163; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_t_7 = __Pyx_PySequence_GetSlice(((PyObject *)__pyx_v_phrases), 0, __pyx_t_10); if (unlikely(!__pyx_t_7)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 163; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_7));
+ __pyx_r = ((PyObject *)__pyx_t_7);
+ __pyx_t_7 = 0;
+ goto __pyx_L0;
+ goto __pyx_L9;
+ }
+ /*else*/ {
+
+ /* "binpt.pyx":165
+ * return phrases[0:top]
+ * else:
+ * return phrases # <<<<<<<<<<<<<<
+ *
+ */
+ __Pyx_XDECREF(__pyx_r);
+ __Pyx_INCREF(((PyObject *)__pyx_v_phrases));
+ __pyx_r = ((PyObject *)__pyx_v_phrases);
+ goto __pyx_L0;
+ }
+ __pyx_L9:;
+
+ __pyx_r = Py_None; __Pyx_INCREF(Py_None);
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_7);
+ __Pyx_XDECREF(__pyx_t_8);
+ __Pyx_AddTraceback("binpt.BinaryPhraseTable.query", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ __pyx_r = NULL;
+ __pyx_L0:;
+ __Pyx_XDECREF(__pyx_v_text);
+ __Pyx_XDECREF(__pyx_v_phrases);
+ __Pyx_XGIVEREF(__pyx_r);
+ __Pyx_RefNannyFinishContext();
+ return __pyx_r;
+}
+
+static PyObject *__pyx_tp_new_5binpt_QueryResult(PyTypeObject *t, PyObject *a, PyObject *k) {
+ struct __pyx_obj_5binpt_QueryResult *p;
+ PyObject *o = (*t->tp_alloc)(t, 0);
+ if (!o) return 0;
+ p = ((struct __pyx_obj_5binpt_QueryResult *)o);
+ p->_words = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_scores = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_wa = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ if (__pyx_pw_5binpt_11QueryResult_1__cinit__(o, a, k) < 0) {
+ Py_DECREF(o); o = 0;
+ }
+ return o;
+}
+
+static void __pyx_tp_dealloc_5binpt_QueryResult(PyObject *o) {
+ struct __pyx_obj_5binpt_QueryResult *p = (struct __pyx_obj_5binpt_QueryResult *)o;
+ Py_XDECREF(((PyObject *)p->_words));
+ Py_XDECREF(((PyObject *)p->_scores));
+ Py_XDECREF(((PyObject *)p->_wa));
+ (*Py_TYPE(o)->tp_free)(o);
+}
+
+static int __pyx_tp_traverse_5binpt_QueryResult(PyObject *o, visitproc v, void *a) {
+ int e;
+ struct __pyx_obj_5binpt_QueryResult *p = (struct __pyx_obj_5binpt_QueryResult *)o;
+ if (p->_words) {
+ e = (*v)(p->_words, a); if (e) return e;
+ }
+ if (p->_scores) {
+ e = (*v)(p->_scores, a); if (e) return e;
+ }
+ if (p->_wa) {
+ e = (*v)(p->_wa, a); if (e) return e;
+ }
+ return 0;
+}
+
+static int __pyx_tp_clear_5binpt_QueryResult(PyObject *o) {
+ struct __pyx_obj_5binpt_QueryResult *p = (struct __pyx_obj_5binpt_QueryResult *)o;
+ PyObject* tmp;
+ tmp = ((PyObject*)p->_words);
+ p->_words = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_scores);
+ p->_scores = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_wa);
+ p->_wa = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyMethodDef __pyx_methods_5binpt_QueryResult[] = {
+ {__Pyx_NAMESTR("words"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_3words, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5binpt_11QueryResult_2words)},
+ {__Pyx_NAMESTR("scores"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_5scores, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5binpt_11QueryResult_4scores)},
+ {__Pyx_NAMESTR("wa"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_7wa, METH_NOARGS, __Pyx_DOCSTR(__pyx_doc_5binpt_11QueryResult_6wa)},
+ {__Pyx_NAMESTR("desc"), (PyCFunction)__pyx_pw_5binpt_11QueryResult_9desc, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5binpt_11QueryResult_8desc)},
+ {0, 0, 0, 0}
+};
+
+static PyNumberMethods __pyx_tp_as_number_QueryResult = {
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_divide*/
+ #endif
+ 0, /*nb_remainder*/
+ 0, /*nb_divmod*/
+ 0, /*nb_power*/
+ 0, /*nb_negative*/
+ 0, /*nb_positive*/
+ 0, /*nb_absolute*/
+ 0, /*nb_nonzero*/
+ 0, /*nb_invert*/
+ 0, /*nb_lshift*/
+ 0, /*nb_rshift*/
+ 0, /*nb_and*/
+ 0, /*nb_xor*/
+ 0, /*nb_or*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_coerce*/
+ #endif
+ 0, /*nb_int*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_long*/
+ #else
+ 0, /*reserved*/
+ #endif
+ 0, /*nb_float*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_oct*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_hex*/
+ #endif
+ 0, /*nb_inplace_add*/
+ 0, /*nb_inplace_subtract*/
+ 0, /*nb_inplace_multiply*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_inplace_divide*/
+ #endif
+ 0, /*nb_inplace_remainder*/
+ 0, /*nb_inplace_power*/
+ 0, /*nb_inplace_lshift*/
+ 0, /*nb_inplace_rshift*/
+ 0, /*nb_inplace_and*/
+ 0, /*nb_inplace_xor*/
+ 0, /*nb_inplace_or*/
+ 0, /*nb_floor_divide*/
+ 0, /*nb_true_divide*/
+ 0, /*nb_inplace_floor_divide*/
+ 0, /*nb_inplace_true_divide*/
+ #if PY_VERSION_HEX >= 0x02050000
+ 0, /*nb_index*/
+ #endif
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_QueryResult = {
+ 0, /*sq_length*/
+ 0, /*sq_concat*/
+ 0, /*sq_repeat*/
+ 0, /*sq_item*/
+ 0, /*sq_slice*/
+ 0, /*sq_ass_item*/
+ 0, /*sq_ass_slice*/
+ 0, /*sq_contains*/
+ 0, /*sq_inplace_concat*/
+ 0, /*sq_inplace_repeat*/
+};
+
+static PyMappingMethods __pyx_tp_as_mapping_QueryResult = {
+ 0, /*mp_length*/
+ 0, /*mp_subscript*/
+ 0, /*mp_ass_subscript*/
+};
+
+static PyBufferProcs __pyx_tp_as_buffer_QueryResult = {
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getreadbuffer*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getwritebuffer*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getsegcount*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getcharbuffer*/
+ #endif
+ #if PY_VERSION_HEX >= 0x02060000
+ 0, /*bf_getbuffer*/
+ #endif
+ #if PY_VERSION_HEX >= 0x02060000
+ 0, /*bf_releasebuffer*/
+ #endif
+};
+
+static PyTypeObject __pyx_type_5binpt_QueryResult = {
+ PyVarObject_HEAD_INIT(0, 0)
+ __Pyx_NAMESTR("binpt.QueryResult"), /*tp_name*/
+ sizeof(struct __pyx_obj_5binpt_QueryResult), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5binpt_QueryResult, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ __pyx_pw_5binpt_11QueryResult_13__repr__, /*tp_repr*/
+ &__pyx_tp_as_number_QueryResult, /*tp_as_number*/
+ &__pyx_tp_as_sequence_QueryResult, /*tp_as_sequence*/
+ &__pyx_tp_as_mapping_QueryResult, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ __pyx_pw_5binpt_11QueryResult_11__str__, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ &__pyx_tp_as_buffer_QueryResult, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ __Pyx_DOCSTR("This class represents a query result, that is,\n a target phrase (tuple of words/strings),\n a feature vector (tuple of floats)\n and possibly an alignment info (string).\n Here we don't bother parsing the alignment info, as it's often only\n used as is, threfore saving some time."), /*tp_doc*/
+ __pyx_tp_traverse_5binpt_QueryResult, /*tp_traverse*/
+ __pyx_tp_clear_5binpt_QueryResult, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_5binpt_QueryResult, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5binpt_QueryResult, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ #if PY_VERSION_HEX >= 0x02060000
+ 0, /*tp_version_tag*/
+ #endif
+};
+
+static PyObject *__pyx_tp_new_5binpt_BinaryPhraseTable(PyTypeObject *t, PyObject *a, PyObject *k) {
+ struct __pyx_obj_5binpt_BinaryPhraseTable *p;
+ PyObject *o = (*t->tp_alloc)(t, 0);
+ if (!o) return 0;
+ p = ((struct __pyx_obj_5binpt_BinaryPhraseTable *)o);
+ p->_path = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ p->_delimiters = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ if (__pyx_pw_5binpt_17BinaryPhraseTable_1__cinit__(o, a, k) < 0) {
+ Py_DECREF(o); o = 0;
+ }
+ return o;
+}
+
+static void __pyx_tp_dealloc_5binpt_BinaryPhraseTable(PyObject *o) {
+ struct __pyx_obj_5binpt_BinaryPhraseTable *p = (struct __pyx_obj_5binpt_BinaryPhraseTable *)o;
+ {
+ PyObject *etype, *eval, *etb;
+ PyErr_Fetch(&etype, &eval, &etb);
+ ++Py_REFCNT(o);
+ __pyx_pw_5binpt_17BinaryPhraseTable_3__dealloc__(o);
+ if (PyErr_Occurred()) PyErr_WriteUnraisable(o);
+ --Py_REFCNT(o);
+ PyErr_Restore(etype, eval, etb);
+ }
+ Py_XDECREF(((PyObject *)p->_path));
+ Py_XDECREF(((PyObject *)p->_delimiters));
+ (*Py_TYPE(o)->tp_free)(o);
+}
+
+static int __pyx_tp_traverse_5binpt_BinaryPhraseTable(PyObject *o, visitproc v, void *a) {
+ int e;
+ struct __pyx_obj_5binpt_BinaryPhraseTable *p = (struct __pyx_obj_5binpt_BinaryPhraseTable *)o;
+ if (p->_path) {
+ e = (*v)(p->_path, a); if (e) return e;
+ }
+ if (p->_delimiters) {
+ e = (*v)(p->_delimiters, a); if (e) return e;
+ }
+ return 0;
+}
+
+static int __pyx_tp_clear_5binpt_BinaryPhraseTable(PyObject *o) {
+ struct __pyx_obj_5binpt_BinaryPhraseTable *p = (struct __pyx_obj_5binpt_BinaryPhraseTable *)o;
+ PyObject* tmp;
+ tmp = ((PyObject*)p->_path);
+ p->_path = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ tmp = ((PyObject*)p->_delimiters);
+ p->_delimiters = ((PyObject*)Py_None); Py_INCREF(Py_None);
+ Py_XDECREF(tmp);
+ return 0;
+}
+
+static PyMethodDef __pyx_methods_5binpt_BinaryPhraseTable[] = {
+ {__Pyx_NAMESTR("isValidBinaryTable"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_5isValidBinaryTable, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5binpt_17BinaryPhraseTable_4isValidBinaryTable)},
+ {__Pyx_NAMESTR("path"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_7path, METH_NOARGS, __Pyx_DOCSTR(0)},
+ {__Pyx_NAMESTR("nscores"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_9nscores, METH_NOARGS, __Pyx_DOCSTR(0)},
+ {__Pyx_NAMESTR("wa"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_11wa, METH_NOARGS, __Pyx_DOCSTR(0)},
+ {__Pyx_NAMESTR("delimiters"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_13delimiters, METH_NOARGS, __Pyx_DOCSTR(0)},
+ {__Pyx_NAMESTR("query"), (PyCFunction)__pyx_pw_5binpt_17BinaryPhraseTable_15query, METH_VARARGS|METH_KEYWORDS, __Pyx_DOCSTR(__pyx_doc_5binpt_17BinaryPhraseTable_14query)},
+ {0, 0, 0, 0}
+};
+
+static PyNumberMethods __pyx_tp_as_number_BinaryPhraseTable = {
+ 0, /*nb_add*/
+ 0, /*nb_subtract*/
+ 0, /*nb_multiply*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_divide*/
+ #endif
+ 0, /*nb_remainder*/
+ 0, /*nb_divmod*/
+ 0, /*nb_power*/
+ 0, /*nb_negative*/
+ 0, /*nb_positive*/
+ 0, /*nb_absolute*/
+ 0, /*nb_nonzero*/
+ 0, /*nb_invert*/
+ 0, /*nb_lshift*/
+ 0, /*nb_rshift*/
+ 0, /*nb_and*/
+ 0, /*nb_xor*/
+ 0, /*nb_or*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_coerce*/
+ #endif
+ 0, /*nb_int*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_long*/
+ #else
+ 0, /*reserved*/
+ #endif
+ 0, /*nb_float*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_oct*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_hex*/
+ #endif
+ 0, /*nb_inplace_add*/
+ 0, /*nb_inplace_subtract*/
+ 0, /*nb_inplace_multiply*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*nb_inplace_divide*/
+ #endif
+ 0, /*nb_inplace_remainder*/
+ 0, /*nb_inplace_power*/
+ 0, /*nb_inplace_lshift*/
+ 0, /*nb_inplace_rshift*/
+ 0, /*nb_inplace_and*/
+ 0, /*nb_inplace_xor*/
+ 0, /*nb_inplace_or*/
+ 0, /*nb_floor_divide*/
+ 0, /*nb_true_divide*/
+ 0, /*nb_inplace_floor_divide*/
+ 0, /*nb_inplace_true_divide*/
+ #if PY_VERSION_HEX >= 0x02050000
+ 0, /*nb_index*/
+ #endif
+};
+
+static PySequenceMethods __pyx_tp_as_sequence_BinaryPhraseTable = {
+ 0, /*sq_length*/
+ 0, /*sq_concat*/
+ 0, /*sq_repeat*/
+ 0, /*sq_item*/
+ 0, /*sq_slice*/
+ 0, /*sq_ass_item*/
+ 0, /*sq_ass_slice*/
+ 0, /*sq_contains*/
+ 0, /*sq_inplace_concat*/
+ 0, /*sq_inplace_repeat*/
+};
+
+static PyMappingMethods __pyx_tp_as_mapping_BinaryPhraseTable = {
+ 0, /*mp_length*/
+ 0, /*mp_subscript*/
+ 0, /*mp_ass_subscript*/
+};
+
+static PyBufferProcs __pyx_tp_as_buffer_BinaryPhraseTable = {
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getreadbuffer*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getwritebuffer*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getsegcount*/
+ #endif
+ #if PY_MAJOR_VERSION < 3
+ 0, /*bf_getcharbuffer*/
+ #endif
+ #if PY_VERSION_HEX >= 0x02060000
+ 0, /*bf_getbuffer*/
+ #endif
+ #if PY_VERSION_HEX >= 0x02060000
+ 0, /*bf_releasebuffer*/
+ #endif
+};
+
+static PyTypeObject __pyx_type_5binpt_BinaryPhraseTable = {
+ PyVarObject_HEAD_INIT(0, 0)
+ __Pyx_NAMESTR("binpt.BinaryPhraseTable"), /*tp_name*/
+ sizeof(struct __pyx_obj_5binpt_BinaryPhraseTable), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ __pyx_tp_dealloc_5binpt_BinaryPhraseTable, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+ #if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+ #else
+ 0, /*reserved*/
+ #endif
+ 0, /*tp_repr*/
+ &__pyx_tp_as_number_BinaryPhraseTable, /*tp_as_number*/
+ &__pyx_tp_as_sequence_BinaryPhraseTable, /*tp_as_sequence*/
+ &__pyx_tp_as_mapping_BinaryPhraseTable, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ 0, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ &__pyx_tp_as_buffer_BinaryPhraseTable, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER|Py_TPFLAGS_BASETYPE|Py_TPFLAGS_HAVE_GC, /*tp_flags*/
+ __Pyx_DOCSTR("This class encapsulates a Moses::PhraseDictionaryTree for operations over\n binary phrase tables."), /*tp_doc*/
+ __pyx_tp_traverse_5binpt_BinaryPhraseTable, /*tp_traverse*/
+ __pyx_tp_clear_5binpt_BinaryPhraseTable, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ 0, /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_methods_5binpt_BinaryPhraseTable, /*tp_methods*/
+ 0, /*tp_members*/
+ 0, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ 0, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ 0, /*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ __pyx_tp_new_5binpt_BinaryPhraseTable, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+ #if PY_VERSION_HEX >= 0x02060000
+ 0, /*tp_version_tag*/
+ #endif
+};
+
+static PyMethodDef __pyx_methods[] = {
+ {__Pyx_NAMESTR("fsign"), (PyCFunction)__pyx_pw_5binpt_1fsign, METH_O, __Pyx_DOCSTR(__pyx_doc_5binpt_fsign)},
+ {0, 0, 0, 0}
+};
+
+#if PY_MAJOR_VERSION >= 3
+static struct PyModuleDef __pyx_moduledef = {
+ PyModuleDef_HEAD_INIT,
+ __Pyx_NAMESTR("binpt"),
+ 0, /* m_doc */
+ -1, /* m_size */
+ __pyx_methods /* m_methods */,
+ NULL, /* m_reload */
+ NULL, /* m_traverse */
+ NULL, /* m_clear */
+ NULL /* m_free */
+};
+#endif
+
+static __Pyx_StringTabEntry __pyx_string_tab[] = {
+ {&__pyx_kp_s_1, __pyx_k_1, sizeof(__pyx_k_1), 0, 0, 1, 0},
+ {&__pyx_kp_s_10, __pyx_k_10, sizeof(__pyx_k_10), 0, 0, 1, 0},
+ {&__pyx_kp_s_11, __pyx_k_11, sizeof(__pyx_k_11), 0, 0, 1, 0},
+ {&__pyx_kp_s_12, __pyx_k_12, sizeof(__pyx_k_12), 0, 0, 1, 0},
+ {&__pyx_kp_s_13, __pyx_k_13, sizeof(__pyx_k_13), 0, 0, 1, 0},
+ {&__pyx_kp_s_14, __pyx_k_14, sizeof(__pyx_k_14), 0, 0, 1, 0},
+ {&__pyx_kp_s_15, __pyx_k_15, sizeof(__pyx_k_15), 0, 0, 1, 0},
+ {&__pyx_kp_s_18, __pyx_k_18, sizeof(__pyx_k_18), 0, 0, 1, 0},
+ {&__pyx_kp_s_3, __pyx_k_3, sizeof(__pyx_k_3), 0, 0, 1, 0},
+ {&__pyx_kp_s_5, __pyx_k_5, sizeof(__pyx_k_5), 0, 0, 1, 0},
+ {&__pyx_kp_s_6, __pyx_k_6, sizeof(__pyx_k_6), 0, 0, 1, 0},
+ {&__pyx_kp_s_7, __pyx_k_7, sizeof(__pyx_k_7), 0, 0, 1, 0},
+ {&__pyx_kp_s_8, __pyx_k_8, sizeof(__pyx_k_8), 0, 0, 1, 0},
+ {&__pyx_kp_s_9, __pyx_k_9, sizeof(__pyx_k_9), 0, 0, 1, 0},
+ {&__pyx_n_s__TypeError, __pyx_k__TypeError, sizeof(__pyx_k__TypeError), 0, 0, 1, 1},
+ {&__pyx_n_s__ValueError, __pyx_k__ValueError, sizeof(__pyx_k__ValueError), 0, 0, 1, 1},
+ {&__pyx_n_s____main__, __pyx_k____main__, sizeof(__pyx_k____main__), 0, 0, 1, 1},
+ {&__pyx_n_s____test__, __pyx_k____test__, sizeof(__pyx_k____test__), 0, 0, 1, 1},
+ {&__pyx_n_s__binpt, __pyx_k__binpt, sizeof(__pyx_k__binpt), 0, 0, 1, 1},
+ {&__pyx_n_s__cmp, __pyx_k__cmp, sizeof(__pyx_k__cmp), 0, 0, 1, 1},
+ {&__pyx_n_s__delimiters, __pyx_k__delimiters, sizeof(__pyx_k__delimiters), 0, 0, 1, 1},
+ {&__pyx_n_s__desc, __pyx_k__desc, sizeof(__pyx_k__desc), 0, 0, 1, 1},
+ {&__pyx_n_s__encode, __pyx_k__encode, sizeof(__pyx_k__encode), 0, 0, 1, 1},
+ {&__pyx_n_s__isValidBinaryTable, __pyx_k__isValidBinaryTable, sizeof(__pyx_k__isValidBinaryTable), 0, 0, 1, 1},
+ {&__pyx_n_s__isfile, __pyx_k__isfile, sizeof(__pyx_k__isfile), 0, 0, 1, 1},
+ {&__pyx_n_s__join, __pyx_k__join, sizeof(__pyx_k__join), 0, 0, 1, 1},
+ {&__pyx_n_s__keys, __pyx_k__keys, sizeof(__pyx_k__keys), 0, 0, 1, 1},
+ {&__pyx_n_s__line, __pyx_k__line, sizeof(__pyx_k__line), 0, 0, 1, 1},
+ {&__pyx_n_s__nscores, __pyx_k__nscores, sizeof(__pyx_k__nscores), 0, 0, 1, 1},
+ {&__pyx_n_s__os, __pyx_k__os, sizeof(__pyx_k__os), 0, 0, 1, 1},
+ {&__pyx_n_s__path, __pyx_k__path, sizeof(__pyx_k__path), 0, 0, 1, 1},
+ {&__pyx_n_s__property, __pyx_k__property, sizeof(__pyx_k__property), 0, 0, 1, 1},
+ {&__pyx_n_s__range, __pyx_k__range, sizeof(__pyx_k__range), 0, 0, 1, 1},
+ {&__pyx_n_s__scores, __pyx_k__scores, sizeof(__pyx_k__scores), 0, 0, 1, 1},
+ {&__pyx_n_s__sort, __pyx_k__sort, sizeof(__pyx_k__sort), 0, 0, 1, 1},
+ {&__pyx_n_s__staticmethod, __pyx_k__staticmethod, sizeof(__pyx_k__staticmethod), 0, 0, 1, 1},
+ {&__pyx_n_s__stem, __pyx_k__stem, sizeof(__pyx_k__stem), 0, 0, 1, 1},
+ {&__pyx_n_s__top, __pyx_k__top, sizeof(__pyx_k__top), 0, 0, 1, 1},
+ {&__pyx_n_s__wa, __pyx_k__wa, sizeof(__pyx_k__wa), 0, 0, 1, 1},
+ {&__pyx_n_s__words, __pyx_k__words, sizeof(__pyx_k__words), 0, 0, 1, 1},
+ {&__pyx_n_s__x, __pyx_k__x, sizeof(__pyx_k__x), 0, 0, 1, 1},
+ {&__pyx_n_s__y, __pyx_k__y, sizeof(__pyx_k__y), 0, 0, 1, 1},
+ {0, 0, 0, 0, 0, 0, 0}
+};
+static int __Pyx_InitCachedBuiltins(void) {
+ __pyx_builtin_property = __Pyx_GetName(__pyx_b, __pyx_n_s__property); if (!__pyx_builtin_property) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_builtin_staticmethod = __Pyx_GetName(__pyx_b, __pyx_n_s__staticmethod); if (!__pyx_builtin_staticmethod) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_builtin_TypeError = __Pyx_GetName(__pyx_b, __pyx_n_s__TypeError); if (!__pyx_builtin_TypeError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 15; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_builtin_range = __Pyx_GetName(__pyx_b, __pyx_n_s__range); if (!__pyx_builtin_range) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 74; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_builtin_ValueError = __Pyx_GetName(__pyx_b, __pyx_n_s__ValueError); if (!__pyx_builtin_ValueError) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 96; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ return 0;
+ __pyx_L1_error:;
+ return -1;
+}
+
+static int __Pyx_InitCachedConstants(void) {
+ __Pyx_RefNannyDeclarations
+ __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
+
+ /* "binpt.pyx":14
+ * return data
+ * elif isinstance(data, unicode):
+ * return data.encode('UTF-8') # <<<<<<<<<<<<<<
+ * raise TypeError('Cannot convert %s to string' % type(data))
+ *
+ */
+ __pyx_k_tuple_2 = PyTuple_New(1); if (unlikely(!__pyx_k_tuple_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 14; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_k_tuple_2);
+ __Pyx_INCREF(((PyObject *)__pyx_kp_s_1));
+ PyTuple_SET_ITEM(__pyx_k_tuple_2, 0, ((PyObject *)__pyx_kp_s_1));
+ __Pyx_GIVEREF(((PyObject *)__pyx_kp_s_1));
+ __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_2));
+
+ /* "binpt.pyx":52
+ *
+ * @staticmethod
+ * def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
+ * '''Returns the sign of keys(y) - keys(x).
+ * Can only be used if scores is not an empty vector as
+ */
+ __pyx_k_tuple_16 = PyTuple_New(3); if (unlikely(!__pyx_k_tuple_16)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_k_tuple_16);
+ __Pyx_INCREF(((PyObject *)__pyx_n_s__x));
+ PyTuple_SET_ITEM(__pyx_k_tuple_16, 0, ((PyObject *)__pyx_n_s__x));
+ __Pyx_GIVEREF(((PyObject *)__pyx_n_s__x));
+ __Pyx_INCREF(((PyObject *)__pyx_n_s__y));
+ PyTuple_SET_ITEM(__pyx_k_tuple_16, 1, ((PyObject *)__pyx_n_s__y));
+ __Pyx_GIVEREF(((PyObject *)__pyx_n_s__y));
+ __Pyx_INCREF(((PyObject *)__pyx_n_s__keys));
+ PyTuple_SET_ITEM(__pyx_k_tuple_16, 2, ((PyObject *)__pyx_n_s__keys));
+ __Pyx_GIVEREF(((PyObject *)__pyx_n_s__keys));
+ __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_16));
+ __pyx_k_codeobj_17 = (PyObject*)__Pyx_PyCode_New(3, 0, 3, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_16, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_18, __pyx_n_s__desc, 52, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_17)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+
+ /* "binpt.pyx":109
+ *
+ * @staticmethod
+ * def isValidBinaryTable(stem, bint wa = False): # <<<<<<<<<<<<<<
+ * '''This sanity check was added to the constructor, but you can access it from outside this class
+ * to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ */
+ __pyx_k_tuple_19 = PyTuple_New(2); if (unlikely(!__pyx_k_tuple_19)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_k_tuple_19);
+ __Pyx_INCREF(((PyObject *)__pyx_n_s__stem));
+ PyTuple_SET_ITEM(__pyx_k_tuple_19, 0, ((PyObject *)__pyx_n_s__stem));
+ __Pyx_GIVEREF(((PyObject *)__pyx_n_s__stem));
+ __Pyx_INCREF(((PyObject *)__pyx_n_s__wa));
+ PyTuple_SET_ITEM(__pyx_k_tuple_19, 1, ((PyObject *)__pyx_n_s__wa));
+ __Pyx_GIVEREF(((PyObject *)__pyx_n_s__wa));
+ __Pyx_GIVEREF(((PyObject *)__pyx_k_tuple_19));
+ __pyx_k_codeobj_20 = (PyObject*)__Pyx_PyCode_New(2, 0, 2, 0, 0, __pyx_empty_bytes, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_k_tuple_19, __pyx_empty_tuple, __pyx_empty_tuple, __pyx_kp_s_18, __pyx_n_s__isValidBinaryTable, 109, __pyx_empty_bytes); if (unlikely(!__pyx_k_codeobj_20)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_RefNannyFinishContext();
+ return 0;
+ __pyx_L1_error:;
+ __Pyx_RefNannyFinishContext();
+ return -1;
+}
+
+static int __Pyx_InitGlobals(void) {
+ if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ __pyx_int_0 = PyInt_FromLong(0); if (unlikely(!__pyx_int_0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ return 0;
+ __pyx_L1_error:;
+ return -1;
+}
+
+#if PY_MAJOR_VERSION < 3
+PyMODINIT_FUNC initbinpt(void); /*proto*/
+PyMODINIT_FUNC initbinpt(void)
+#else
+PyMODINIT_FUNC PyInit_binpt(void); /*proto*/
+PyMODINIT_FUNC PyInit_binpt(void)
+#endif
+{
+ PyObject *__pyx_t_1 = NULL;
+ PyObject *__pyx_t_2 = NULL;
+ __Pyx_RefNannyDeclarations
+ #if CYTHON_REFNANNY
+ __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
+ if (!__Pyx_RefNanny) {
+ PyErr_Clear();
+ __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
+ if (!__Pyx_RefNanny)
+ Py_FatalError("failed to import 'refnanny' module");
+ }
+ #endif
+ __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_binpt(void)", 0);
+ if ( __Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #ifdef __Pyx_CyFunction_USED
+ if (__Pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ #ifdef __Pyx_FusedFunction_USED
+ if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ #ifdef __Pyx_Generator_USED
+ if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ #endif
+ /*--- Library function declarations ---*/
+ /*--- Threads initialization code ---*/
+ #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
+ #ifdef WITH_THREAD /* Python build with threading support? */
+ PyEval_InitThreads();
+ #endif
+ #endif
+ /*--- Module creation code ---*/
+ #if PY_MAJOR_VERSION < 3
+ __pyx_m = Py_InitModule4(__Pyx_NAMESTR("binpt"), __pyx_methods, 0, 0, PYTHON_API_VERSION);
+ #else
+ __pyx_m = PyModule_Create(&__pyx_moduledef);
+ #endif
+ if (!__pyx_m) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ #if PY_MAJOR_VERSION < 3
+ Py_INCREF(__pyx_m);
+ #endif
+ __pyx_b = PyImport_AddModule(__Pyx_NAMESTR(__Pyx_BUILTIN_MODULE_NAME));
+ if (!__pyx_b) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ if (__Pyx_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ /*--- Initialize various global constants etc. ---*/
+ if (unlikely(__Pyx_InitGlobals() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__pyx_module_is_main_binpt) {
+ if (__Pyx_SetAttrString(__pyx_m, "__name__", __pyx_n_s____main__) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
+ }
+ /*--- Builtin init code ---*/
+ if (unlikely(__Pyx_InitCachedBuiltins() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ /*--- Constants init code ---*/
+ if (unlikely(__Pyx_InitCachedConstants() < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ /*--- Global init code ---*/
+ /*--- Variable export code ---*/
+ /*--- Function export code ---*/
+ /*--- Type init code ---*/
+ if (PyType_Ready(&__pyx_type_5binpt_QueryResult) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ {
+ PyObject *wrapper = __Pyx_GetAttrString((PyObject *)&__pyx_type_5binpt_QueryResult, "__str__"); if (unlikely(!wrapper)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (Py_TYPE(wrapper) == &PyWrapperDescr_Type) {
+ __pyx_wrapperbase_5binpt_11QueryResult_10__str__ = *((PyWrapperDescrObject *)wrapper)->d_base;
+ __pyx_wrapperbase_5binpt_11QueryResult_10__str__.doc = __pyx_doc_5binpt_11QueryResult_10__str__;
+ ((PyWrapperDescrObject *)wrapper)->d_base = &__pyx_wrapperbase_5binpt_11QueryResult_10__str__;
+ }
+ }
+ if (__Pyx_SetAttrString(__pyx_m, "QueryResult", (PyObject *)&__pyx_type_5binpt_QueryResult) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 17; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_ptype_5binpt_QueryResult = &__pyx_type_5binpt_QueryResult;
+ if (PyType_Ready(&__pyx_type_5binpt_BinaryPhraseTable) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ if (__Pyx_SetAttrString(__pyx_m, "BinaryPhraseTable", (PyObject *)&__pyx_type_5binpt_BinaryPhraseTable) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 78; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __pyx_ptype_5binpt_BinaryPhraseTable = &__pyx_type_5binpt_BinaryPhraseTable;
+ /*--- Type import code ---*/
+ /*--- Variable import code ---*/
+ /*--- Function import code ---*/
+ /*--- Execution code ---*/
+
+ /* "binpt.pyx":3
+ * from libcpp.string cimport string
+ * from libcpp.vector cimport vector
+ * import os # <<<<<<<<<<<<<<
+ * import cython
+ *
+ */
+ __pyx_t_1 = __Pyx_Import(((PyObject *)__pyx_n_s__os), 0, -1); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ if (PyObject_SetAttr(__pyx_m, __pyx_n_s__os, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 3; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+
+ /* "binpt.pyx":37
+ *
+ * @property
+ * def words(self): # <<<<<<<<<<<<<<
+ * '''Tuple of words (as strings)'''
+ * return self._words
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_QueryResult, __pyx_n_s__words); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 36; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_QueryResult->tp_dict, __pyx_n_s__words, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 37; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_QueryResult);
+
+ /* "binpt.pyx":42
+ *
+ * @property
+ * def scores(self): # <<<<<<<<<<<<<<
+ * '''Tuple of scores (as floats)'''
+ * return self._scores
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_QueryResult, __pyx_n_s__scores); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 41; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_QueryResult->tp_dict, __pyx_n_s__scores, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 42; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_QueryResult);
+
+ /* "binpt.pyx":47
+ *
+ * @property
+ * def wa(self): # <<<<<<<<<<<<<<
+ * '''Word-alignment info (as string)'''
+ * return self._wa
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_QueryResult, __pyx_n_s__wa); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 46; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_QueryResult->tp_dict, __pyx_n_s__wa, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 47; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_QueryResult);
+
+ /* "binpt.pyx":52
+ *
+ * @staticmethod
+ * def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
+ * '''Returns the sign of keys(y) - keys(x).
+ * Can only be used if scores is not an empty vector as
+ */
+ __pyx_t_1 = __Pyx_CyFunction_NewEx(&__pyx_mdef_5binpt_11QueryResult_4desc_lambda1, 0, NULL, __pyx_n_s__binpt, NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_k_4 = __pyx_t_1;
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+
+ /* "binpt.pyx":51
+ * return self._wa
+ *
+ * @staticmethod # <<<<<<<<<<<<<<
+ * def desc(x, y, keys = lambda r: r.scores[0]):
+ * '''Returns the sign of keys(y) - keys(x).
+ */
+ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5binpt_11QueryResult_9desc, NULL, __pyx_n_s__binpt); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_staticmethod, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_QueryResult->tp_dict, __pyx_n_s__desc, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_QueryResult);
+
+ /* "binpt.pyx":52
+ *
+ * @staticmethod
+ * def desc(x, y, keys = lambda r: r.scores[0]): # <<<<<<<<<<<<<<
+ * '''Returns the sign of keys(y) - keys(x).
+ * Can only be used if scores is not an empty vector as
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_QueryResult, __pyx_n_s__desc); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_staticmethod, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 51; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_QueryResult->tp_dict, __pyx_n_s__desc, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 52; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_QueryResult);
+
+ /* "binpt.pyx":109
+ *
+ * @staticmethod
+ * def isValidBinaryTable(stem, bint wa = False): # <<<<<<<<<<<<<<
+ * '''This sanity check was added to the constructor, but you can access it from outside this class
+ * to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ */
+ __pyx_t_1 = PyCFunction_NewEx(&__pyx_mdef_5binpt_17BinaryPhraseTable_5isValidBinaryTable, NULL, __pyx_n_s__binpt); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_staticmethod, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable->tp_dict, __pyx_n_s__isValidBinaryTable, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_BinaryPhraseTable);
+
+ /* "binpt.pyx":108
+ * del self.__tree
+ *
+ * @staticmethod # <<<<<<<<<<<<<<
+ * def isValidBinaryTable(stem, bint wa = False):
+ * '''This sanity check was added to the constructor, but you can access it from outside this class
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable, __pyx_n_s__isValidBinaryTable); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_staticmethod, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 108; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable->tp_dict, __pyx_n_s__isValidBinaryTable, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 109; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_BinaryPhraseTable);
+
+ /* "binpt.pyx":126
+ *
+ * @property
+ * def path(self): # <<<<<<<<<<<<<<
+ * return self._path
+ *
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable, __pyx_n_s__path); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 126; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 125; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable->tp_dict, __pyx_n_s__path, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 126; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_BinaryPhraseTable);
+
+ /* "binpt.pyx":130
+ *
+ * @property
+ * def nscores(self): # <<<<<<<<<<<<<<
+ * return self._nscores
+ *
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable, __pyx_n_s__nscores); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 129; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable->tp_dict, __pyx_n_s__nscores, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 130; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_BinaryPhraseTable);
+
+ /* "binpt.pyx":134
+ *
+ * @property
+ * def wa(self): # <<<<<<<<<<<<<<
+ * return self._wa
+ *
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable, __pyx_n_s__wa); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 133; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable->tp_dict, __pyx_n_s__wa, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 134; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_BinaryPhraseTable);
+
+ /* "binpt.pyx":138
+ *
+ * @property
+ * def delimiters(self): # <<<<<<<<<<<<<<
+ * return self._delimiters
+ *
+ */
+ __pyx_t_1 = __Pyx_GetName((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable, __pyx_n_s__delimiters); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __pyx_t_2 = PyTuple_New(1); if (unlikely(!__pyx_t_2)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_2);
+ PyTuple_SET_ITEM(__pyx_t_2, 0, __pyx_t_1);
+ __Pyx_GIVEREF(__pyx_t_1);
+ __pyx_t_1 = 0;
+ __pyx_t_1 = PyObject_Call(__pyx_builtin_property, ((PyObject *)__pyx_t_2), NULL); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 137; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(__pyx_t_1);
+ __Pyx_DECREF(((PyObject *)__pyx_t_2)); __pyx_t_2 = 0;
+ if (PyDict_SetItem((PyObject *)__pyx_ptype_5binpt_BinaryPhraseTable->tp_dict, __pyx_n_s__delimiters, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 138; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;
+ PyType_Modified(__pyx_ptype_5binpt_BinaryPhraseTable);
+
+ /* "binpt.pyx":1
+ * from libcpp.string cimport string # <<<<<<<<<<<<<<
+ * from libcpp.vector cimport vector
+ * import os
+ */
+ __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_GOTREF(((PyObject *)__pyx_t_1));
+ if (PyObject_SetAttr(__pyx_m, __pyx_n_s____test__, ((PyObject *)__pyx_t_1)) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
+ __Pyx_DECREF(((PyObject *)__pyx_t_1)); __pyx_t_1 = 0;
+ goto __pyx_L0;
+ __pyx_L1_error:;
+ __Pyx_XDECREF(__pyx_t_1);
+ __Pyx_XDECREF(__pyx_t_2);
+ if (__pyx_m) {
+ __Pyx_AddTraceback("init binpt", __pyx_clineno, __pyx_lineno, __pyx_filename);
+ Py_DECREF(__pyx_m); __pyx_m = 0;
+ } else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_ImportError, "init binpt");
+ }
+ __pyx_L0:;
+ __Pyx_RefNannyFinishContext();
+ #if PY_MAJOR_VERSION < 3
+ return;
+ #else
+ return __pyx_m;
+ #endif
+}
+
+/* Runtime support code */
+#if CYTHON_REFNANNY
+static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
+ PyObject *m = NULL, *p = NULL;
+ void *r = NULL;
+ m = PyImport_ImportModule((char *)modname);
+ if (!m) goto end;
+ p = PyObject_GetAttrString(m, (char *)"RefNannyAPI");
+ if (!p) goto end;
+ r = PyLong_AsVoidPtr(p);
+end:
+ Py_XDECREF(p);
+ Py_XDECREF(m);
+ return (__Pyx_RefNannyAPIStruct *)r;
+}
+#endif /* CYTHON_REFNANNY */
+
+static PyObject *__Pyx_GetName(PyObject *dict, PyObject *name) {
+ PyObject *result;
+ result = PyObject_GetAttr(dict, name);
+ if (!result) {
+ if (dict != __pyx_b) {
+ PyErr_Clear();
+ result = PyObject_GetAttr(__pyx_b, name);
+ }
+ if (!result) {
+ PyErr_SetObject(PyExc_NameError, name);
+ }
+ }
+ return result;
+}
+
+static CYTHON_INLINE void __Pyx_ErrRestore(PyObject *type, PyObject *value, PyObject *tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyObject *tmp_type, *tmp_value, *tmp_tb;
+ PyThreadState *tstate = PyThreadState_GET();
+ tmp_type = tstate->curexc_type;
+ tmp_value = tstate->curexc_value;
+ tmp_tb = tstate->curexc_traceback;
+ tstate->curexc_type = type;
+ tstate->curexc_value = value;
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_type);
+ Py_XDECREF(tmp_value);
+ Py_XDECREF(tmp_tb);
+#else
+ PyErr_Restore(type, value, tb);
+#endif
+}
+static CYTHON_INLINE void __Pyx_ErrFetch(PyObject **type, PyObject **value, PyObject **tb) {
+#if CYTHON_COMPILING_IN_CPYTHON
+ PyThreadState *tstate = PyThreadState_GET();
+ *type = tstate->curexc_type;
+ *value = tstate->curexc_value;
+ *tb = tstate->curexc_traceback;
+ tstate->curexc_type = 0;
+ tstate->curexc_value = 0;
+ tstate->curexc_traceback = 0;
+#else
+ PyErr_Fetch(type, value, tb);
+#endif
+}
+
+#if PY_MAJOR_VERSION < 3
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb,
+ CYTHON_UNUSED PyObject *cause) {
+ Py_XINCREF(type);
+ Py_XINCREF(value);
+ Py_XINCREF(tb);
+ if (tb == Py_None) {
+ Py_DECREF(tb);
+ tb = 0;
+ }
+ else if (tb != NULL && !PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto raise_error;
+ }
+ if (value == NULL) {
+ value = Py_None;
+ Py_INCREF(value);
+ }
+ #if PY_VERSION_HEX < 0x02050000
+ if (!PyClass_Check(type))
+ #else
+ if (!PyType_Check(type))
+ #endif
+ {
+ if (value != Py_None) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto raise_error;
+ }
+ Py_DECREF(value);
+ value = type;
+ #if PY_VERSION_HEX < 0x02050000
+ if (PyInstance_Check(type)) {
+ type = (PyObject*) ((PyInstanceObject*)type)->in_class;
+ Py_INCREF(type);
+ }
+ else {
+ type = 0;
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception must be an old-style class or instance");
+ goto raise_error;
+ }
+ #else
+ type = (PyObject*) Py_TYPE(type);
+ Py_INCREF(type);
+ if (!PyType_IsSubtype((PyTypeObject *)type, (PyTypeObject *)PyExc_BaseException)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto raise_error;
+ }
+ #endif
+ }
+ __Pyx_ErrRestore(type, value, tb);
+ return;
+raise_error:
+ Py_XDECREF(value);
+ Py_XDECREF(type);
+ Py_XDECREF(tb);
+ return;
+}
+#else /* Python 3+ */
+static void __Pyx_Raise(PyObject *type, PyObject *value, PyObject *tb, PyObject *cause) {
+ if (tb == Py_None) {
+ tb = 0;
+ } else if (tb && !PyTraceBack_Check(tb)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: arg 3 must be a traceback or None");
+ goto bad;
+ }
+ if (value == Py_None)
+ value = 0;
+ if (PyExceptionInstance_Check(type)) {
+ if (value) {
+ PyErr_SetString(PyExc_TypeError,
+ "instance exception may not have a separate value");
+ goto bad;
+ }
+ value = type;
+ type = (PyObject*) Py_TYPE(value);
+ } else if (!PyExceptionClass_Check(type)) {
+ PyErr_SetString(PyExc_TypeError,
+ "raise: exception class must be a subclass of BaseException");
+ goto bad;
+ }
+ if (cause) {
+ PyObject *fixed_cause;
+ if (PyExceptionClass_Check(cause)) {
+ fixed_cause = PyObject_CallObject(cause, NULL);
+ if (fixed_cause == NULL)
+ goto bad;
+ }
+ else if (PyExceptionInstance_Check(cause)) {
+ fixed_cause = cause;
+ Py_INCREF(fixed_cause);
+ }
+ else {
+ PyErr_SetString(PyExc_TypeError,
+ "exception causes must derive from "
+ "BaseException");
+ goto bad;
+ }
+ if (!value) {
+ value = PyObject_CallObject(type, NULL);
+ }
+ PyException_SetCause(value, fixed_cause);
+ }
+ PyErr_SetObject(type, value);
+ if (tb) {
+ PyThreadState *tstate = PyThreadState_GET();
+ PyObject* tmp_tb = tstate->curexc_traceback;
+ if (tb != tmp_tb) {
+ Py_INCREF(tb);
+ tstate->curexc_traceback = tb;
+ Py_XDECREF(tmp_tb);
+ }
+ }
+bad:
+ return;
+}
+#endif
+
+static void __Pyx_RaiseArgtupleInvalid(
+ const char* func_name,
+ int exact,
+ Py_ssize_t num_min,
+ Py_ssize_t num_max,
+ Py_ssize_t num_found)
+{
+ Py_ssize_t num_expected;
+ const char *more_or_less;
+ if (num_found < num_min) {
+ num_expected = num_min;
+ more_or_less = "at least";
+ } else {
+ num_expected = num_max;
+ more_or_less = "at most";
+ }
+ if (exact) {
+ more_or_less = "exactly";
+ }
+ PyErr_Format(PyExc_TypeError,
+ "%s() takes %s %"PY_FORMAT_SIZE_T"d positional argument%s (%"PY_FORMAT_SIZE_T"d given)",
+ func_name, more_or_less, num_expected,
+ (num_expected == 1) ? "" : "s", num_found);
+}
+
+static void __Pyx_RaiseDoubleKeywordsError(
+ const char* func_name,
+ PyObject* kw_name)
+{
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION >= 3
+ "%s() got multiple values for keyword argument '%U'", func_name, kw_name);
+ #else
+ "%s() got multiple values for keyword argument '%s'", func_name,
+ PyString_AS_STRING(kw_name));
+ #endif
+}
+
+static int __Pyx_ParseOptionalKeywords(
+ PyObject *kwds,
+ PyObject **argnames[],
+ PyObject *kwds2,
+ PyObject *values[],
+ Py_ssize_t num_pos_args,
+ const char* function_name)
+{
+ PyObject *key = 0, *value = 0;
+ Py_ssize_t pos = 0;
+ PyObject*** name;
+ PyObject*** first_kw_arg = argnames + num_pos_args;
+ while (PyDict_Next(kwds, &pos, &key, &value)) {
+ name = first_kw_arg;
+ while (*name && (**name != key)) name++;
+ if (*name) {
+ values[name-argnames] = value;
+ } else {
+ #if PY_MAJOR_VERSION < 3
+ if (unlikely(!PyString_CheckExact(key)) && unlikely(!PyString_Check(key))) {
+ #else
+ if (unlikely(!PyUnicode_Check(key))) {
+ #endif
+ goto invalid_keyword_type;
+ } else {
+ for (name = first_kw_arg; *name; name++) {
+ #if PY_MAJOR_VERSION >= 3
+ if (PyUnicode_GET_SIZE(**name) == PyUnicode_GET_SIZE(key) &&
+ PyUnicode_Compare(**name, key) == 0) break;
+ #else
+ if (PyString_GET_SIZE(**name) == PyString_GET_SIZE(key) &&
+ _PyString_Eq(**name, key)) break;
+ #endif
+ }
+ if (*name) {
+ values[name-argnames] = value;
+ } else {
+ for (name=argnames; name != first_kw_arg; name++) {
+ if (**name == key) goto arg_passed_twice;
+ #if PY_MAJOR_VERSION >= 3
+ if (PyUnicode_GET_SIZE(**name) == PyUnicode_GET_SIZE(key) &&
+ PyUnicode_Compare(**name, key) == 0) goto arg_passed_twice;
+ #else
+ if (PyString_GET_SIZE(**name) == PyString_GET_SIZE(key) &&
+ _PyString_Eq(**name, key)) goto arg_passed_twice;
+ #endif
+ }
+ if (kwds2) {
+ if (unlikely(PyDict_SetItem(kwds2, key, value))) goto bad;
+ } else {
+ goto invalid_keyword;
+ }
+ }
+ }
+ }
+ }
+ return 0;
+arg_passed_twice:
+ __Pyx_RaiseDoubleKeywordsError(function_name, **name);
+ goto bad;
+invalid_keyword_type:
+ PyErr_Format(PyExc_TypeError,
+ "%s() keywords must be strings", function_name);
+ goto bad;
+invalid_keyword:
+ PyErr_Format(PyExc_TypeError,
+ #if PY_MAJOR_VERSION < 3
+ "%s() got an unexpected keyword argument '%s'",
+ function_name, PyString_AsString(key));
+ #else
+ "%s() got an unexpected keyword argument '%U'",
+ function_name, key);
+ #endif
+bad:
+ return -1;
+}
+
+
+
+static int __Pyx_ArgTypeTest(PyObject *obj, PyTypeObject *type, int none_allowed,
+ const char *name, int exact)
+{
+ if (!type) {
+ PyErr_Format(PyExc_SystemError, "Missing type object");
+ return 0;
+ }
+ if (none_allowed && obj == Py_None) return 1;
+ else if (exact) {
+ if (Py_TYPE(obj) == type) return 1;
+ }
+ else {
+ if (PyObject_TypeCheck(obj, type)) return 1;
+ }
+ PyErr_Format(PyExc_TypeError,
+ "Argument '%s' has incorrect type (expected %s, got %s)",
+ name, type->tp_name, Py_TYPE(obj)->tp_name);
+ return 0;
+}
+
+static PyObject *__Pyx_Import(PyObject *name, PyObject *from_list, long level) {
+ PyObject *py_import = 0;
+ PyObject *empty_list = 0;
+ PyObject *module = 0;
+ PyObject *global_dict = 0;
+ PyObject *empty_dict = 0;
+ PyObject *list;
+ py_import = __Pyx_GetAttrString(__pyx_b, "__import__");
+ if (!py_import)
+ goto bad;
+ if (from_list)
+ list = from_list;
+ else {
+ empty_list = PyList_New(0);
+ if (!empty_list)
+ goto bad;
+ list = empty_list;
+ }
+ global_dict = PyModule_GetDict(__pyx_m);
+ if (!global_dict)
+ goto bad;
+ empty_dict = PyDict_New();
+ if (!empty_dict)
+ goto bad;
+ #if PY_VERSION_HEX >= 0x02050000
+ {
+ #if PY_MAJOR_VERSION >= 3
+ if (level == -1) {
+ if (strchr(__Pyx_MODULE_NAME, '.')) {
+ /* try package relative import first */
+ PyObject *py_level = PyInt_FromLong(1);
+ if (!py_level)
+ goto bad;
+ module = PyObject_CallFunctionObjArgs(py_import,
+ name, global_dict, empty_dict, list, py_level, NULL);
+ Py_DECREF(py_level);
+ if (!module) {
+ if (!PyErr_ExceptionMatches(PyExc_ImportError))
+ goto bad;
+ PyErr_Clear();
+ }
+ }
+ level = 0; /* try absolute import on failure */
+ }
+ #endif
+ if (!module) {
+ PyObject *py_level = PyInt_FromLong(level);
+ if (!py_level)
+ goto bad;
+ module = PyObject_CallFunctionObjArgs(py_import,
+ name, global_dict, empty_dict, list, py_level, NULL);
+ Py_DECREF(py_level);
+ }
+ }
+ #else
+ if (level>0) {
+ PyErr_SetString(PyExc_RuntimeError, "Relative import is not supported for Python <=2.4.");
+ goto bad;
+ }
+ module = PyObject_CallFunctionObjArgs(py_import,
+ name, global_dict, empty_dict, list, NULL);
+ #endif
+bad:
+ Py_XDECREF(empty_list);
+ Py_XDECREF(py_import);
+ Py_XDECREF(empty_dict);
+ return module;
+}
+
+static PyObject *
+__Pyx_CyFunction_get_doc(__pyx_CyFunctionObject *op, CYTHON_UNUSED void *closure)
+{
+ if (op->func_doc == NULL && op->func.m_ml->ml_doc) {
+#if PY_MAJOR_VERSION >= 3
+ op->func_doc = PyUnicode_FromString(op->func.m_ml->ml_doc);
+#else
+ op->func_doc = PyString_FromString(op->func.m_ml->ml_doc);
+#endif
+ }
+ if (op->func_doc == 0) {
+ Py_INCREF(Py_None);
+ return Py_None;
+ }
+ Py_INCREF(op->func_doc);
+ return op->func_doc;
+}
+/* --- Cython-generated CyFunction attribute accessors.  All getters
+ *     return a NEW reference; setters replace-then-release so the old
+ *     value outlives the assignment. --- */
+/* Setter for func_doc/__doc__.  Deleting (value == NULL) stores Py_None
+ * rather than clearing the slot. */
+static int
+__Pyx_CyFunction_set_doc(__pyx_CyFunctionObject *op, PyObject *value)
+{
+ PyObject *tmp = op->func_doc;
+ if (value == NULL)
+ op->func_doc = Py_None; /* Mark as deleted */
+ else
+ op->func_doc = value;
+ Py_INCREF(op->func_doc);
+ Py_XDECREF(tmp);
+ return 0;
+}
+/* Getter for func_name/__name__: lazily interns the C-level ml_name on
+ * first access.  NOTE(review): if interning fails, func_name stays NULL
+ * and the Py_INCREF below would crash -- TODO confirm against upstream. */
+static PyObject *
+__Pyx_CyFunction_get_name(__pyx_CyFunctionObject *op)
+{
+ if (op->func_name == NULL) {
+#if PY_MAJOR_VERSION >= 3
+ op->func_name = PyUnicode_InternFromString(op->func.m_ml->ml_name);
+#else
+ op->func_name = PyString_InternFromString(op->func.m_ml->ml_name);
+#endif
+ }
+ Py_INCREF(op->func_name);
+ return op->func_name;
+}
+/* Setter for func_name/__name__; only str (Py2) / unicode (Py3) accepted. */
+static int
+__Pyx_CyFunction_set_name(__pyx_CyFunctionObject *op, PyObject *value)
+{
+ PyObject *tmp;
+#if PY_MAJOR_VERSION >= 3
+ if (value == NULL || !PyUnicode_Check(value)) {
+#else
+ if (value == NULL || !PyString_Check(value)) {
+#endif
+ PyErr_SetString(PyExc_TypeError,
+ "__name__ must be set to a string object");
+ return -1;
+ }
+ tmp = op->func_name;
+ Py_INCREF(value);
+ op->func_name = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+/* __self__: the stored closure object, or None when there is none. */
+static PyObject *
+__Pyx_CyFunction_get_self(__pyx_CyFunctionObject *m, CYTHON_UNUSED void *closure)
+{
+ PyObject *self;
+ self = m->func_closure;
+ if (self == NULL)
+ self = Py_None;
+ Py_INCREF(self);
+ return self;
+}
+/* __dict__: created lazily on first access. */
+static PyObject *
+__Pyx_CyFunction_get_dict(__pyx_CyFunctionObject *op)
+{
+ if (op->func_dict == NULL) {
+ op->func_dict = PyDict_New();
+ if (op->func_dict == NULL)
+ return NULL;
+ }
+ Py_INCREF(op->func_dict);
+ return op->func_dict;
+}
+/* __dict__ setter: must be a dict and may not be deleted. */
+static int
+__Pyx_CyFunction_set_dict(__pyx_CyFunctionObject *op, PyObject *value)
+{
+ PyObject *tmp;
+ if (value == NULL) {
+ PyErr_SetString(PyExc_TypeError,
+ "function's dictionary may not be deleted");
+ return -1;
+ }
+ if (!PyDict_Check(value)) {
+ PyErr_SetString(PyExc_TypeError,
+ "setting function's dictionary to a non-dict");
+ return -1;
+ }
+ tmp = op->func_dict;
+ Py_INCREF(value);
+ op->func_dict = value;
+ Py_XDECREF(tmp);
+ return 0;
+}
+/* __globals__: the enclosing module's dict (borrowed from __pyx_m;
+ * an extra reference is taken before returning). */
+static PyObject *
+__Pyx_CyFunction_get_globals(CYTHON_UNUSED __pyx_CyFunctionObject *op)
+{
+ PyObject* dict = PyModule_GetDict(__pyx_m);
+ Py_XINCREF(dict);
+ return dict;
+}
+/* __closure__: always None for this function type. */
+static PyObject *
+__Pyx_CyFunction_get_closure(CYTHON_UNUSED __pyx_CyFunctionObject *op)
+{
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+/* __code__: the stored code object, or None when absent. */
+static PyObject *
+__Pyx_CyFunction_get_code(__pyx_CyFunctionObject *op)
+{
+ PyObject* result = (op->func_code) ? op->func_code : Py_None;
+ Py_INCREF(result);
+ return result;
+}
+/* __defaults__: cached tuple, computed once via defaults_getter when one
+ * is installed; None when the function has no defaults. */
+static PyObject *
+__Pyx_CyFunction_get_defaults(__pyx_CyFunctionObject *op)
+{
+ if (op->defaults_tuple) {
+ Py_INCREF(op->defaults_tuple);
+ return op->defaults_tuple;
+ }
+ if (op->defaults_getter) {
+ PyObject *res = op->defaults_getter((PyObject *) op);
+ if (res) {
+ Py_INCREF(res);
+ op->defaults_tuple = res;
+ }
+ return res;
+ }
+ Py_INCREF(Py_None);
+ return Py_None;
+}
+/* Attribute table: each computed attribute is exposed under both its
+ * Py2 spelling (func_*) and its dunder spelling (__*__). */
+static PyGetSetDef __pyx_CyFunction_getsets[] = {
+ {(char *) "func_doc", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
+ {(char *) "__doc__", (getter)__Pyx_CyFunction_get_doc, (setter)__Pyx_CyFunction_set_doc, 0, 0},
+ {(char *) "func_name", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+ {(char *) "__name__", (getter)__Pyx_CyFunction_get_name, (setter)__Pyx_CyFunction_set_name, 0, 0},
+ {(char *) "__self__", (getter)__Pyx_CyFunction_get_self, 0, 0, 0},
+ {(char *) "func_dict", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
+ {(char *) "__dict__", (getter)__Pyx_CyFunction_get_dict, (setter)__Pyx_CyFunction_set_dict, 0, 0},
+ {(char *) "func_globals", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
+ {(char *) "__globals__", (getter)__Pyx_CyFunction_get_globals, 0, 0, 0},
+ {(char *) "func_closure", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
+ {(char *) "__closure__", (getter)__Pyx_CyFunction_get_closure, 0, 0, 0},
+ {(char *) "func_code", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
+ {(char *) "__code__", (getter)__Pyx_CyFunction_get_code, 0, 0, 0},
+ {(char *) "func_defaults", (getter)__Pyx_CyFunction_get_defaults, 0, 0, 0},
+ {(char *) "__defaults__", (getter)__Pyx_CyFunction_get_defaults, 0, 0, 0},
+ {0, 0, 0, 0, 0}
+};
+#ifndef PY_WRITE_RESTRICTED /* < Py2.5 */
+#define PY_WRITE_RESTRICTED WRITE_RESTRICTED
+#endif
+/* __module__ is stored directly in the embedded PyCFunctionObject. */
+static PyMemberDef __pyx_CyFunction_members[] = {
+ {(char *) "__module__", T_OBJECT, offsetof(__pyx_CyFunctionObject, func.m_module), PY_WRITE_RESTRICTED, 0},
+ {0, 0, 0, 0, 0}
+};
+/* __reduce__ (pickle support): serialise the function as its name only. */
+static PyObject *
+__Pyx_CyFunction_reduce(__pyx_CyFunctionObject *m, CYTHON_UNUSED PyObject *args)
+{
+#if PY_MAJOR_VERSION >= 3
+ return PyUnicode_FromString(m->func.m_ml->ml_name);
+#else
+ return PyString_FromString(m->func.m_ml->ml_name);
+#endif
+}
+static PyMethodDef __pyx_CyFunction_methods[] = {
+ {__Pyx_NAMESTR("__reduce__"), (PyCFunction)__Pyx_CyFunction_reduce, METH_VARARGS, 0},
+ {0, 0, 0, 0}
+};
+/* Allocate and initialise a new CyFunction wrapping the C method `ml`.
+ * Takes new references to closure, module and code (all may be NULL);
+ * defaults storage stays empty until InitDefaults/SetDefaultsTuple.
+ * Returns a GC-tracked object, or NULL on allocation failure. */
+static PyObject *__Pyx_CyFunction_New(PyTypeObject *type, PyMethodDef *ml, int flags,
+ PyObject *closure, PyObject *module, PyObject* code) {
+ __pyx_CyFunctionObject *op = PyObject_GC_New(__pyx_CyFunctionObject, type);
+ if (op == NULL)
+ return NULL;
+ op->flags = flags;
+ op->func_weakreflist = NULL;
+ op->func.m_ml = ml;
+ op->func.m_self = (PyObject *) op; /* the function is its own m_self */
+ Py_XINCREF(closure);
+ op->func_closure = closure;
+ Py_XINCREF(module);
+ op->func.m_module = module;
+ op->func_dict = NULL;
+ op->func_name = NULL;
+ op->func_doc = NULL;
+ op->func_classobj = NULL;
+ Py_XINCREF(code);
+ op->func_code = code;
+ op->defaults_pyobjects = 0;
+ op->defaults = NULL;
+ op->defaults_tuple = NULL;
+ op->defaults_getter = NULL;
+ PyObject_GC_Track(op);
+ return (PyObject *) op;
+}
+/* GC tp_clear: drop every owned reference, including the PyObject
+ * entries stored in the raw defaults buffer, then free that buffer. */
+static int
+__Pyx_CyFunction_clear(__pyx_CyFunctionObject *m)
+{
+ Py_CLEAR(m->func_closure);
+ Py_CLEAR(m->func.m_module);
+ Py_CLEAR(m->func_dict);
+ Py_CLEAR(m->func_name);
+ Py_CLEAR(m->func_doc);
+ Py_CLEAR(m->func_code);
+ Py_CLEAR(m->func_classobj);
+ Py_CLEAR(m->defaults_tuple);
+ if (m->defaults) {
+ PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
+ int i;
+ for (i = 0; i < m->defaults_pyobjects; i++)
+ Py_XDECREF(pydefaults[i]);
+ PyMem_Free(m->defaults);
+ m->defaults = NULL;
+ }
+ return 0;
+}
+/* tp_dealloc: untrack from GC first, clear weakrefs, then reuse
+ * tp_clear to release all owned references. */
+static void __Pyx_CyFunction_dealloc(__pyx_CyFunctionObject *m)
+{
+ PyObject_GC_UnTrack(m);
+ if (m->func_weakreflist != NULL)
+ PyObject_ClearWeakRefs((PyObject *) m);
+ __Pyx_CyFunction_clear(m);
+ PyObject_GC_Del(m);
+}
+/* tp_traverse: visit every owned reference for the cycle collector,
+ * mirroring exactly what tp_clear releases. */
+static int __Pyx_CyFunction_traverse(__pyx_CyFunctionObject *m, visitproc visit, void *arg)
+{
+ Py_VISIT(m->func_closure);
+ Py_VISIT(m->func.m_module);
+ Py_VISIT(m->func_dict);
+ Py_VISIT(m->func_name);
+ Py_VISIT(m->func_doc);
+ Py_VISIT(m->func_code);
+ Py_VISIT(m->func_classobj);
+ Py_VISIT(m->defaults_tuple);
+ if (m->defaults) {
+ PyObject **pydefaults = __Pyx_CyFunction_Defaults(PyObject *, m);
+ int i;
+ for (i = 0; i < m->defaults_pyobjects; i++)
+ Py_VISIT(pydefaults[i]);
+ }
+ return 0;
+}
+/* tp_descr_get: implement method binding.  Static methods are returned
+ * unchanged, class methods bind to the type, and plain functions bind to
+ * the instance (unbound when obj is None). */
+static PyObject *__Pyx_CyFunction_descr_get(PyObject *func, PyObject *obj, PyObject *type)
+{
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ if (m->flags & __Pyx_CYFUNCTION_STATICMETHOD) {
+ Py_INCREF(func);
+ return func;
+ }
+ if (m->flags & __Pyx_CYFUNCTION_CLASSMETHOD) {
+ if (type == NULL)
+ type = (PyObject *)(Py_TYPE(obj));
+ return PyMethod_New(func,
+ type, (PyObject *)(Py_TYPE(type)));
+ }
+ if (obj == Py_None)
+ obj = NULL;
+ return PyMethod_New(func, obj, type);
+}
+/* tp_repr: "<cyfunction NAME at ADDRESS>".
+ * Fix: __Pyx_CyFunction_get_name() returns a NEW reference; the original
+ * code returned the formatted string directly and leaked that reference
+ * on every repr() call.  Release it after formatting. */
+static PyObject*
+__Pyx_CyFunction_repr(__pyx_CyFunctionObject *op)
+{
+ PyObject *result;
+ PyObject *func_name = __Pyx_CyFunction_get_name(op);
+#if PY_MAJOR_VERSION >= 3
+ result = PyUnicode_FromFormat("<cyfunction %U at %p>",
+ func_name, (void *)op);
+#else
+ result = PyString_FromFormat("<cyfunction %s at %p>",
+ PyString_AsString(func_name), (void *)op);
+#endif
+ Py_XDECREF(func_name);
+ return result;
+}
+/* Type object for Cython-compiled functions.  GC-enabled; exposes the
+ * getsets/members/methods tables above and supports instance dicts,
+ * weak references and descriptor binding. */
+static PyTypeObject __pyx_CyFunctionType_type = {
+ PyVarObject_HEAD_INIT(0, 0)
+ __Pyx_NAMESTR("cython_function_or_method"), /*tp_name*/
+ sizeof(__pyx_CyFunctionObject), /*tp_basicsize*/
+ 0, /*tp_itemsize*/
+ (destructor) __Pyx_CyFunction_dealloc, /*tp_dealloc*/
+ 0, /*tp_print*/
+ 0, /*tp_getattr*/
+ 0, /*tp_setattr*/
+#if PY_MAJOR_VERSION < 3
+ 0, /*tp_compare*/
+#else
+ 0, /*reserved*/
+#endif
+ (reprfunc) __Pyx_CyFunction_repr, /*tp_repr*/
+ 0, /*tp_as_number*/
+ 0, /*tp_as_sequence*/
+ 0, /*tp_as_mapping*/
+ 0, /*tp_hash*/
+ __Pyx_PyCFunction_Call, /*tp_call*/
+ 0, /*tp_str*/
+ 0, /*tp_getattro*/
+ 0, /*tp_setattro*/
+ 0, /*tp_as_buffer*/
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags*/
+ 0, /*tp_doc*/
+ (traverseproc) __Pyx_CyFunction_traverse, /*tp_traverse*/
+ (inquiry) __Pyx_CyFunction_clear, /*tp_clear*/
+ 0, /*tp_richcompare*/
+ offsetof(__pyx_CyFunctionObject, func_weakreflist), /*tp_weaklistoffset*/
+ 0, /*tp_iter*/
+ 0, /*tp_iternext*/
+ __pyx_CyFunction_methods, /*tp_methods*/
+ __pyx_CyFunction_members, /*tp_members*/
+ __pyx_CyFunction_getsets, /*tp_getset*/
+ 0, /*tp_base*/
+ 0, /*tp_dict*/
+ __Pyx_CyFunction_descr_get, /*tp_descr_get*/
+ 0, /*tp_descr_set*/
+ offsetof(__pyx_CyFunctionObject, func_dict),/*tp_dictoffset*/
+ 0, /*tp_init*/
+ 0, /*tp_alloc*/
+ 0, /*tp_new*/
+ 0, /*tp_free*/
+ 0, /*tp_is_gc*/
+ 0, /*tp_bases*/
+ 0, /*tp_mro*/
+ 0, /*tp_cache*/
+ 0, /*tp_subclasses*/
+ 0, /*tp_weaklist*/
+ 0, /*tp_del*/
+#if PY_VERSION_HEX >= 0x02060000
+ 0, /*tp_version_tag*/
+#endif
+};
+/* Ready the CyFunction type and publish it in __pyx_CyFunctionType.
+ * Returns 0 on success, -1 on error (exception set by PyType_Ready). */
+static int __Pyx_CyFunction_init(void)
+{
+ if (PyType_Ready(&__pyx_CyFunctionType_type) < 0)
+ return -1;
+ __pyx_CyFunctionType = &__pyx_CyFunctionType_type;
+ return 0;
+}
+/* Allocate zero-initialised storage for a function's default argument
+ * values; `pyobjects` of the entries are PyObject* slots that tp_clear /
+ * tp_traverse will manage.  Returns the buffer, or sets MemoryError and
+ * returns NULL.
+ * Fix: the original called memset(m->defaults, 0, sizeof(size)), which
+ * zeroes only sizeof(size_t) bytes -- not the requested `size` bytes --
+ * leaving most of the defaults storage uninitialised. */
+void *__Pyx_CyFunction_InitDefaults(PyObject *func, size_t size, int pyobjects)
+{
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults = PyMem_Malloc(size);
+ if (!m->defaults)
+ return PyErr_NoMemory();
+ memset(m->defaults, 0, size);
+ m->defaults_pyobjects = pyobjects;
+ return m->defaults;
+}
+/* Store the Python-level __defaults__ tuple (takes a new reference). */
+static void __Pyx_CyFunction_SetDefaultsTuple(PyObject *func, PyObject *tuple)
+{
+ __pyx_CyFunctionObject *m = (__pyx_CyFunctionObject *) func;
+ m->defaults_tuple = tuple;
+ Py_INCREF(tuple);
+}
+
+/* --- Cython-generated integer converters for types narrower than long.
+ *     Each converts via __Pyx_PyInt_AsLong(), range-checks the result
+ *     against the target type and raises OverflowError on loss; on error
+ *     the all-ones bit pattern ((type)-1) is returned with an exception
+ *     set, so callers must also check PyErr_Occurred(). --- */
+static CYTHON_INLINE unsigned char __Pyx_PyInt_AsUnsignedChar(PyObject* x) {
+ const unsigned char neg_one = (unsigned char)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(unsigned char) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(unsigned char)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to unsigned char" :
+ "value too large to convert to unsigned char");
+ }
+ return (unsigned char)-1;
+ }
+ return (unsigned char)val;
+ }
+ return (unsigned char)__Pyx_PyInt_AsUnsignedLong(x);
+}
+
+static CYTHON_INLINE unsigned short __Pyx_PyInt_AsUnsignedShort(PyObject* x) {
+ const unsigned short neg_one = (unsigned short)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(unsigned short) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(unsigned short)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to unsigned short" :
+ "value too large to convert to unsigned short");
+ }
+ return (unsigned short)-1;
+ }
+ return (unsigned short)val;
+ }
+ return (unsigned short)__Pyx_PyInt_AsUnsignedLong(x);
+}
+
+static CYTHON_INLINE unsigned int __Pyx_PyInt_AsUnsignedInt(PyObject* x) {
+ const unsigned int neg_one = (unsigned int)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(unsigned int) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(unsigned int)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to unsigned int" :
+ "value too large to convert to unsigned int");
+ }
+ return (unsigned int)-1;
+ }
+ return (unsigned int)val;
+ }
+ return (unsigned int)__Pyx_PyInt_AsUnsignedLong(x);
+}
+
+static CYTHON_INLINE char __Pyx_PyInt_AsChar(PyObject* x) {
+ const char neg_one = (char)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(char) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(char)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to char" :
+ "value too large to convert to char");
+ }
+ return (char)-1;
+ }
+ return (char)val;
+ }
+ return (char)__Pyx_PyInt_AsLong(x);
+}
+
+static CYTHON_INLINE short __Pyx_PyInt_AsShort(PyObject* x) {
+ const short neg_one = (short)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(short) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(short)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to short" :
+ "value too large to convert to short");
+ }
+ return (short)-1;
+ }
+ return (short)val;
+ }
+ return (short)__Pyx_PyInt_AsLong(x);
+}
+
+static CYTHON_INLINE int __Pyx_PyInt_AsInt(PyObject* x) {
+ const int neg_one = (int)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(int) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(int)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to int" :
+ "value too large to convert to int");
+ }
+ return (int)-1;
+ }
+ return (int)val;
+ }
+ return (int)__Pyx_PyInt_AsLong(x);
+}
+
+static CYTHON_INLINE signed char __Pyx_PyInt_AsSignedChar(PyObject* x) {
+ const signed char neg_one = (signed char)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(signed char) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(signed char)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to signed char" :
+ "value too large to convert to signed char");
+ }
+ return (signed char)-1;
+ }
+ return (signed char)val;
+ }
+ return (signed char)__Pyx_PyInt_AsSignedLong(x);
+}
+
+static CYTHON_INLINE signed short __Pyx_PyInt_AsSignedShort(PyObject* x) {
+ const signed short neg_one = (signed short)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(signed short) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(signed short)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to signed short" :
+ "value too large to convert to signed short");
+ }
+ return (signed short)-1;
+ }
+ return (signed short)val;
+ }
+ return (signed short)__Pyx_PyInt_AsSignedLong(x);
+}
+
+static CYTHON_INLINE signed int __Pyx_PyInt_AsSignedInt(PyObject* x) {
+ const signed int neg_one = (signed int)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(signed int) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(signed int)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to signed int" :
+ "value too large to convert to signed int");
+ }
+ return (signed int)-1;
+ }
+ return (signed int)val;
+ }
+ return (signed int)__Pyx_PyInt_AsSignedLong(x);
+}
+
+/* NOTE(review): despite its name this converts to C `int`, not long
+ * double -- this is what the generator emitted for this build; confirm
+ * against the Cython version before relying on it for FP values. */
+static CYTHON_INLINE int __Pyx_PyInt_AsLongDouble(PyObject* x) {
+ const int neg_one = (int)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+ if (sizeof(int) < sizeof(long)) {
+ long val = __Pyx_PyInt_AsLong(x);
+ if (unlikely(val != (long)(int)val)) {
+ if (!unlikely(val == -1 && PyErr_Occurred())) {
+ PyErr_SetString(PyExc_OverflowError,
+ (is_unsigned && unlikely(val < 0)) ?
+ "can't convert negative value to int" :
+ "value too large to convert to int");
+ }
+ return (int)-1;
+ }
+ return (int)val;
+ }
+ return (int)__Pyx_PyInt_AsLong(x);
+}
+
+/* --- Converters for (un)signed long / long long.  Fast paths exist for
+ *     PyInt (Py2 only) and PyLong; any other object is coerced through
+ *     __Pyx_PyNumber_Int() and converted recursively.  Errors return the
+ *     all-ones pattern with an exception set. --- */
+static CYTHON_INLINE unsigned long __Pyx_PyInt_AsUnsignedLong(PyObject* x) {
+ const unsigned long neg_one = (unsigned long)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+ if (likely(PyInt_Check(x))) {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to unsigned long");
+ return (unsigned long)-1;
+ }
+ return (unsigned long)val;
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+ if (unlikely(Py_SIZE(x) < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to unsigned long");
+ return (unsigned long)-1;
+ }
+ return (unsigned long)PyLong_AsUnsignedLong(x);
+ } else {
+ return (unsigned long)PyLong_AsLong(x);
+ }
+ } else {
+ unsigned long val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (unsigned long)-1;
+ val = __Pyx_PyInt_AsUnsignedLong(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+}
+
+static CYTHON_INLINE unsigned PY_LONG_LONG __Pyx_PyInt_AsUnsignedLongLong(PyObject* x) {
+ const unsigned PY_LONG_LONG neg_one = (unsigned PY_LONG_LONG)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+ if (likely(PyInt_Check(x))) {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to unsigned PY_LONG_LONG");
+ return (unsigned PY_LONG_LONG)-1;
+ }
+ return (unsigned PY_LONG_LONG)val;
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+ if (unlikely(Py_SIZE(x) < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to unsigned PY_LONG_LONG");
+ return (unsigned PY_LONG_LONG)-1;
+ }
+ return (unsigned PY_LONG_LONG)PyLong_AsUnsignedLongLong(x);
+ } else {
+ return (unsigned PY_LONG_LONG)PyLong_AsLongLong(x);
+ }
+ } else {
+ unsigned PY_LONG_LONG val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (unsigned PY_LONG_LONG)-1;
+ val = __Pyx_PyInt_AsUnsignedLongLong(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+}
+
+static CYTHON_INLINE long __Pyx_PyInt_AsLong(PyObject* x) {
+ const long neg_one = (long)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+ if (likely(PyInt_Check(x))) {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to long");
+ return (long)-1;
+ }
+ return (long)val;
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+ if (unlikely(Py_SIZE(x) < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to long");
+ return (long)-1;
+ }
+ return (long)PyLong_AsUnsignedLong(x);
+ } else {
+ return (long)PyLong_AsLong(x);
+ }
+ } else {
+ long val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (long)-1;
+ val = __Pyx_PyInt_AsLong(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+}
+
+static CYTHON_INLINE PY_LONG_LONG __Pyx_PyInt_AsLongLong(PyObject* x) {
+ const PY_LONG_LONG neg_one = (PY_LONG_LONG)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+ if (likely(PyInt_Check(x))) {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to PY_LONG_LONG");
+ return (PY_LONG_LONG)-1;
+ }
+ return (PY_LONG_LONG)val;
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+ if (unlikely(Py_SIZE(x) < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to PY_LONG_LONG");
+ return (PY_LONG_LONG)-1;
+ }
+ return (PY_LONG_LONG)PyLong_AsUnsignedLongLong(x);
+ } else {
+ return (PY_LONG_LONG)PyLong_AsLongLong(x);
+ }
+ } else {
+ PY_LONG_LONG val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (PY_LONG_LONG)-1;
+ val = __Pyx_PyInt_AsLongLong(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+}
+
+static CYTHON_INLINE signed long __Pyx_PyInt_AsSignedLong(PyObject* x) {
+ const signed long neg_one = (signed long)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+ if (likely(PyInt_Check(x))) {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to signed long");
+ return (signed long)-1;
+ }
+ return (signed long)val;
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+ if (unlikely(Py_SIZE(x) < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to signed long");
+ return (signed long)-1;
+ }
+ return (signed long)PyLong_AsUnsignedLong(x);
+ } else {
+ return (signed long)PyLong_AsLong(x);
+ }
+ } else {
+ signed long val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (signed long)-1;
+ val = __Pyx_PyInt_AsSignedLong(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+}
+
+static CYTHON_INLINE signed PY_LONG_LONG __Pyx_PyInt_AsSignedLongLong(PyObject* x) {
+ const signed PY_LONG_LONG neg_one = (signed PY_LONG_LONG)-1, const_zero = 0;
+ const int is_unsigned = neg_one > const_zero;
+#if PY_VERSION_HEX < 0x03000000
+ if (likely(PyInt_Check(x))) {
+ long val = PyInt_AS_LONG(x);
+ if (is_unsigned && unlikely(val < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to signed PY_LONG_LONG");
+ return (signed PY_LONG_LONG)-1;
+ }
+ return (signed PY_LONG_LONG)val;
+ } else
+#endif
+ if (likely(PyLong_Check(x))) {
+ if (is_unsigned) {
+ if (unlikely(Py_SIZE(x) < 0)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "can't convert negative value to signed PY_LONG_LONG");
+ return (signed PY_LONG_LONG)-1;
+ }
+ return (signed PY_LONG_LONG)PyLong_AsUnsignedLongLong(x);
+ } else {
+ return (signed PY_LONG_LONG)PyLong_AsLongLong(x);
+ }
+ } else {
+ signed PY_LONG_LONG val;
+ PyObject *tmp = __Pyx_PyNumber_Int(x);
+ if (!tmp) return (signed PY_LONG_LONG)-1;
+ val = __Pyx_PyInt_AsSignedLongLong(tmp);
+ Py_DECREF(tmp);
+ return val;
+ }
+}
+
+/* Warn (RuntimeWarning-style) if the Python major.minor version this
+ * module was compiled against differs from the runtime version.
+ * NOTE(review): the 4-byte buffers and the [0]/[2] digit comparison only
+ * handle single-digit major/minor numbers -- fine for 2.x/3.x era
+ * interpreters this was generated for. */
+static int __Pyx_check_binary_version(void) {
+ char ctversion[4], rtversion[4];
+ PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
+ PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
+ if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
+ char message[200];
+ PyOS_snprintf(message, sizeof(message),
+ "compiletime version %s of module '%.100s' "
+ "does not match runtime version %s",
+ ctversion, __Pyx_MODULE_NAME, rtversion);
+ #if PY_VERSION_HEX < 0x02050000
+ return PyErr_Warn(NULL, message);
+ #else
+ return PyErr_WarnEx(NULL, message, 1);
+ #endif
+ }
+ return 0;
+}
+
+/* Binary search over the sorted code-object cache.  Returns the index of
+ * the entry matching code_line, or the insertion position that keeps the
+ * array sorted (count if code_line is past the last entry). */
+static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
+ int start = 0, mid = 0, end = count - 1;
+ if (end >= 0 && code_line > entries[end].code_line) {
+ return count;
+ }
+ while (start < end) {
+ mid = (start + end) / 2;
+ if (code_line < entries[mid].code_line) {
+ end = mid;
+ } else if (code_line > entries[mid].code_line) {
+ start = mid + 1;
+ } else {
+ return mid;
+ }
+ }
+ if (code_line <= entries[mid].code_line) {
+ return mid;
+ } else {
+ return mid + 1;
+ }
+}
+/* Look up a cached code object by line; returns a NEW reference, or NULL
+ * (without setting an exception) on a cache miss. */
+static PyCodeObject *__pyx_find_code_object(int code_line) {
+ PyCodeObject* code_object;
+ int pos;
+ if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
+ return NULL;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
+ return NULL;
+ }
+ code_object = __pyx_code_cache.entries[pos].code_object;
+ Py_INCREF(code_object);
+ return code_object;
+}
+/* Insert (or replace) a code object in the sorted cache, growing the
+ * array in steps of 64.  Allocation failures are ignored: the cache is
+ * a best-effort optimisation, never required for correctness. */
+static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
+ int pos, i;
+ __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
+ if (unlikely(!code_line)) {
+ return;
+ }
+ if (unlikely(!entries)) {
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
+ if (likely(entries)) {
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = 64;
+ __pyx_code_cache.count = 1;
+ entries[0].code_line = code_line;
+ entries[0].code_object = code_object;
+ Py_INCREF(code_object);
+ }
+ return;
+ }
+ pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
+ if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
+ PyCodeObject* tmp = entries[pos].code_object;
+ entries[pos].code_object = code_object;
+ Py_DECREF(tmp);
+ return;
+ }
+ if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
+ int new_max = __pyx_code_cache.max_count + 64;
+ entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
+ __pyx_code_cache.entries, new_max*sizeof(__Pyx_CodeObjectCacheEntry));
+ if (unlikely(!entries)) {
+ return;
+ }
+ __pyx_code_cache.entries = entries;
+ __pyx_code_cache.max_count = new_max;
+ }
+ for (i=__pyx_code_cache.count; i>pos; i--) {
+ entries[i] = entries[i-1];
+ }
+ entries[pos].code_line = code_line;
+ entries[pos].code_object = code_object;
+ __pyx_code_cache.count++;
+ Py_INCREF(code_object);
+}
+
+#include "compile.h"
+#include "frameobject.h"
+#include "traceback.h"
+/* Build a minimal, empty code object whose filename/name/firstlineno
+ * identify the C or Python source location; used only so CPython's
+ * traceback machinery has something to print.  Returns a new reference
+ * or NULL on error. */
+static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
+ const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = 0;
+ PyObject *py_srcfile = 0;
+ PyObject *py_funcname = 0;
+ #if PY_MAJOR_VERSION < 3
+ py_srcfile = PyString_FromString(filename);
+ #else
+ py_srcfile = PyUnicode_FromString(filename);
+ #endif
+ if (!py_srcfile) goto bad;
+ if (c_line) {
+ /* embed the generated-C location in the displayed function name */
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+ #else
+ py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
+ #endif
+ }
+ else {
+ #if PY_MAJOR_VERSION < 3
+ py_funcname = PyString_FromString(funcname);
+ #else
+ py_funcname = PyUnicode_FromString(funcname);
+ #endif
+ }
+ if (!py_funcname) goto bad;
+ py_code = __Pyx_PyCode_New(
+ 0, /*int argcount,*/
+ 0, /*int kwonlyargcount,*/
+ 0, /*int nlocals,*/
+ 0, /*int stacksize,*/
+ 0, /*int flags,*/
+ __pyx_empty_bytes, /*PyObject *code,*/
+ __pyx_empty_tuple, /*PyObject *consts,*/
+ __pyx_empty_tuple, /*PyObject *names,*/
+ __pyx_empty_tuple, /*PyObject *varnames,*/
+ __pyx_empty_tuple, /*PyObject *freevars,*/
+ __pyx_empty_tuple, /*PyObject *cellvars,*/
+ py_srcfile, /*PyObject *filename,*/
+ py_funcname, /*PyObject *name,*/
+ py_line, /*int firstlineno,*/
+ __pyx_empty_bytes /*PyObject *lnotab*/
+ );
+ Py_DECREF(py_srcfile);
+ Py_DECREF(py_funcname);
+ return py_code;
+bad:
+ Py_XDECREF(py_srcfile);
+ Py_XDECREF(py_funcname);
+ return NULL;
+}
+/* Append a synthetic frame for (funcname, line, filename) to the current
+ * exception's traceback.  Code objects are cached per line; failures are
+ * silently dropped (the original exception stays intact). */
+static void __Pyx_AddTraceback(const char *funcname, int c_line,
+ int py_line, const char *filename) {
+ PyCodeObject *py_code = 0;
+ PyObject *py_globals = 0;
+ PyFrameObject *py_frame = 0;
+ py_code = __pyx_find_code_object(c_line ? c_line : py_line);
+ if (!py_code) {
+ py_code = __Pyx_CreateCodeObjectForTraceback(
+ funcname, c_line, py_line, filename);
+ if (!py_code) goto bad;
+ __pyx_insert_code_object(c_line ? c_line : py_line, py_code);
+ }
+ py_globals = PyModule_GetDict(__pyx_m);
+ if (!py_globals) goto bad;
+ py_frame = PyFrame_New(
+ PyThreadState_GET(), /*PyThreadState *tstate,*/
+ py_code, /*PyCodeObject *code,*/
+ py_globals, /*PyObject *globals,*/
+ 0 /*PyObject *locals*/
+ );
+ if (!py_frame) goto bad;
+ py_frame->f_lineno = py_line;
+ PyTraceBack_Here(py_frame);
+bad:
+ Py_XDECREF(py_code);
+ Py_XDECREF(py_frame);
+}
+
+/* Intern/decode the module's compile-time string table.  Each entry
+ * carries the raw bytes plus flags (is_unicode/is_str/intern/encoding)
+ * chosen by the code generator; the resulting object is written back
+ * through t->p.  Returns 0 on success, -1 on the first failure. */
+static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
+ while (t->p) {
+ #if PY_MAJOR_VERSION < 3
+ if (t->is_unicode) {
+ *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
+ } else if (t->intern) {
+ *t->p = PyString_InternFromString(t->s);
+ } else {
+ *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
+ }
+ #else /* Python 3+ has unicode identifiers */
+ if (t->is_unicode | t->is_str) {
+ if (t->intern) {
+ *t->p = PyUnicode_InternFromString(t->s);
+ } else if (t->encoding) {
+ *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
+ } else {
+ *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
+ }
+ } else {
+ *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
+ }
+ #endif
+ if (!*t->p)
+ return -1;
+ ++t;
+ }
+ return 0;
+}
+
+
+/* Type Conversion Functions */
+
+/* Truth test with fast paths for the True/False/None singletons. */
+static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
+ int is_true = x == Py_True;
+ if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
+ else return PyObject_IsTrue(x);
+}
+
+/* Coerce an arbitrary object to a Python integer via __int__/__long__,
+ * validating that the slot actually returned an integer type.
+ * Returns a new reference, or NULL with TypeError set. */
+static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
+ PyNumberMethods *m;
+ const char *name = NULL;
+ PyObject *res = NULL;
+#if PY_VERSION_HEX < 0x03000000
+ if (PyInt_Check(x) || PyLong_Check(x))
+#else
+ if (PyLong_Check(x))
+#endif
+ return Py_INCREF(x), x;
+ m = Py_TYPE(x)->tp_as_number;
+#if PY_VERSION_HEX < 0x03000000
+ if (m && m->nb_int) {
+ name = "int";
+ res = PyNumber_Int(x);
+ }
+ else if (m && m->nb_long) {
+ name = "long";
+ res = PyNumber_Long(x);
+ }
+#else
+ if (m && m->nb_int) {
+ name = "int";
+ res = PyNumber_Long(x);
+ }
+#endif
+ if (res) {
+#if PY_VERSION_HEX < 0x03000000
+ if (!PyInt_Check(res) && !PyLong_Check(res)) {
+#else
+ if (!PyLong_Check(res)) {
+#endif
+ PyErr_Format(PyExc_TypeError,
+ "__%s__ returned non-%s (type %.200s)",
+ name, name, Py_TYPE(res)->tp_name);
+ Py_DECREF(res);
+ return NULL;
+ }
+ }
+ else if (!PyErr_Occurred()) {
+ PyErr_SetString(PyExc_TypeError,
+ "an integer is required");
+ }
+ return res;
+}
+
+/* __index__-based conversion to Py_ssize_t; returns -1 with an
+ * exception set on failure. */
+static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
+ Py_ssize_t ival;
+ PyObject* x = PyNumber_Index(b);
+ if (!x) return -1;
+ ival = PyInt_AsSsize_t(x);
+ Py_DECREF(x);
+ return ival;
+}
+
+/* size_t -> Python int; the pre-2.5 fallback builds a PyLong from the
+ * raw bytes when the value does not fit in a C long. */
+static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
+#if PY_VERSION_HEX < 0x02050000
+ if (ival <= LONG_MAX)
+ return PyInt_FromLong((long)ival);
+ else {
+ unsigned char *bytes = (unsigned char *) &ival;
+ int one = 1; int little = (int)*(unsigned char*)&one;
+ return _PyLong_FromByteArray(bytes, sizeof(size_t), little, 0);
+ }
+#else
+ return PyInt_FromSize_t(ival);
+#endif
+}
+
+/* Python int -> size_t, with an overflow check for platforms where
+ * size_t is narrower than unsigned long long. */
+static CYTHON_INLINE size_t __Pyx_PyInt_AsSize_t(PyObject* x) {
+ unsigned PY_LONG_LONG val = __Pyx_PyInt_AsUnsignedLongLong(x);
+ if (unlikely(val == (unsigned PY_LONG_LONG)-1 && PyErr_Occurred())) {
+ return (size_t)-1;
+ } else if (unlikely(val != (unsigned PY_LONG_LONG)(size_t)val)) {
+ PyErr_SetString(PyExc_OverflowError,
+ "value too large to convert to size_t");
+ return (size_t)-1;
+ }
+ return (size_t)val;
+}
+
+
+#endif /* Py_PYTHON_H */
diff --git a/contrib/python/binpt/binpt.pxd b/contrib/python/binpt/binpt.pxd
new file mode 100644
index 000000000..33661ceaf
--- /dev/null
+++ b/contrib/python/binpt/binpt.pxd
@@ -0,0 +1,25 @@
+# Cython declarations for the Moses binary phrase-table wrapper.
+from libcpp.string cimport string
+from libcpp.vector cimport vector
+from libcpp.pair cimport pair
+
+# Target-side words are stored as string pointers in StringTgtCand.
+ctypedef string* str_pointer
+
+cdef extern from 'TypeDef.h' namespace 'Moses':
+ ctypedef vector[float] Scores
+ # (target words, feature scores) for one translation candidate
+ ctypedef pair[vector[str_pointer], Scores] StringTgtCand
+
+cdef extern from 'PhraseDictionaryTree.h' namespace 'Moses':
+ cdef cppclass PhraseDictionaryTree:
+ PhraseDictionaryTree(unsigned nscores)
+ void UseWordAlignment(bint use)
+ bint UseWordAlignment()
+ int Read(string& path)
+ # overload without word-alignment output
+ void GetTargetCandidates(vector[string]& fs,
+ vector[StringTgtCand]& rv)
+ # overload that also returns word-alignment strings in `wa`
+ void GetTargetCandidates(vector[string]& fs,
+ vector[StringTgtCand]& rv,
+ vector[string]& wa)
+
+cdef extern from 'Util.h' namespace 'Moses':
+ cdef vector[string] Tokenize(string& text, string& delimiters)
+
diff --git a/contrib/python/binpt/binpt.pyx b/contrib/python/binpt/binpt.pyx
new file mode 100644
index 000000000..e66981df6
--- /dev/null
+++ b/contrib/python/binpt/binpt.pyx
@@ -0,0 +1,166 @@
+from libcpp.string cimport string
+from libcpp.vector cimport vector
+import os
+import cython
+
+cpdef int fsign(float x):
+ '''Simply returns the sign of float x (zero is assumed +), it's defined here just so one gains a little bit with static typing'''
+ return 1 if x >= 0 else -1
+
+cdef bytes as_str(data):
+ if isinstance(data, bytes):
+ return data
+ elif isinstance(data, unicode):
+ return data.encode('UTF-8')
+ raise TypeError('Cannot convert %s to string' % type(data))
+
+cdef class QueryResult(object):
+ '''This class represents a query result, that is,
+ a target phrase (tuple of words/strings),
+ a feature vector (tuple of floats)
+ and possibly an alignment info (string).
+ Here we don't bother parsing the alignment info, as it's often only
+    used as is, therefore saving some time.'''
+
+ cdef tuple _words
+ cdef tuple _scores
+ cdef bytes _wa
+
+ def __cinit__(self, words, scores, wa = None):
+ '''Requires a tuple of words (as strings) and a tuple of scores (as floats).
+ Word-alignment info (as string) may be provided'''
+ self._words = words
+ self._scores = scores
+ self._wa = wa
+
+ @property
+ def words(self):
+ '''Tuple of words (as strings)'''
+ return self._words
+
+ @property
+ def scores(self):
+ '''Tuple of scores (as floats)'''
+ return self._scores
+
+ @property
+ def wa(self):
+ '''Word-alignment info (as string)'''
+ return self._wa
+
+ @staticmethod
+ def desc(x, y, keys = lambda r: r.scores[0]):
+ '''Returns the sign of keys(y) - keys(x).
+ Can only be used if scores is not an empty vector as
+ keys defaults to scores[0]'''
+ return fsign(keys(y) - keys(x))
+
+ def __str__(self):
+ '''Returns a string such as: <words> ||| <scores> [||| word-alignment info]'''
+ if self._wa:
+ return ' ||| '.join( (' '.join(self._words),
+ ' '.join([str(x) for x in self._scores]),
+ self._wa) )
+ else:
+ return ' ||| '.join( (' '.join(self._words),
+ ' '.join([str(x) for x in self._scores]) ) )
+
+ def __repr__(self):
+ return repr((repr(self._words), repr(self._scores), repr(self._wa)))
+
+cdef QueryResult get_query_result(StringTgtCand& cand, object wa = None):
+ '''Converts a StringTgtCandidate (c++ object) and possibly a word-alignment info (string)
+ to a QueryResult (python object).'''
+ cdef tuple words = tuple([cand.first[i].c_str() for i in range(cand.first.size())])
+ cdef tuple scores = tuple([cand.second[i] for i in range(cand.second.size())])
+ return QueryResult(words, scores, wa)
+
+cdef class BinaryPhraseTable(object):
+ '''This class encapsulates a Moses::PhraseDictionaryTree for operations over
+ binary phrase tables.'''
+
+ cdef PhraseDictionaryTree* __tree
+ cdef bytes _path
+ cdef unsigned _nscores
+ cdef bint _wa
+ cdef bytes _delimiters
+
+ def __cinit__(self, bytes path, unsigned nscores = 5, bint wa = False, delimiters = ' \t'):
+        '''It requires a path to binary phrase table (stem of the table, e.g. europarl.fr-en
+        is the stem for europarl.fr-en.binphr.*).
+ Moses::PhraseDictionaryTree also needs to be aware of the number of scores (usually 5),
+ and whether or not there is word-alignment info in the table (usually not).
+ One can also specify the token delimiters, for Moses::Tokenize(text, delimiters), which is space or tab by default.'''
+
+ if not BinaryPhraseTable.isValidBinaryTable(path, wa):
+ raise ValueError, "'%s' doesn't seem a valid binary table." % path
+ self._path = path
+ self._nscores = nscores
+ self._wa = wa
+ self._delimiters = delimiters
+ self.__tree = new PhraseDictionaryTree(nscores)
+ self.__tree.UseWordAlignment(wa)
+ self.__tree.Read(string(path))
+
+ def __dealloc__(self):
+ del self.__tree
+
+ @staticmethod
+ def isValidBinaryTable(stem, bint wa = False):
+ '''This sanity check was added to the constructor, but you can access it from outside this class
+ to determine whether or not you are providing a valid stem to BinaryPhraseTable.'''
+ if wa:
+ return os.path.isfile(stem + ".binphr.idx") \
+ and os.path.isfile(stem + ".binphr.srctree.wa") \
+ and os.path.isfile(stem + ".binphr.srcvoc") \
+ and os.path.isfile(stem + ".binphr.tgtdata.wa") \
+ and os.path.isfile(stem + ".binphr.tgtvoc")
+ else:
+ return os.path.isfile(stem + ".binphr.idx") \
+ and os.path.isfile(stem + ".binphr.srctree") \
+ and os.path.isfile(stem + ".binphr.srcvoc") \
+ and os.path.isfile(stem + ".binphr.tgtdata") \
+ and os.path.isfile(stem + ".binphr.tgtvoc")
+
+ @property
+ def path(self):
+ return self._path
+
+ @property
+ def nscores(self):
+ return self._nscores
+
+ @property
+ def wa(self):
+ return self._wa
+
+ @property
+ def delimiters(self):
+ return self._delimiters
+
+ def query(self, line, cmp = None, top = 0):
+ '''Queries the phrase table and returns a list of matches.
+ Each match is a QueryResult.
+ If 'cmp' is defined the return list is sorted.
+        If 'top' is defined, only the top elements will be returned.'''
+ cdef bytes text = as_str(line)
+ cdef vector[string] fphrase = Tokenize(string(text), string(self._delimiters))
+ cdef vector[StringTgtCand]* rv = new vector[StringTgtCand]()
+ cdef vector[string]* wa = NULL
+ cdef list phrases
+ if not self.__tree.UseWordAlignment():
+ self.__tree.GetTargetCandidates(fphrase, rv[0])
+ phrases = [get_query_result(rv[0][i]) for i in range(rv.size())]
+ else:
+ wa = new vector[string]()
+ self.__tree.GetTargetCandidates(fphrase, rv[0], wa[0])
+ phrases = [get_query_result(rv[0][i], wa[0][i].c_str()) for i in range(rv.size())]
+ del wa
+ del rv
+ if cmp:
+ phrases.sort(cmp=cmp)
+ if top > 0:
+ return phrases[0:top]
+ else:
+ return phrases
+
diff --git a/contrib/python/example.py b/contrib/python/example.py
new file mode 100644
index 000000000..8494ba5fe
--- /dev/null
+++ b/contrib/python/example.py
@@ -0,0 +1,31 @@
+import binpt
+#from binpt import QueryResult
+import sys
+
+
+if len(sys.argv) < 3:
+ print "Usage: %s phrase-table nscores [wa] < query > result" % (sys.argv[0])
+ sys.exit(0)
+
+pt_file = sys.argv[1]
+nscores = int(sys.argv[2])
+wa = len(sys.argv) == 4
+
+pt = binpt.BinaryPhraseTable(pt_file, nscores, wa)
+print >> sys.stderr, "-ttable %s -nscores %d -alignment-info %s -delimiter '%s'\n" %(pt.path, pt.nscores, str(pt.wa), pt.delimiters)
+
+for line in sys.stdin:
+ f = line.strip()
+ matches = pt.query(f, cmp = binpt.QueryResult.desc, top = 20)
+ print '\n'.join([' ||| '.join((f, str(e))) for e in matches])
+ '''
+ # This is how one would use the QueryResult object
+ for e in matches:
+ print ' '.join(e.words) # tuple of strings
+ print e.scores # tuple of floats
+ if e.wa:
+ print e.wa # string
+ '''
+
+
+
diff --git a/contrib/python/examples/phrase-table.binphr.idx b/contrib/python/examples/phrase-table.binphr.idx
new file mode 100644
index 000000000..58adc514e
--- /dev/null
+++ b/contrib/python/examples/phrase-table.binphr.idx
Binary files differ
diff --git a/contrib/python/examples/phrase-table.binphr.srctree.wa b/contrib/python/examples/phrase-table.binphr.srctree.wa
new file mode 100644
index 000000000..a6da5e1bf
--- /dev/null
+++ b/contrib/python/examples/phrase-table.binphr.srctree.wa
Binary files differ
diff --git a/contrib/python/examples/phrase-table.binphr.srcvoc b/contrib/python/examples/phrase-table.binphr.srcvoc
new file mode 100644
index 000000000..d8656e003
--- /dev/null
+++ b/contrib/python/examples/phrase-table.binphr.srcvoc
@@ -0,0 +1,2 @@
+1 essa
+0 casa
diff --git a/contrib/python/examples/phrase-table.binphr.tgtdata.wa b/contrib/python/examples/phrase-table.binphr.tgtdata.wa
new file mode 100644
index 000000000..592874362
--- /dev/null
+++ b/contrib/python/examples/phrase-table.binphr.tgtdata.wa
Binary files differ
diff --git a/contrib/python/examples/phrase-table.binphr.tgtvoc b/contrib/python/examples/phrase-table.binphr.tgtvoc
new file mode 100644
index 000000000..71975c3c5
--- /dev/null
+++ b/contrib/python/examples/phrase-table.binphr.tgtvoc
@@ -0,0 +1,4 @@
+3 this
+2 location
+1 house
+0 building
diff --git a/contrib/python/examples/phrase-table.txt b/contrib/python/examples/phrase-table.txt
new file mode 100644
index 000000000..1b2a2630a
--- /dev/null
+++ b/contrib/python/examples/phrase-table.txt
@@ -0,0 +1,4 @@
+casa ||| building ||| 0.6 0.75 0.35 0.35 2.718 ||| 0-0 ||| 2 2
+casa ||| house ||| 0.7 0.75 0.35 0.35 2.718 ||| 0-0 ||| 2 2
+casa ||| location ||| 0.5 0.75 0.35 0.35 2.718 ||| 0-0 ||| 2 2
+essa casa ||| this house ||| 0.7 0.5 0.8 0.6 2.718 ||| 0-0 1-1 ||| 2 2
diff --git a/contrib/python/setup.py b/contrib/python/setup.py
new file mode 100644
index 000000000..66042fbc8
--- /dev/null
+++ b/contrib/python/setup.py
@@ -0,0 +1,47 @@
+from distutils.core import setup
+from distutils.extension import Extension
+import os
+import sys
+
+available_switches = ['--with-cmph']
+with_cmph = False
+
+while sys.argv[-1] in available_switches:
+ switch = sys.argv.pop()
+ if switch == '--with-cmph':
+ with_cmph = True
+
+
+#### From here you probably don't need to change anything
+#### unless a new dependency shows up in Moses
+
+mosesdir = os.path.abspath('../../')
+includes = [mosesdir, os.path.join(mosesdir, 'moses/src'), os.path.join(mosesdir, 'util')]
+libdir = os.path.join(mosesdir, 'lib')
+
+basic=['z', 'stdc++', 'pthread', 'm', 'gcc_s', 'c', 'boost_system', 'boost_thread', 'boost_filesystem', 'rt']
+moses=['OnDiskPt', 'kenutil', 'kenlm', 'LM', 'mert_lib', 'moses_internal', 'CYKPlusParser', 'Scope3Parser', 'fuzzy-match', 'RuleTable', 'CompactPT', 'moses', 'dynsa', 'pcfg_common' ]
+additional=[]
+
+if with_cmph:
+ additional.append('cmph')
+
+exobj = [os.path.join(libdir, 'lib' + l + '.so') for l in moses]
+
+ext_modules = [
+ Extension(name = 'binpt',
+ sources = ['binpt/binpt.cpp'],
+ language = 'C++',
+ include_dirs = includes,
+ extra_objects = exobj,
+ library_dirs = [libdir],
+ runtime_library_dirs = [libdir],
+ libraries = basic + moses + additional,
+ extra_compile_args = ['-O3', '-DNDEBUG'],
+ )
+]
+
+setup(
+ name='binpt',
+ ext_modules=ext_modules
+)
diff --git a/contrib/relent-filter/AUTHORS b/contrib/relent-filter/AUTHORS
new file mode 100644
index 000000000..184a6dddd
--- /dev/null
+++ b/contrib/relent-filter/AUTHORS
@@ -0,0 +1 @@
+Wang Ling - lingwang at cs dot cmu dot edu
diff --git a/contrib/relent-filter/README.txt b/contrib/relent-filter/README.txt
new file mode 100644
index 000000000..e791d1f8a
--- /dev/null
+++ b/contrib/relent-filter/README.txt
@@ -0,0 +1,91 @@
+Implementation of the Relative Entropy-based Phrase table filtering algorithm by Wang Ling (Ling et al, 2012).
+
+This implementation also calculates the significance scores for the phrase tables based on Fisher's test (Johnson et al, 2007). Uses a slightly modified version of the "sigtest-filter" by Chris Dyer.
+
+-------BUILD INSTRUCTIONS-------
+
+1 - Build the sigtest-filter binary
+
+1.1 - Download and build SALM available at http://projectile.sv.cmu.edu/research/public/tools/salm/salm.htm
+
+1.2 - Run "make SALMDIR=<path_to_salm>" in "<path_to_moses>/contrib/relent-filter/sigtest-filter" to create the executable filter-pt
+
+2 - Build moses project by running "./bjam <options>", this will create the executables for relent filtering
+
+-------USAGE INSTRUCTIONS-------
+
+Required files:
+s_train - source training file
+t_train - target training file
+moses_ini - path to the moses configuration file ( after tuning )
+pruning_binaries - path to the relent pruning binaries ( should be "<path_to_moses>/bin" )
+pruning_scripts - path to the relent pruning scripts ( should be "<path_to_moses>/contrib/relent-filter/scripts" )
+sigbin - path to the sigtest filter binaries ( should be "<path_to_moses>/contrib/relent-filter/sigtest-filter" )
+output_dir - path to write the output
+
+1 - build suffix arrays for the source and target parallel training data
+
+1.1 - run "<path to salm>/Bin/Linux/Index/IndexSA.O32 <s_train>" (or IndexSA.O64)
+
+1.2 - run "<path to salm>/Bin/Linux/Index/IndexSA.O32 <t_train>" (or IndexSA.O64)
+
+2 - calculate phrase pair scores by running:
+
+perl <pruning_scripts>/calcPruningScores.pl -moses_ini <moses_ini> -training_s <s_train> -training_t <t_train> -prune_bin <pruning_binaries> -prune_scripts <pruning_scripts> -moses_scripts <path_to_moses>/scripts/training/ -workdir <output_dir> -dec_size 10000
+
+this will create the following files in the <output_dir/scores/> dir:
+
+count.txt - counts of the phrase pairs for N(s,t) N(s,*) and N(*,t)
+divergence.txt - negative log of the divergence of the phrase pair
+empirical.txt - empirical distribution of the phrase pairs N(s,t)/N(*,*)
+rel_ent.txt - relative entropy of the phrase pairs
+significance.txt - significance of the phrase pairs
+
+You can use any one of these files for pruning and also combine these scores using <pruning_scripts>/interpolateScores.pl
+
+3 - To actually prune a phrase table you should run <pruning_scripts>/prunePT.pl
+
+For instance, to prune 30% of the phrase table using rel_ent run:
+perl <pruning_scripts>/prunePT.pl -table <phrase_table_file> -scores <output_dir>/scores/rel_ent.txt -percentage 70 > <pruned_phrase_table_file>
+
+You can also prune by threshold
+perl <pruning_scripts>/prunePT.pl -table <phrase_table_file> -scores <output_dir>/scores/rel_ent.txt -threshold 0.1 > <pruned_phrase_table_file>
+
+The same must be done for the reordering table by replacing <phrase_table_file> with the <reord_table_file>
+
+perl <pruning_scripts>/prunePT.pl -table <reord_table_file> -scores <output_dir>/scores/rel_ent.txt -percentage 70 > <pruned_reord_table_file>
+
+-------RUNNING STEP 2 IN PARALLEL-------
+
+Step 2 requires the forced decoding of the whole set of phrase pairs in the table, so unless you test it on a small corpus, it usually requires large amounts of time to process.
+Thus, we recommend users to run multiple instances of "<pruning_scripts>/calcPruningScores.pl" in parallel to process different parts of the phrase table.
+
+To do this, run:
+
+perl <pruning_scripts>/calcPruningScores.pl -moses_ini <moses_ini> -training_s <s_train> -training_t <t_train> -prune_bin <pruning_binaries> -prune_scripts <pruning_scripts> -moses_scripts <path_to_moses>/scripts/training/ -workdir <output_dir> -dec_size 10000 -start 0 -end 100000
+
+The -start and -end tags tell the script to only calculate the results for phrase pairs between 0 and 99999.
+
+Thus, an example of a shell script to run for the whole phrase table would be:
+
+size=`wc <phrase_table_file> | gawk '{print $1}'`
+phrases_per_process=100000
+
+for i in $(seq 0 $phrases_per_process $size)
+do
+ end=`expr $i + $phrases_per_process`
+ perl <pruning_scripts>/calcPruningScores.pl -moses_ini <moses_ini> -training_s <s_train> -training_t <t_train> -prune_bin <pruning_binaries> -prune_scripts <pruning_scripts> -moses_scripts <path_to_moses>/scripts/training/ -workdir <output_dir>.$i-$end -dec_size 10000 -start $i -end $end
+done
+
+After all processes finish, simply join the partial score files together in the same order.
+
+-------REFERENCES-------
+Ling, W., Graça, J., Trancoso, I., and Black, A. (2012). Entropy-based pruning for phrase-based
+machine translation. In Proceedings of the 2012
+Joint Conference on Empirical Methods in Natural Language Processing and
+Computational Natural Language Learning (EMNLP-CoNLL), pp. 962-971.
+
+H. Johnson, J. Martin, G. Foster and R. Kuhn. (2007) Improving Translation
+Quality by Discarding Most of the Phrasetable. In Proceedings of the 2007
+Joint Conference on Empirical Methods in Natural Language Processing and
+Computational Natural Language Learning (EMNLP-CoNLL), pp. 967-975.
diff --git a/contrib/relent-filter/scripts/calcEmpiricalDistribution.pl b/contrib/relent-filter/scripts/calcEmpiricalDistribution.pl
new file mode 100644
index 000000000..462ec5339
--- /dev/null
+++ b/contrib/relent-filter/scripts/calcEmpiricalDistribution.pl
@@ -0,0 +1,53 @@
+#!/usr/bin/perl -w
+
+# read arguments
+my $countFile = $ARGV[0];
+
+my $ZCAT = "gzip -cd";
+my $BZCAT = "bzcat";
+
+&process_count_file($countFile);
+
+sub process_count_file {
+ $file = $_[0];
+ open(COUNT_READER, &open_compressed($file)) or die "ERROR: Can't read $file";
+
+ print STDERR "reading file to calculate normalizer";
+ $normalizer=0;
+ while(<COUNT_READER>) {
+ my $line = $_;
+ chomp($line);
+ my @line_array = split(/\s+/, $line);
+ my $count = $line_array[0];
+ $normalizer+=$count;
+ }
+
+ close(COUNT_READER);
+
+ print STDERR "reading file again to print the counts";
+ open(COUNT_READER, &open_compressed($file)) or die "ERROR: Can't read $file";
+
+ while(<COUNT_READER>) {
+ my $line = $_;
+ chomp($line);
+ my @line_array = split(/\s+/, $line);
+ my $score = $line_array[0]/$normalizer;
+ print $score."\n";
+ }
+
+ close(COUNT_READER);
+}
+
+sub open_compressed {
+ my ($file) = @_;
+ print STDERR "FILE: $file\n";
+
+ # add extensions, if necessary
+ $file = $file.".bz2" if ! -e $file && -e $file.".bz2";
+ $file = $file.".gz" if ! -e $file && -e $file.".gz";
+
+ # pipe zipped, if necessary
+ return "$BZCAT $file|" if $file =~ /\.bz2$/;
+ return "$ZCAT $file|" if $file =~ /\.gz$/;
+ return $file;
+}
diff --git a/contrib/relent-filter/scripts/calcPruningScores.pl b/contrib/relent-filter/scripts/calcPruningScores.pl
new file mode 100755
index 000000000..cbfabac55
--- /dev/null
+++ b/contrib/relent-filter/scripts/calcPruningScores.pl
@@ -0,0 +1,351 @@
+#!/usr/bin/perl -w
+use Getopt::Long;
+use File::Basename;
+use POSIX;
+
+# read arguments
+my $line_start = 0;
+my $line_end = LONG_MAX;
+my $tmp_dir = "";
+my $dec_size = LONG_MAX;
+$_HELP = 1 if (@ARGV < 1 or !GetOptions ("moses_ini=s" => \$moses_ini, #moses conf file
+"start:i" => \$line_start, #fisrt phrase to process
+"end:i" => \$line_end, #last sentence to process (not including)
+"training_s=s" => \$training_s, #source training file
+"training_t=s" => \$training_t, #target training file
+"prune_bin=s" => \$prune_bin, #binary files in the pruning toolkit
+"prune_scripts=s" => \$prune_scripts, #scripts in the pruning toolkit
+"sig_bin=s" => \$sig_bin, #binary files to calculate significance
+"moses_scripts=s" => \$moses_scripts, #dir with the moses scripts
+"tmp_dir:s" => \$tmp_dir, #dir with the moses scripts
+"dec_size:i" => \$dec_size, #dir with the moses scripts
+"workdir=s" => \$workdir)); #directory to put all the output files
+
+# help message if arguments are not correct
+if ($_HELP) {
+ print "
+Usage: perl calcPruningScores.pl [PARAMS]
+Function: Calculates relative entropy for each phrase pair in a translation model.
+Authors: Wang Ling ( lingwang at cs dot cmu dot edu )
+PARAMS:
+ -moses_ini : moses configuration file with the model to prune (phrase table, reordering table, weights etc...)
+ -training_s : source training file, please run salm first
+ -training_t : target training file, please run salm first
+ -prune_bin : path to the binaries for pruning (probably <PATH_TO_MOSES>/bin)
+ -prune_scripts : path to the scripts for pruning (probably the directory where this script is)
+ -sig_bin : path to the binary for significance testing included in this toolkit
+ -moses_scripts : path to the moses training scripts (where filter-model-given-input.pl is)
+ -workdir : directory to produce the output
+ -tmp_dir : directory to store temporary files (improve performance if stored in a local disk), omit to store in workdir
+ -dec_size : number of phrase pairs to be decoded at a time, omit to decode all selected phrase pairs at once
+ -start and -end : starting and ending phrase pairs to process, to be used if you want to launch multiple processes in parallel for different parts of the phrase table. If specified the process will process the phrase pairs from <start> to <end-1>
+
+For any questions contact lingwang at cs dot cmu dot edu
+";
+ exit(1);
+}
+
+# setting up working dirs
+my $TMP_DIR = $tmp_dir;
+if ($tmp_dir eq ""){
+ $TMP_DIR = "$workdir/tmp";
+}
+my $SCORE_DIR = "$workdir/scores";
+my $FILTER_DIR = "$TMP_DIR/filter";
+
+# files for divergence module
+my $SOURCE_FILE = "$TMP_DIR/source.txt";
+my $CONSTRAINT_FILE = "$TMP_DIR/constraint.txt";
+my $DIVERGENCE_FILE = "$SCORE_DIR/divergence.txt";
+
+# files for significance module
+my $SIG_TABLE_FILE = "$TMP_DIR/source_target.txt";
+my $SIG_MOD_OUTPUT = "$TMP_DIR/sig_mod.out";
+my $SIG_FILE = "$SCORE_DIR/significance.txt";
+my $COUNT_FILE = "$SCORE_DIR/count.txt";
+my $EMP_DIST_FILE= "$SCORE_DIR/empirical.txt";
+my $REL_ENT_FILE= "$SCORE_DIR/rel_ent.txt";
+
+# setting up executables
+my $ZCAT = "gzip -cd";
+my $BZCAT = "bzcat";
+my $CP = "cp";
+my $SED = "sed";
+my $RM = "rm";
+my $SORT_EXEC = "sort";
+my $PRUNE_EXEC = "$prune_bin/calcDivergence";
+my $SIG_EXEC = "$sig_bin/filter-pt";
+my $FILTER_EXEC = "perl $moses_scripts/filter-model-given-input.pl";
+my $CALC_EMP_EXEC ="perl $prune_scripts/calcEmpiricalDistribution.pl";
+my $INT_TABLE_EXEC = "perl $prune_scripts/interpolateScores.pl";
+
+# moses ini variables
+my ($TRANSLATION_TABLE_FILE, $REORDERING_TABLE_FILE);
+
+# phrase table variables
+my ($N_PHRASES, $N_PHRASES_TO_PROCESS);
+
+# main functions
+&prepare();
+&calc_sig_and_counts();
+&calc_div();
+&clear_up();
+
+# (1) preparing data
+sub prepare {
+ print STDERR "(1) preparing data @ ".`date`;
+ safesystem("mkdir -p $workdir") or die("ERROR: could not create work dir $workdir");
+ safesystem("mkdir -p $TMP_DIR") or die("ERROR: could not create work dir $TMP_DIR");
+ safesystem("mkdir -p $SCORE_DIR") or die("ERROR: could not create work dir $SCORE_DIR");
+ &get_moses_ini_params();
+ &copy_tables_to_tmp_dir();
+ &write_data_files();
+
+ $N_PHRASES = &get_number_of_phrases();
+ $line_end = ($line_end > $N_PHRASES) ? $N_PHRASES : $line_end;
+ $N_PHRASES_TO_PROCESS = $line_end - $line_start;
+}
+
+sub write_data_files {
+ open(SOURCE_WRITER,">".$SOURCE_FILE) or die "ERROR: Can't write $SOURCE_FILE";
+ open(CONSTRAINT_WRITER,">".$CONSTRAINT_FILE) or die "ERROR: Can't write $CONSTRAINT_FILE";
+ open(TABLE_WRITER,">".$SIG_TABLE_FILE) or die "ERROR: Can't write $SIG_TABLE_FILE";
+ open(TTABLE_READER, &open_compressed($TRANSLATION_TABLE_FILE)) or die "ERROR: Can't read $TRANSLATION_TABLE_FILE";
+
+ $line_number = 0;
+ while($line_number < $line_start && !eof(TTABLE_READER)){
+ <TTABLE_READER>;
+ $line_number++;
+ }
+ while($line_number < $line_end && !eof(TTABLE_READER)) {
+ my $line = <TTABLE_READER>;
+ chomp($line);
+ my @line_array = split(/\s+\|\|\|\s+/, $line);
+ my $source = $line_array[0];
+ my $target = $line_array[1];
+ my $scores = $line_array[2];
+ print TABLE_WRITER $source." ||| ".$target." ||| ".$scores."\n";
+ print SOURCE_WRITER $source."\n";
+ print CONSTRAINT_WRITER $target."\n";
+ $line_number++;
+ }
+
+ close(SOURCE_WRITER);
+ close(CONSTRAINT_WRITER);
+ close(TABLE_WRITER);
+ close(TTABLE_READER);
+}
+
+sub copy_tables_to_tmp_dir {
+ $tmp_t_table = "$TMP_DIR/".basename($TRANSLATION_TABLE_FILE);
+ $tmp_r_table = "$TMP_DIR/".basename($REORDERING_TABLE_FILE);
+ $tmp_moses_ini = "$TMP_DIR/moses.ini";
+ $cp_t_cmd = "$CP $TRANSLATION_TABLE_FILE $TMP_DIR";
+ $cp_r_cmd = "$CP $REORDERING_TABLE_FILE $TMP_DIR";
+ safesystem("$cp_t_cmd") or die("ERROR: could not run:\n $cp_t_cmd");
+ safesystem("$cp_r_cmd") or die("ERROR: could not run:\n $cp_r_cmd");
+
+ $sed_cmd = "$SED s#$TRANSLATION_TABLE_FILE#$tmp_t_table#g $moses_ini | $SED s#$REORDERING_TABLE_FILE#$tmp_r_table#g > $tmp_moses_ini";
+ safesystem("$sed_cmd") or die("ERROR: could not run:\n $sed_cmd");
+
+ $TRANSLATION_TABLE_FILE = $tmp_t_table;
+ $REORDERING_TABLE_FILE = $tmp_r_table;
+ $moses_ini = $tmp_moses_ini;
+}
+
+# (2) calculating sig and counts
+sub calc_sig_and_counts {
+ print STDERR "(2) calculating counts and significance".`date`;
+ print STDERR "(2.1) running significance module".`date`;
+ &run_significance_module();
+ print STDERR "(2.2) writing counts and significance tables".`date`;
+ &write_counts_and_significance_table();
+ print STDERR "(2.3) calculating empirical distribution".`date`;
+}
+
+sub write_counts_and_significance_table {
+ open(COUNT_WRITER,">".$COUNT_FILE) or die "ERROR: Can't write $COUNT_FILE";
+ open(SIG_WRITER,">".$SIG_FILE) or die "ERROR: Can't write $SIG_FILE";
+ open(SIG_MOD_READER, &open_compressed($SIG_MOD_OUTPUT)) or die "ERROR: Can't read $SIG_MOD_OUTPUT";
+
+ while(<SIG_MOD_READER>) {
+ my($line) = $_;
+ chomp($line);
+ my @line_array = split(/\s+\|\|\|\s+/, $line);
+ my $count = $line_array[0];
+ my $sig = $line_array[1];
+ print COUNT_WRITER $count."\n";
+ print SIG_WRITER $sig."\n";
+ }
+
+ close(SIG_MOD_READER);
+ close(COUNT_WRITER);
+ close(SIG_WRITER);
+}
+
+sub run_significance_module {
+ my $sig_cmd = "cat $SIG_TABLE_FILE | $SIG_EXEC -e $training_t -f $training_s -l -10000 -p -c > $SIG_MOD_OUTPUT";
+ safesystem("$sig_cmd") or die("ERROR: could not run:\n $sig_cmd");
+}
+
+# (3) calculating divergence
+sub calc_div {
+ print STDERR "(3) calculating relative entropy".`date`;
+ print STDERR "(3.1) calculating empirical distribution".`date`;
+ &calculate_empirical_distribution();
+ print STDERR "(3.2) calculating divergence (this might take a while)".`date`;
+ if($N_PHRASES_TO_PROCESS > $dec_size) {
+ &calculate_divergence_shared("$FILTER_DIR");
+ }
+ else{
+ &calculate_divergence($moses_ini);
+ }
+ print STDERR "(3.3) calculating relative entropy from empirical and divergence distributions".`date`;
+ &calculate_relative_entropy();
+}
+
+sub calculate_empirical_distribution {
+ my $emp_cmd = "$CALC_EMP_EXEC $COUNT_FILE > $EMP_DIST_FILE";
+ safesystem("$emp_cmd") or die("ERROR: could not run:\n $emp_cmd");
+}
+
+sub get_fragmented_file_name {
+ my ($name, $frag, $interval) = @_;
+ return "$name-$frag-".($frag+$interval);
+}
+
+sub calculate_divergence {
+ my $moses_ini_file = $_[0];
+ print STDERR "force decoding phrase pairs\n";
+ my $prune_cmd = "cat $SOURCE_FILE | $PRUNE_EXEC -f $moses_ini_file -constraint $CONSTRAINT_FILE -early-discarding-threshold 0 -s 100000 -ttable-limit 0 > $DIVERGENCE_FILE 2> /dev/null";
+ safesystem("$prune_cmd") or die("ERROR: could not run:\n $prune_cmd");
+}
+
+sub calculate_divergence_shared {
+ my $filter_dir = $_[0];
+
+ &split_file_into_chunks($SOURCE_FILE, $dec_size, $N_PHRASES_TO_PROCESS);
+ &split_file_into_chunks($CONSTRAINT_FILE, $dec_size, $N_PHRASES_TO_PROCESS);
+
+ for(my $i = 0; $i < $N_PHRASES_TO_PROCESS; $i = $i + $dec_size) {
+ my $filter_cmd = "$FILTER_EXEC ".&get_fragmented_file_name($FILTER_DIR, $i, $dec_size)." $moses_ini ".&get_fragmented_file_name($SOURCE_FILE, $i, $dec_size);
+ safesystem("$filter_cmd") or die("ERROR: could not run:\n $filter_cmd");
+
+ my $moses_ini_file = &get_fragmented_file_name($filter_dir, $i, $dec_size)."/moses.ini";
+ my $source_file = &get_fragmented_file_name($SOURCE_FILE, $i, $dec_size);
+ my $constraint_file = &get_fragmented_file_name($CONSTRAINT_FILE, $i, $dec_size);
+ my $prune_cmd;
+ print STDERR "force decoding phrase pairs $i to ".($i + $dec_size)."\n";
+ if($i == 0){
+ $prune_cmd = "cat $source_file | $PRUNE_EXEC -f $moses_ini_file -constraint $constraint_file -early-discarding-threshold 0 -s 100000 -ttable-limit 0 > $DIVERGENCE_FILE 2> /dev/null";
+ }
+ else{
+ $prune_cmd = "cat $source_file | $PRUNE_EXEC -f $moses_ini_file -constraint $constraint_file -early-discarding-threshold 0 -s 100000 -ttable-limit 0 >> $DIVERGENCE_FILE 2> /dev/null";
+ }
+ safesystem("$prune_cmd") or die("ERROR: could not run:\n $prune_cmd");
+
+ my $rm_cmd = "$RM -r ".&get_fragmented_file_name($FILTER_DIR, $i, $dec_size);
+ safesystem("$rm_cmd") or die("ERROR: could not run:\n $rm_cmd");
+
+ }
+}
+
+sub calculate_relative_entropy {
+ my $int_cmd = "$INT_TABLE_EXEC -files \"$EMP_DIST_FILE $DIVERGENCE_FILE\" -weights \"1 1\" -operation \"*\" > $REL_ENT_FILE";
+ safesystem("$int_cmd") or die("ERROR: could not run:\n $int_cmd");
+
+}
+
+# (4) clear up stuff that is not needed
+sub clear_up {
+ print STDERR "(4) removing tmp dir".`date`;
+ $rm_cmd = "$RM -r $TMP_DIR";
+ safesystem("$rm_cmd") or die("ERROR: could not run:\n $rm_cmd");
+}
+
+# utility functions
+
+sub safesystem {
+ print STDERR "Executing: @_\n";
+ system(@_);
+ if ($? == -1) {
+ print STDERR "ERROR: Failed to execute: @_\n $!\n";
+ exit(1);
+ }
+ elsif ($? & 127) {
+ printf STDERR "ERROR: Execution of: @_\n died with signal %d, %s coredump\n",
+ ($? & 127), ($? & 128) ? 'with' : 'without';
+ exit(1);
+ }
+ else {
+ my $exitcode = $? >> 8;
+ print STDERR "Exit code: $exitcode\n" if $exitcode;
+ return ! $exitcode;
+ }
+}
+
+sub open_compressed {
+ my ($file) = @_;
+ print STDERR "FILE: $file\n";
+
+ # add extensions, if necessary
+ $file = $file.".bz2" if ! -e $file && -e $file.".bz2";
+ $file = $file.".gz" if ! -e $file && -e $file.".gz";
+
+ # pipe zipped, if necessary
+ return "$BZCAT $file|" if $file =~ /\.bz2$/;
+ return "$ZCAT $file|" if $file =~ /\.gz$/;
+ return $file;
+}
+
+sub get_moses_ini_params {
+
+ open(MOSES_READER, $moses_ini);
+ while(<MOSES_READER>) {
+ my($line) = $_;
+ chomp($line);
+
+ if($line eq "[ttable-file]"){
+ $tableLine = <MOSES_READER>;
+ chomp($tableLine);
+ ($_,$_,$_,$_,$TRANSLATION_TABLE_FILE) = split(" ",$tableLine); # put the other parameters there if needed
+ }
+ if($line eq "[distortion-file]"){
+ $tableLine = <MOSES_READER>;
+ chomp($tableLine);
+ ($_,$_,$_,$REORDERING_TABLE_FILE) = split(" ",$tableLine); # put the other parameters there if needed
+ }
+ }
+ close(MOSES_READER);
+}
+
+sub get_number_of_phrases {
+ my $ret = 0;
+ open(TABLE_READER, &open_compressed($TRANSLATION_TABLE_FILE)) or die "ERROR: Can't read $TRANSLATION_TABLE_FILE";
+
+ while(<TABLE_READER>) {
+ $ret++;
+ }
+
+ close (TABLE_READER);
+ return $ret;
+}
+
+sub split_file_into_chunks {
+ my ($file_to_split, $chunk_size, $number_of_phrases_to_process) = @_;
+ open(SOURCE_READER, &open_compressed($file_to_split)) or die "ERROR: Can't read $file_to_split";
+ my $FRAG_SOURCE_WRITER;
+ for(my $i = 0; $i < $number_of_phrases_to_process && !eof(SOURCE_READER); $i++) {
+ if(($i % $chunk_size) == 0){ # open fragmented file to write
+ my $frag_file = &get_fragmented_file_name($file_to_split, $i, $chunk_size);
+ open(FRAG_SOURCE_WRITER, ">".$frag_file) or die "ERROR: Can't write $frag_file";
+ }
+ my $line = <SOURCE_READER>;
+ print FRAG_SOURCE_WRITER $line;
+    if(($i % $chunk_size) == $chunk_size - 1 || ($i % $chunk_size) == $number_of_phrases_to_process - 1){ # close fragmented file before opening a new one
+ close(FRAG_SOURCE_WRITER);
+ }
+ }
+}
+
+
diff --git a/contrib/relent-filter/scripts/interpolateScores.pl b/contrib/relent-filter/scripts/interpolateScores.pl
new file mode 100644
index 000000000..b204e951a
--- /dev/null
+++ b/contrib/relent-filter/scripts/interpolateScores.pl
@@ -0,0 +1,94 @@
+#!/usr/bin/perl -w
+use Getopt::Long;
+use File::Basename;
+use POSIX;
+
+$operation="+";
+
+# read arguments
+$_HELP = 1 if (@ARGV < 1 or !GetOptions ("files=s" => \$files, #score files to interpolate
+"weights=s" => \$weights, #interpolation weights
+"operation=s" => \$operation)); #operation used to combine scores (+, * or min)
+
+
+# help message if arguments are not correct
+if ($_HELP) {
+ print "Relative Entropy Pruning
+Usage: perl interpolateScores.pl [PARAMS]
+Function: interpolates any number of score files, combining them according to their weights
+Authors: Wang Ling ( lingwang at cs dot cmu dot edu )
+PARAMS:
+ -files=s : table files to interpolate separated by a space (Ex \"file1 file2 file3\")
+ -weights : interpolation weights separated by a space (Ex \"0.3 0.3 0.4\")
+ -operation : +,* or min depending on the operation to perform to combine scores
+For any questions contact lingwang at cs dot cmu dot edu
+";
+ exit(1);
+}
+
+@FILES = split(/\s+/, $files);
+@WEIGHTS = split(/\s+/, $weights);
+
+my $ZCAT = "gzip -cd";
+my $BZCAT = "bzcat";
+
+&interpolate();
+
+sub interpolate {
+ my @READERS;
+ for($i = 0; $i < @FILES; $i++){
+ local *FILE;
+ open(FILE, &open_compressed($FILES[$i])) or die "ERROR: Can't read $FILES[$i]";
+ push(@READERS, *FILE);
+ }
+ $FIRST = $READERS[0];
+ while(!eof($FIRST)) {
+ if($operation eq "+"){
+ my $score = 0;
+ for($i = 0; $i < @FILES; $i++){
+ my $READER = $READERS[$i];
+ my $line = <$READER>;
+ chomp($line);
+ $score += $line*$WEIGHTS[$i];
+ }
+ print "$score\n";
+ }
+ if($operation eq "*"){
+ my $score = 1;
+ for($i = 0; $i < @FILES; $i++){
+ my $READER = $READERS[$i];
+ my $line = <$READER>;
+ chomp($line);
+ $score *= $line ** $WEIGHTS[$i];
+ }
+ print "$score\n"
+ }
+ if($operation eq "min"){
+ my $score = 99999;
+ for($i = 0; $i < @FILES; $i++){
+ my $READER = $READERS[$i];
+ my $line = <$READER>;
+ chomp($line);
+ if ($score > $line*$WEIGHTS[$i]){
+ $score = $line*$WEIGHTS[$i];
+ }
+ }
+ print "$score\n"
+
+ }
+ }
+}
+
+sub open_compressed {
+ my ($file) = @_;
+ print STDERR "FILE: $file\n";
+
+ # add extensions, if necessary
+ $file = $file.".bz2" if ! -e $file && -e $file.".bz2";
+ $file = $file.".gz" if ! -e $file && -e $file.".gz";
+
+ # pipe zipped, if necessary
+ return "$BZCAT $file|" if $file =~ /\.bz2$/;
+ return "$ZCAT $file|" if $file =~ /\.gz$/;
+ return $file;
+}
diff --git a/contrib/relent-filter/scripts/prunePT.pl b/contrib/relent-filter/scripts/prunePT.pl
new file mode 100755
index 000000000..37dc30bad
--- /dev/null
+++ b/contrib/relent-filter/scripts/prunePT.pl
@@ -0,0 +1,114 @@
+#!/usr/bin/perl -w
+
+# read arguments
+my $tmp_dir = "";
+my $percentage = -1;
+my $threshold = -1;
+use Getopt::Long;
+$_HELP = 1 if (@ARGV < 1 or !GetOptions ("table=s" => \$table, #table to filter
+"scores=s" => \$scores_file, #scores of each phrase pair, should have same size as the table to filter
+"percentage=i" => \$percentage, # percentage of phrase table to remain
+"threshold=i" => \$threshold)); # threshold (score < threshold equals prune entry)
+
+# help message if arguments are not correct
+if ($_HELP) {
+ print "Relative Entropy Pruning
+Usage: perl prunePT.pl [PARAMS]
+Function: prunes a phrase table given a score file
+Authors: Wang Ling ( lingwang at cs dot cmu dot edu )
+PARAMS:
+ -table : table to prune
+ -percentage : percentage of phrase table to remain (if the scores do not allow the exact percentage if multiple entries have the same threshold, the script chooses to retain more than the given percentage)
+ -threshold : threshold to prune (score < threshold equals prune entry), do not use this if percentage is specified
+For any questions contact lingwang at cs dot cmu dot edu
+";
+ exit(1);
+}
+
+
+my $THRESHOLD = $threshold;
+if ($percentage != -1){
+ $THRESHOLD = &get_threshold_by_percentage($percentage);
+}
+
+my $ZCAT = "gzip -cd";
+my $BZCAT = "bzcat";
+
+&prune_by_threshold($THRESHOLD);
+
+sub prune_by_threshold {
+ my $th = $_[0];
+ print STDERR "pruning using threshold $th \n";
+ open (SCORE_READER, &open_compressed($scores_file));
+ open (TABLE_READER, &open_compressed($table));
+ $number_of_phrases=0;
+ $number_of_unpruned_phrases=0;
+ while(!eof(SCORE_READER) && !eof(TABLE_READER)){
+ $score_line = <SCORE_READER>;
+ $table_line = <TABLE_READER>;
+ chomp($score_line);
+ if($score_line >= $th){
+ print $table_line;
+ $number_of_unpruned_phrases++;
+ }
+ $number_of_phrases++;
+ }
+ print STDERR "pruned ".($number_of_phrases - $number_of_unpruned_phrases)." phrase pairs out of $number_of_phrases\n";
+}
+
+sub get_threshold_by_percentage {
+ my $percentage = $_[0];
+ $ret = 0;
+
+ $number_of_phrases = &get_number_of_phrases();
+ $stop_phrase = ($percentage * $number_of_phrases) / 100;
+ $phrase_number = 0;
+
+
+ open (SCORE_READER, &open_compressed($scores_file));
+ while(<SCORE_READER>) {
+ my $line = $_;
+
+ }
+ close (SCORE_READER);
+
+ open (SCORE_READER, "cat $scores_file | LC_ALL=c sort -g |");
+ while(<SCORE_READER>) {
+ my $line = $_;
+ if($phrase_number >= $stop_phrase){
+ chomp($line);
+ $ret = $line;
+ last;
+ }
+ $phrase_number++;
+ }
+
+ close (SCORE_READER);
+ return $ret;
+}
+
+sub get_number_of_phrases {
+ $ret = 0;
+ open (SCORE_READER, $scores_file);
+
+ while(<SCORE_READER>) {
+ $ret++;
+ }
+
+ close (SCORE_READER);
+ return $ret;
+}
+
+sub open_compressed {
+ my ($file) = @_;
+ print STDERR "FILE: $file\n";
+
+ # add extensions, if necessary
+ $file = $file.".bz2" if ! -e $file && -e $file.".bz2";
+ $file = $file.".gz" if ! -e $file && -e $file.".gz";
+
+ # pipe zipped, if necessary
+ return "$BZCAT $file|" if $file =~ /\.bz2$/;
+ return "$ZCAT $file|" if $file =~ /\.gz$/;
+ return $file;
+}
diff --git a/contrib/relent-filter/sigtest-filter/Makefile b/contrib/relent-filter/sigtest-filter/Makefile
new file mode 100755
index 000000000..71de9c45f
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/Makefile
@@ -0,0 +1,10 @@
+SALMDIR=/Users/hieuhoang/workspace/salm
+FLAVOR?=o64
+INC=-I$(SALMDIR)/Src/Shared -I$(SALMDIR)/Src/SuffixArrayApplications -I$(SALMDIR)/Src/SuffixArrayApplications/SuffixArraySearch
+OBJS=$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArrayApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArraySearchApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_String.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_IDVocabulary.$(FLAVOR)
+
+all: filter-pt
+
+filter-pt: filter-pt.cpp
+ ./check-install $(SALMDIR)
+ $(CXX) -O6 $(INC) $(OBJS) -o filter-pt filter-pt.cpp
diff --git a/contrib/relent-filter/sigtest-filter/README.txt b/contrib/relent-filter/sigtest-filter/README.txt
new file mode 100755
index 000000000..b21129b89
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/README.txt
@@ -0,0 +1,42 @@
+Re-implementation of Johnson et al. (2007)'s phrasetable filtering strategy.
+
+This implementation relies on Joy Zhang's SALM Suffix Array toolkit. It is
+available here:
+
+ http://projectile.sv.cmu.edu/research/public/tools/salm/salm.htm
+
+--Chris Dyer <redpony@umd.edu>
+
+BUILD INSTRUCTIONS
+---------------------------------
+
+1. Download and build SALM.
+
+2. make SALMDIR=/path/to/SALM
+
+
+USAGE INSTRUCTIONS
+---------------------------------
+
+1. Using the SALM/Bin/Linux/Index/IndexSA.O32, create a suffix array index
+ of the source and target sides of your training bitext.
+
+2. cat phrase-table.txt | ./filter-pt -e TARG.suffix -f SOURCE.suffix \
+ -l <FILTER-VALUE>
+
+ FILTER-VALUE is the -log prob threshold described in Johnson et al.
+ (2007)'s paper. It may be either 'a+e', 'a-e', or a positive real
+ value. 'a+e' is a good setting- it filters out <1,1,1> phrase pairs.
+ I also recommend using -n 30, which filteres out all but the top
+ 30 phrase pairs, sorted by P(e|f). This was used in the paper.
+
+3. Run with no options to see more use-cases.
+
+
+REFERENCES
+---------------------------------
+
+H. Johnson, J. Martin, G. Foster and R. Kuhn. (2007) Improving Translation
+ Quality by Discarding Most of the Phrasetable. In Proceedings of the 2007
+ Joint Conference on Empirical Methods in Natural Language Processing and
+ Computational Natural Language Learning (EMNLP-CoNLL), pp. 967-975.
diff --git a/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp b/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp
new file mode 100755
index 000000000..60ddd340c
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/WIN32_functions.cpp
@@ -0,0 +1,231 @@
+// XGetopt.cpp Version 1.2
+//
+// Author: Hans Dietrich
+// hdietrich2@hotmail.com
+//
+// Description:
+// XGetopt.cpp implements getopt(), a function to parse command lines.
+//
+// History
+// Version 1.2 - 2003 May 17
+// - Added Unicode support
+//
+// Version 1.1 - 2002 March 10
+// - Added example to XGetopt.cpp module header
+//
+// This software is released into the public domain.
+// You are free to use it in any way you like.
+//
+// This software is provided "as is" with no expressed
+// or implied warranty. I accept no liability for any
+// damage or loss of business that this software may cause.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// if you are using precompiled headers then include this line:
+///////////////////////////////////////////////////////////////////////////////
+
+
+///////////////////////////////////////////////////////////////////////////////
+// if you are not using precompiled headers then include these lines:
+//#include <windows.h>
+//#include <stdio.h>
+//#include <tchar.h>
+///////////////////////////////////////////////////////////////////////////////
+
+
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include "WIN32_functions.h"
+
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// X G e t o p t . c p p
+//
+//
+// NAME
+// getopt -- parse command line options
+//
+// SYNOPSIS
+// int getopt(int argc, char *argv[], char *optstring)
+//
+// extern char *optarg;
+// extern int optind;
+//
+// DESCRIPTION
+// The getopt() function parses the command line arguments. Its
+// arguments argc and argv are the argument count and array as
+// passed into the application on program invocation. In the case
+// of Visual C++ programs, argc and argv are available via the
+// variables __argc and __argv (double underscores), respectively.
+// getopt returns the next option letter in argv that matches a
+// letter in optstring. (Note: Unicode programs should use
+// __targv instead of __argv. Also, all character and string
+// literals should be enclosed in ( ) ).
+//
+// optstring is a string of recognized option letters; if a letter
+// is followed by a colon, the option is expected to have an argument
+// that may or may not be separated from it by white space. optarg
+// is set to point to the start of the option argument on return from
+// getopt.
+//
+// Option letters may be combined, e.g., "-ab" is equivalent to
+// "-a -b". Option letters are case sensitive.
+//
+// getopt places in the external variable optind the argv index
+// of the next argument to be processed. optind is initialized
+// to 0 before the first call to getopt.
+//
+// When all options have been processed (i.e., up to the first
+// non-option argument), getopt returns EOF, optarg will point
+// to the argument, and optind will be set to the argv index of
+// the argument. If there are no non-option arguments, optarg
+// will be set to NULL.
+//
+// The special option "--" may be used to delimit the end of the
+// options; EOF will be returned, and "--" (and everything after it)
+// will be skipped.
+//
+// RETURN VALUE
+// For option letters contained in the string optstring, getopt
+// will return the option letter. getopt returns a question mark (?)
+// when it encounters an option letter not included in optstring.
+// EOF is returned when processing is finished.
+//
+// BUGS
+// 1) Long options are not supported.
+// 2) The GNU double-colon extension is not supported.
+// 3) The environment variable POSIXLY_CORRECT is not supported.
+// 4) The + syntax is not supported.
+// 5) The automatic permutation of arguments is not supported.
+// 6) This implementation of getopt() returns EOF if an error is
+// encountered, instead of -1 as the latest standard requires.
+//
+// EXAMPLE
+// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
+// {
+// int c;
+//
+// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
+// {
+// switch (c)
+// {
+// case ('a'):
+// TRACE(("option a\n"));
+// //
+// // set some flag here
+// //
+// break;
+//
+// case ('B'):
+// TRACE( ("option B\n"));
+// //
+// // set some other flag here
+// //
+// break;
+//
+// case ('n'):
+// TRACE(("option n: value=%d\n"), atoi(optarg));
+// //
+// // do something with value here
+// //
+// break;
+//
+// case ('?'):
+// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
+// return FALSE;
+// break;
+//
+// default:
+// TRACE(("WARNING: no handler for option %c\n"), c);
+// return FALSE;
+// break;
+// }
+// }
+// //
+// // check for non-option args here
+// //
+// return TRUE;
+// }
+//
+///////////////////////////////////////////////////////////////////////////////
+
+char *optarg; // global argument pointer
+int optind = 0; // global argv index
+
+int getopt(int argc, char *argv[], char *optstring)
+{
+ static char *next = NULL;
+ if (optind == 0)
+ next = NULL;
+
+ optarg = NULL;
+
+ if (next == NULL || *next =='\0') {
+ if (optind == 0)
+ optind++;
+
+ if (optind >= argc || argv[optind][0] != ('-') || argv[optind][1] == ('\0')) {
+ optarg = NULL;
+ if (optind < argc)
+ optarg = argv[optind];
+ return EOF;
+ }
+
+ if (strcmp(argv[optind], "--") == 0) {
+ optind++;
+ optarg = NULL;
+ if (optind < argc)
+ optarg = argv[optind];
+ return EOF;
+ }
+
+ next = argv[optind];
+ next++; // skip past -
+ optind++;
+ }
+
+ char c = *next++;
+ char *cp = strchr(optstring, c);
+
+ if (cp == NULL || c == (':'))
+ return ('?');
+
+ cp++;
+ if (*cp == (':')) {
+ if (*next != ('\0')) {
+ optarg = next;
+ next = NULL;
+ } else if (optind < argc) {
+ optarg = argv[optind];
+ optind++;
+ } else {
+ return ('?');
+ }
+ }
+
+ return c;
+}
+
+// for an overview, see
+// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
+double lgamma(int x)
+{
+ // size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
+ if (x <= 2) {
+ return 0.0;
+ }
+ static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
+ double tmp=(double)x+5.5;
+ tmp -= (((double)x)+0.5)*log(tmp);
+ double y=(double)x;
+ double sum = 1.000000000190015;
+ for (size_t j=0; j<6; ++j) {
+ sum += coefs[j]/++y;
+ }
+ return -tmp+log(2.5066282746310005*sum/(double)x);
+} \ No newline at end of file
diff --git a/contrib/relent-filter/sigtest-filter/WIN32_functions.h b/contrib/relent-filter/sigtest-filter/WIN32_functions.h
new file mode 100755
index 000000000..6a719392e
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/WIN32_functions.h
@@ -0,0 +1,24 @@
+// XGetopt.h Version 1.2
+//
+// Author: Hans Dietrich
+// hdietrich2@hotmail.com
+//
+// This software is released into the public domain.
+// You are free to use it in any way you like.
+//
+// This software is provided "as is" with no expressed
+// or implied warranty. I accept no liability for any
+// damage or loss of business that this software may cause.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#ifndef XGETOPT_H
+#define XGETOPT_H
+
+extern int optind, opterr;
+extern char *optarg;
+
+int getopt(int argc, char *argv[], char *optstring);
+double lgamma(int x);
+
+#endif //XGETOPT_H
diff --git a/contrib/relent-filter/sigtest-filter/check-install b/contrib/relent-filter/sigtest-filter/check-install
new file mode 100755
index 000000000..ba4f431e0
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/check-install
@@ -0,0 +1,5 @@
+#!/usr/bin/perl -w
+use strict;
+my $path = shift @ARGV;
+die "Can't find SALM installation path: $path\nPlease use:\n\n make SALMDIR=/path/to/SALM\n\n" unless (-d $path);
+exit 0;
diff --git a/contrib/relent-filter/sigtest-filter/filter-pt.cpp b/contrib/relent-filter/sigtest-filter/filter-pt.cpp
new file mode 100755
index 000000000..4a51953ea
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/filter-pt.cpp
@@ -0,0 +1,377 @@
+
+#include <cstring>
+#include <cassert>
+#include <cstdio>
+#include <cstdlib>
+#include <algorithm>
+
+#include "_SuffixArraySearchApplicationBase.h"
+
+#include <vector>
+#include <iostream>
+#include <set>
+
+#ifdef WIN32
+#include "WIN32_functions.h"
+#else
+#include <unistd.h>
+#endif
+
+typedef std::set<TextLenType> SentIdSet;
+typedef std::map<std::string, SentIdSet> PhraseSetMap;
+
+#undef min
+
+// constants
+const size_t MINIMUM_SIZE_TO_KEEP = 10000; // reduce this to improve memory usage,
+// increase for speed
+const std::string SEPARATOR = " ||| ";
+
+const double ALPHA_PLUS_EPS = -1000.0; // dummy value
+const double ALPHA_MINUS_EPS = -2000.0; // dummy value
+
+// configuration params
+int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e)
+bool print_cooc_counts = false; // add cooc counts to phrase table?
+bool print_neglog_significance = false; // add -log(p) to phrase table?
+double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
+// higher = filter-more
+bool pef_filter_only = false; // only filter based on pef
+
+// globals
+PhraseSetMap esets;
+double p_111 = 0.0; // alpha
+size_t nremoved_sigfilter = 0;
+size_t nremoved_pfefilter = 0;
+
+C_SuffixArraySearchApplicationBase e_sa;
+C_SuffixArraySearchApplicationBase f_sa;
+int num_lines;
+
+void usage()
+{
+ std::cerr << "\nFilter phrase table using significance testing as described\n"
+ << "in H. Johnson, et al. (2007) Improving Translation Quality\n"
+ << "by Discarding Most of the Phrasetable. EMNLP 2007.\n"
+ << "\nUsage:\n"
+ << "\n filter-pt -e english.suf-arr -f french.suf-arr\n"
+ << " [-c] [-p] [-l threshold] [-n num] < PHRASE-TABLE > FILTERED-PHRASE-TABLE\n\n"
+ << " [-l threshold] >0.0, a+e, or a-e: keep values that have a -log significance > this\n"
+ << " [-n num ] 0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements\n"
+ << " [-c ] add the cooccurence counts to the phrase table\n"
+ << " [-p ] add -log(significance) to the phrasetable\n\n";
+ exit(1);
+}
+
+struct PTEntry {
+ PTEntry(const std::string& str, int index);
+ std::string f_phrase;
+ std::string e_phrase;
+ std::string extra;
+ std::string scores;
+ float pfe;
+ int cf;
+ int ce;
+ int cfe;
+ float nlog_pte;
+ void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
+ cfe = _cef;
+ cf = _cf;
+ ce = _ce;
+ nlog_pte = nlp;
+ }
+
+};
+
+PTEntry::PTEntry(const std::string& str, int index) :
+ cf(0), ce(0), cfe(0), nlog_pte(0.0)
+{
+ size_t pos = 0;
+ std::string::size_type nextPos = str.find(SEPARATOR, pos);
+ this->f_phrase = str.substr(pos,nextPos);
+
+ pos = nextPos + SEPARATOR.size();
+ nextPos = str.find(SEPARATOR, pos);
+ this->e_phrase = str.substr(pos,nextPos-pos);
+
+ pos = nextPos + SEPARATOR.size();
+ nextPos = str.find(SEPARATOR, pos);
+ this->scores = str.substr(pos,nextPos-pos);
+
+ pos = nextPos + SEPARATOR.size();
+ this->extra = str.substr(pos);
+
+ int c = 0;
+ std::string::iterator i=scores.begin();
+ if (index > 0) {
+ for (; i != scores.end(); ++i) {
+ if ((*i) == ' ') {
+ c++;
+ if (c == index) break;
+ }
+ }
+ }
+ if (i != scores.end()) {
+ ++i;
+ }
+ char f[24];
+ char *fp=f;
+ while (i != scores.end() && *i != ' ') {
+ *fp++=*i++;
+ }
+ *fp++=0;
+
+ this->pfe = atof(f);
+
+ // std::cerr << "L: " << f_phrase << " ::: " << e_phrase << " ::: " << scores << " ::: " << pfe << std::endl;
+ // std::cerr << "X: " << extra << "\n";
+}
+
+struct PfeComparer {
+ bool operator()(const PTEntry* a, const PTEntry* b) const {
+ return a->pfe > b->pfe;
+ }
+};
+
+struct NlogSigThresholder {
+ NlogSigThresholder(float threshold) : t(threshold) {}
+ float t;
+ bool operator()(const PTEntry* a) const {
+ if (a->nlog_pte < t) {
+ delete a;
+ return true;
+ } else return false;
+ }
+};
+
+std::ostream& operator << (std::ostream& os, const PTEntry& pp)
+{
+ //os << pp.f_phrase << " ||| " << pp.e_phrase;
+ //os << " ||| " << pp.scores;
+ //if (pp.extra.size()>0) os << " ||| " << pp.extra;
+ if (print_cooc_counts) os << pp.cfe << " " << pp.cf << " " << pp.ce;
+ if (print_neglog_significance) os << " ||| " << pp.nlog_pte;
+ return os;
+}
+
+void print(int a, int b, int c, int d, float p)
+{
+ std::cerr << a << "\t" << b << "\t P=" << p << "\n"
+ << c << "\t" << d << "\t xf=" << (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1) << "\n\n";
+}
+
+// 2x2 (one-sided) Fisher's exact test
+// see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events
+double fisher_exact(int cfe, int ce, int cf)
+{
+ assert(cfe <= ce);
+ assert(cfe <= cf);
+
+ int a = cfe;
+ int b = (cf - cfe);
+ int c = (ce - cfe);
+ int d = (num_lines - ce - cf + cfe);
+ int n = a + b + c + d;
+
+ double cp = exp(lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d) - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c) - lgamma(1+d));
+ double total_p = 0.0;
+ int tc = std::min(b,c);
+ for (int i=0; i<=tc; i++) {
+ total_p += cp;
+// double lg = lgamma(1+a+c) + lgamma(1+b+d) + lgamma(1+a+b) + lgamma(1+c+d) - lgamma(1+n) - lgamma(1+a) - lgamma(1+b) - lgamma(1+c) - lgamma(1+d); double cp = exp(lg);
+// print(a,b,c,d,cp);
+ double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
+ cp *= coef;
+ ++a;
+ --c;
+ ++d;
+ --b;
+ }
+ return total_p;
+}
+
+// input: unordered list of translation options for a single source phrase
+void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
+{
+ if (pfe_filter_limit>0 && options.size() > pfe_filter_limit) {
+ nremoved_pfefilter += (options.size() - pfe_filter_limit);
+ std::nth_element(options.begin(), options.begin()+pfe_filter_limit, options.end(), PfeComparer());
+ for (std::vector<PTEntry*>::iterator i=options.begin()+pfe_filter_limit; i != options.end(); ++i)
+ delete *i;
+ options.erase(options.begin()+pfe_filter_limit,options.end());
+ }
+ if (pef_filter_only) return;
+
+ SentIdSet fset;
+ vector<S_SimplePhraseLocationElement> locations;
+ //std::cerr << "Looking up f-phrase: " << options.front()->f_phrase << "\n";
+
+ locations = f_sa.locateExactPhraseInCorpus(options.front()->f_phrase.c_str());
+ if(locations.size()==0) {
+ cerr<<"No occurrences found!!\n";
+ }
+ for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin();
+ i != locations.end();
+ ++i) {
+ fset.insert(i->sentIdInCorpus);
+ }
+ size_t cf = fset.size();
+ for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
+ const std::string& e_phrase = (*i)->e_phrase;
+ size_t cef=0;
+ SentIdSet& eset = esets[(*i)->e_phrase];
+ if (eset.empty()) {
+ //std::cerr << "Looking up e-phrase: " << e_phrase << "\n";
+ vector<S_SimplePhraseLocationElement> locations = e_sa.locateExactPhraseInCorpus(e_phrase.c_str());
+ for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin(); i!= locations.end(); ++i) {
+ TextLenType curSentId = i->sentIdInCorpus;
+ eset.insert(curSentId);
+ }
+ }
+ size_t ce=eset.size();
+ if (ce < cf) {
+ for (SentIdSet::iterator i=eset.begin(); i != eset.end(); ++i) {
+ if (fset.find(*i) != fset.end()) cef++;
+ }
+ } else {
+ for (SentIdSet::iterator i=fset.begin(); i != fset.end(); ++i) {
+ if (eset.find(*i) != eset.end()) cef++;
+ }
+ }
+ double nlp = -log(fisher_exact(cef, cf, ce));
+ (*i)->set_cooc_stats(cef, cf, ce, nlp);
+ if (ce < MINIMUM_SIZE_TO_KEEP) {
+ esets.erase(e_phrase);
+ }
+ }
+ std::vector<PTEntry*>::iterator new_end =
+ std::remove_if(options.begin(), options.end(), NlogSigThresholder(sig_filter_limit));
+ nremoved_sigfilter += (options.end() - new_end);
+ options.erase(new_end,options.end());
+}
+
+int main(int argc, char * argv[])
+{
+ int c;
+ const char* efile=0;
+ const char* ffile=0;
+ int pfe_index = 2;
+ while ((c = getopt(argc, argv, "cpf:e:i:n:l:")) != -1) {
+ switch (c) {
+ case 'e':
+ efile = optarg;
+ break;
+ case 'f':
+ ffile = optarg;
+ break;
+ case 'i': // index of pfe in phrase table
+ pfe_index = atoi(optarg);
+ break;
+ case 'n': // keep only the top n entries in phrase table sorted by p(f|e) (0=all)
+ pfe_filter_limit = atoi(optarg);
+ std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
+ break;
+ case 'c':
+ print_cooc_counts = true;
+ break;
+ case 'p':
+ print_neglog_significance = true;
+ break;
+ case 'l':
+ std::cerr << "-l = " << optarg << "\n";
+ if (strcmp(optarg,"a+e") == 0) {
+ sig_filter_limit = ALPHA_PLUS_EPS;
+ } else if (strcmp(optarg,"a-e") == 0) {
+ sig_filter_limit = ALPHA_MINUS_EPS;
+ } else {
+ char *x;
+ sig_filter_limit = strtod(optarg, &x);
+ }
+ break;
+ default:
+ usage();
+ }
+ }
+ //-----------------------------------------------------------------------------
+ if (optind != argc || ((!efile || !ffile) && !pef_filter_only)) {
+ usage();
+ }
+
+ //load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false)
+ if (!pef_filter_only) {
+ e_sa.loadData_forSearch(efile, false, false);
+ f_sa.loadData_forSearch(ffile, false, false);
+ size_t elines = e_sa.returnTotalSentNumber();
+ size_t flines = f_sa.returnTotalSentNumber();
+ if (elines != flines) {
+ std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
+ usage();
+ } else {
+ std::cerr << "Training corpus: " << elines << " lines\n";
+ num_lines = elines;
+ }
+ p_111 = -log(fisher_exact(1,1,1));
+ std::cerr << "\\alpha = " << p_111 << "\n";
+ if (sig_filter_limit == ALPHA_MINUS_EPS) {
+ sig_filter_limit = p_111 - 0.001;
+ } else if (sig_filter_limit == ALPHA_PLUS_EPS) {
+ sig_filter_limit = p_111 + 0.001;
+ }
+ std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
+ } else {
+ std::cerr << "Filtering using P(e|f) only. n=" << pfe_filter_limit << std::endl;
+ }
+
+ char tmpString[10000];
+ std::string prev = "";
+ std::vector<PTEntry*> options;
+ size_t pt_lines = 0;
+ while(!cin.eof()) {
+ cin.getline(tmpString,10000,'\n');
+ if(++pt_lines%10000==0) {
+ std::cerr << ".";
+ if(pt_lines%500000==0) std::cerr << "[n:"<<pt_lines<<"]\n";
+ }
+
+ if(strlen(tmpString)>0) {
+ PTEntry* pp = new PTEntry(tmpString, pfe_index);
+ if (prev != pp->f_phrase) {
+ prev = pp->f_phrase;
+
+ if (!options.empty()) { // always true after first line
+ compute_cooc_stats_and_filter(options);
+ }
+ for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
+ std::cout << **i << std::endl;
+ delete *i;
+ }
+ options.clear();
+ options.push_back(pp);
+
+ } else {
+ options.push_back(pp);
+ }
+ // for(int i=0;i<locations.size(); i++){
+ // cout<<"SentId="<<locations[i].sentIdInCorpus<<" Pos="<<(int)locations[i].posInSentInCorpus<<endl;
+ // }
+ }
+ }
+ compute_cooc_stats_and_filter(options);
+ for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
+ std::cout << **i << std::endl;
+ delete *i;
+ }
+ float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
+ float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
+ std::cerr << "\n\n------------------------------------------------------\n"
+ << " unfiltered phrases pairs: " << pt_lines << "\n"
+ << "\n"
+ << " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
+ << " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
+ << " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
+ << "\n"
+ << " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
+ << "------------------------------------------------------\n";
+
+ return 0;
+}
diff --git a/contrib/relent-filter/sigtest-filter/sigtest-filter.sln b/contrib/relent-filter/sigtest-filter/sigtest-filter.sln
new file mode 100755
index 000000000..517b06238
--- /dev/null
+++ b/contrib/relent-filter/sigtest-filter/sigtest-filter.sln
@@ -0,0 +1,20 @@
+
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sigtest-filter", "sigtest-filter.vcproj", "{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Release|Win32 = Release|Win32
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Debug|Win32.ActiveCfg = Debug|Win32
+ {FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Debug|Win32.Build.0 = Debug|Win32
+ {FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Release|Win32.ActiveCfg = Release|Win32
+ {FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Release|Win32.Build.0 = Release|Win32
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/contrib/relent-filter/src/IOWrapper.cpp b/contrib/relent-filter/src/IOWrapper.cpp
new file mode 100755
index 000000000..053735c96
--- /dev/null
+++ b/contrib/relent-filter/src/IOWrapper.cpp
@@ -0,0 +1,580 @@
+// $Id$
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2006 University of Edinburgh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the University of Edinburgh nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+// example file on how to use moses library
+
+#include <iostream>
+#include <stack>
+#include "TypeDef.h"
+#include "Util.h"
+#include "IOWrapper.h"
+#include "Hypothesis.h"
+#include "WordsRange.h"
+#include "TrellisPathList.h"
+#include "StaticData.h"
+#include "DummyScoreProducers.h"
+#include "InputFileStream.h"
+
+using namespace std;
+using namespace Moses;
+
+namespace MosesCmd
+{
+
+IOWrapper::IOWrapper(
+ const vector<FactorType> &inputFactorOrder
+ , const vector<FactorType> &outputFactorOrder
+ , const FactorMask &inputFactorUsed
+ , size_t nBestSize
+ , const string &nBestFilePath)
+ :m_inputFactorOrder(inputFactorOrder)
+ ,m_outputFactorOrder(outputFactorOrder)
+ ,m_inputFactorUsed(inputFactorUsed)
+ ,m_inputFile(NULL)
+ ,m_inputStream(&std::cin)
+ ,m_nBestStream(NULL)
+ ,m_outputWordGraphStream(NULL)
+ ,m_outputSearchGraphStream(NULL)
+ ,m_detailedTranslationReportingStream(NULL)
+ ,m_alignmentOutputStream(NULL)
+{
+ Initialization(inputFactorOrder, outputFactorOrder
+ , inputFactorUsed
+ , nBestSize, nBestFilePath);
+}
+
+IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
+ , const std::vector<FactorType> &outputFactorOrder
+ , const FactorMask &inputFactorUsed
+ , size_t nBestSize
+ , const std::string &nBestFilePath
+ , const std::string &inputFilePath)
+ :m_inputFactorOrder(inputFactorOrder)
+ ,m_outputFactorOrder(outputFactorOrder)
+ ,m_inputFactorUsed(inputFactorUsed)
+ ,m_inputFilePath(inputFilePath)
+ ,m_inputFile(new InputFileStream(inputFilePath))
+ ,m_nBestStream(NULL)
+ ,m_outputWordGraphStream(NULL)
+ ,m_outputSearchGraphStream(NULL)
+ ,m_detailedTranslationReportingStream(NULL)
+ ,m_alignmentOutputStream(NULL)
+{
+ Initialization(inputFactorOrder, outputFactorOrder
+ , inputFactorUsed
+ , nBestSize, nBestFilePath);
+
+ m_inputStream = m_inputFile;
+}
+
+IOWrapper::~IOWrapper()
+{
+ if (m_inputFile != NULL)
+ delete m_inputFile;
+ if (m_nBestStream != NULL && !m_surpressSingleBestOutput) {
+ // outputting n-best to file, rather than stdout. need to close file and delete obj
+ delete m_nBestStream;
+ }
+ if (m_outputWordGraphStream != NULL) {
+ delete m_outputWordGraphStream;
+ }
+ if (m_outputSearchGraphStream != NULL) {
+ delete m_outputSearchGraphStream;
+ }
+ delete m_detailedTranslationReportingStream;
+ delete m_alignmentOutputStream;
+}
+
+void IOWrapper::Initialization(const std::vector<FactorType> &/*inputFactorOrder*/
+ , const std::vector<FactorType> &/*outputFactorOrder*/
+ , const FactorMask &/*inputFactorUsed*/
+ , size_t nBestSize
+ , const std::string &nBestFilePath)
+{
+ const StaticData &staticData = StaticData::Instance();
+
+ // n-best
+ m_surpressSingleBestOutput = false;
+
+ if (nBestSize > 0) {
+ if (nBestFilePath == "-" || nBestFilePath == "/dev/stdout") {
+ m_nBestStream = &std::cout;
+ m_surpressSingleBestOutput = true;
+ } else {
+ std::ofstream *file = new std::ofstream;
+ m_nBestStream = file;
+ file->open(nBestFilePath.c_str());
+ }
+ }
+
+ // wordgraph output
+ if (staticData.GetOutputWordGraph()) {
+ string fileName = staticData.GetParam("output-word-graph")[0];
+ std::ofstream *file = new std::ofstream;
+ m_outputWordGraphStream = file;
+ file->open(fileName.c_str());
+ }
+
+
+// search graph output
+ if (staticData.GetOutputSearchGraph()) {
+ string fileName;
+ if (staticData.GetOutputSearchGraphExtended())
+ fileName = staticData.GetParam("output-search-graph-extended")[0];
+ else
+ fileName = staticData.GetParam("output-search-graph")[0];
+ std::ofstream *file = new std::ofstream;
+ m_outputSearchGraphStream = file;
+ file->open(fileName.c_str());
+ }
+
+ // detailed translation reporting
+ if (staticData.IsDetailedTranslationReportingEnabled()) {
+ const std::string &path = staticData.GetDetailedTranslationReportingFilePath();
+ m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
+ CHECK(m_detailedTranslationReportingStream->good());
+ }
+
+ // sentence alignment output
+ if (! staticData.GetAlignmentOutputFile().empty()) {
+ m_alignmentOutputStream = new ofstream(staticData.GetAlignmentOutputFile().c_str());
+ CHECK(m_alignmentOutputStream->good());
+ }
+
+}
+
+InputType*IOWrapper::GetInput(InputType* inputType)
+{
+ if(inputType->Read(*m_inputStream, m_inputFactorOrder)) {
+ if (long x = inputType->GetTranslationId()) {
+ if (x>=m_translationId) m_translationId = x+1;
+ } else inputType->SetTranslationId(m_translationId++);
+
+ return inputType;
+ } else {
+ delete inputType;
+ return NULL;
+ }
+}
+
+/***
+ * print surface factor only for the given phrase
+ */
+void OutputSurface(std::ostream &out, const Hypothesis &edge, const std::vector<FactorType> &outputFactorOrder,
+ bool reportSegmentation, bool reportAllFactors)
+{
+ CHECK(outputFactorOrder.size() > 0);
+ const Phrase& phrase = edge.GetCurrTargetPhrase();
+ if (reportAllFactors == true) {
+ out << phrase;
+ } else {
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+ out << *factor;
+ CHECK(factor);
+
+ for (size_t i = 1 ; i < outputFactorOrder.size() ; i++) {
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[i]);
+ CHECK(factor);
+
+ out << "|" << *factor;
+ }
+ out << " ";
+ }
+ }
+
+ // trace option "-t"
+ if (reportSegmentation == true && phrase.GetSize() > 0) {
+ out << "|" << edge.GetCurrSourceWordsRange().GetStartPos()
+ << "-" << edge.GetCurrSourceWordsRange().GetEndPos() << "| ";
+ }
+}
+
+void OutputBestSurface(std::ostream &out, const Hypothesis *hypo, const std::vector<FactorType> &outputFactorOrder,
+ bool reportSegmentation, bool reportAllFactors)
+{
+ if (hypo != NULL) {
+ // recursively retrace this best path through the lattice, starting from the end of the hypothesis sentence
+ OutputBestSurface(out, hypo->GetPrevHypo(), outputFactorOrder, reportSegmentation, reportAllFactors);
+ OutputSurface(out, *hypo, outputFactorOrder, reportSegmentation, reportAllFactors);
+ }
+}
+
+void OutputAlignment(ostream &out, const AlignmentInfo &ai, size_t sourceOffset, size_t targetOffset)
+{
+ typedef std::vector< const std::pair<size_t,size_t>* > AlignVec;
+ AlignVec alignments = ai.GetSortedAlignments();
+
+ AlignVec::const_iterator it;
+ for (it = alignments.begin(); it != alignments.end(); ++it) {
+ const std::pair<size_t,size_t> &alignment = **it;
+ out << alignment.first + sourceOffset << "-" << alignment.second + targetOffset << " ";
+ }
+
+}
+
+void OutputAlignment(ostream &out, const vector<const Hypothesis *> &edges)
+{
+ size_t targetOffset = 0;
+
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const TargetPhrase &tp = edge.GetCurrTargetPhrase();
+ size_t sourceOffset = edge.GetCurrSourceWordsRange().GetStartPos();
+
+ OutputAlignment(out, tp.GetAlignmentInfo(), sourceOffset, targetOffset);
+
+ targetOffset += tp.GetSize();
+ }
+ out << std::endl;
+}
+
+void OutputAlignment(OutputCollector* collector, size_t lineNo , const vector<const Hypothesis *> &edges)
+{
+ ostringstream out;
+ OutputAlignment(out, edges);
+
+ collector->Write(lineNo,out.str());
+}
+
+void OutputAlignment(OutputCollector* collector, size_t lineNo , const Hypothesis *hypo)
+{
+ if (collector) {
+ std::vector<const Hypothesis *> edges;
+ const Hypothesis *currentHypo = hypo;
+ while (currentHypo) {
+ edges.push_back(currentHypo);
+ currentHypo = currentHypo->GetPrevHypo();
+ }
+
+ OutputAlignment(collector,lineNo, edges);
+ }
+}
+
+void OutputAlignment(OutputCollector* collector, size_t lineNo , const TrellisPath &path)
+{
+ if (collector) {
+ OutputAlignment(collector,lineNo, path.GetEdges());
+ }
+}
+
+void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/, bool reportSegmentation, bool reportAllFactors, std::ostream &out)
+{
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ OutputSurface(out, edge, StaticData::Instance().GetOutputFactorOrder(), reportSegmentation, reportAllFactors);
+ }
+ out << endl;
+}
+
+void IOWrapper::Backtrack(const Hypothesis *hypo)
+{
+
+ if (hypo->GetPrevHypo() != NULL) {
+ VERBOSE(3,hypo->GetId() << " <= ");
+ Backtrack(hypo->GetPrevHypo());
+ }
+}
+
+void OutputBestHypo(const std::vector<Word>& mbrBestHypo, long /*translationId*/, bool /*reportSegmentation*/, bool /*reportAllFactors*/, ostream& out)
+{
+
+ for (size_t i = 0 ; i < mbrBestHypo.size() ; i++) {
+ const Factor *factor = mbrBestHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
+ CHECK(factor);
+ if (i>0) out << " " << *factor;
+ else out << *factor;
+ }
+ out << endl;
+}
+
+
+void OutputInput(std::vector<const Phrase*>& map, const Hypothesis* hypo)
+{
+ if (hypo->GetPrevHypo()) {
+ OutputInput(map, hypo->GetPrevHypo());
+ map[hypo->GetCurrSourceWordsRange().GetStartPos()] = hypo->GetSourcePhrase();
+ }
+}
+
+void OutputInput(std::ostream& os, const Hypothesis* hypo)
+{
+ size_t len = hypo->GetInput().GetSize();
+ std::vector<const Phrase*> inp_phrases(len, 0);
+ OutputInput(inp_phrases, hypo);
+ for (size_t i=0; i<len; ++i)
+ if (inp_phrases[i]) os << *inp_phrases[i];
+}
+
+void IOWrapper::OutputBestHypo(const Hypothesis *hypo, long /*translationId*/, bool reportSegmentation, bool reportAllFactors)
+{
+ if (hypo != NULL) {
+ VERBOSE(1,"BEST TRANSLATION: " << *hypo << endl);
+ VERBOSE(3,"Best path: ");
+ Backtrack(hypo);
+ VERBOSE(3,"0" << std::endl);
+ if (!m_surpressSingleBestOutput) {
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
+ OutputInput(cout, hypo);
+ cout << "||| ";
+ }
+ OutputBestSurface(cout, hypo, m_outputFactorOrder, reportSegmentation, reportAllFactors);
+ cout << endl;
+ }
+ } else {
+ VERBOSE(1, "NO BEST TRANSLATION" << endl);
+ if (!m_surpressSingleBestOutput) {
+ cout << endl;
+ }
+ }
+}
+
+void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>& outputFactorOrder, const TranslationSystem* system, long translationId, bool reportSegmentation)
+{
+ const StaticData &staticData = StaticData::Instance();
+ bool labeledOutput = staticData.IsLabeledNBestList();
+ bool reportAllFactors = staticData.GetReportAllFactorsNBest();
+ bool includeAlignment = staticData.NBestIncludesAlignment();
+ bool includeWordAlignment = staticData.PrintAlignmentInfoInNbest();
+
+ TrellisPathList::const_iterator iter;
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+ const TrellisPath &path = **iter;
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+ // print the surface factor of the translation
+ out << translationId << " ||| ";
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ OutputSurface(out, edge, outputFactorOrder, reportSegmentation, reportAllFactors);
+ }
+ out << " |||";
+
+ std::string lastName = "";
+ const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
+ for( size_t i=0; i<sff.size(); i++ ) {
+ if( labeledOutput && lastName != sff[i]->GetScoreProducerWeightShortName() ) {
+ lastName = sff[i]->GetScoreProducerWeightShortName();
+ out << " " << lastName << ":";
+ }
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( sff[i] );
+ for (size_t j = 0; j<scores.size(); ++j) {
+ out << " " << scores[j];
+ }
+ }
+
+ const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
+ for( size_t i=0; i<slf.size(); i++ ) {
+ if( labeledOutput && lastName != slf[i]->GetScoreProducerWeightShortName() ) {
+ lastName = slf[i]->GetScoreProducerWeightShortName();
+ out << " " << lastName << ":";
+ }
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( slf[i] );
+ for (size_t j = 0; j<scores.size(); ++j) {
+ out << " " << scores[j];
+ }
+ }
+
+ // translation components
+ const vector<PhraseDictionaryFeature*>& pds = system->GetPhraseDictionaries();
+ if (pds.size() > 0) {
+
+ for( size_t i=0; i<pds.size(); i++ ) {
+ size_t pd_numinputscore = pds[i]->GetNumInputScores();
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( pds[i] );
+ for (size_t j = 0; j<scores.size(); ++j){
+
+ if (labeledOutput && (i == 0) ){
+ if ((j == 0) || (j == pd_numinputscore)){
+ lastName = pds[i]->GetScoreProducerWeightShortName(j);
+ out << " " << lastName << ":";
+ }
+ }
+ out << " " << scores[j];
+ }
+ }
+ }
+
+ // generation
+ const vector<GenerationDictionary*>& gds = system->GetGenerationDictionaries();
+ if (gds.size() > 0) {
+
+ for( size_t i=0; i<gds.size(); i++ ) {
+ size_t pd_numinputscore = gds[i]->GetNumInputScores();
+ vector<float> scores = path.GetScoreBreakdown().GetScoresForProducer( gds[i] );
+ for (size_t j = 0; j<scores.size(); ++j){
+
+ if (labeledOutput && (i == 0) ){
+ if ((j == 0) || (j == pd_numinputscore)){
+ lastName = gds[i]->GetScoreProducerWeightShortName(j);
+ out << " " << lastName << ":";
+ }
+ }
+ out << " " << scores[j];
+ }
+ }
+ }
+
+ // total
+ out << " ||| " << path.GetTotalScore();
+
+ //phrase-to-phrase alignment
+ if (includeAlignment) {
+ out << " |||";
+ for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ WordsRange targetRange = path.GetTargetWordsRange(edge);
+ out << " " << sourceRange.GetStartPos();
+ if (sourceRange.GetStartPos() < sourceRange.GetEndPos()) {
+ out << "-" << sourceRange.GetEndPos();
+ }
+ out<< "=" << targetRange.GetStartPos();
+ if (targetRange.GetStartPos() < targetRange.GetEndPos()) {
+ out<< "-" << targetRange.GetEndPos();
+ }
+ }
+ }
+
+ if (includeWordAlignment) {
+ out << " ||| ";
+ for (int currEdge = (int)edges.size() - 2 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const WordsRange &sourceRange = edge.GetCurrSourceWordsRange();
+ WordsRange targetRange = path.GetTargetWordsRange(edge);
+ const int sourceOffset = sourceRange.GetStartPos();
+ const int targetOffset = targetRange.GetStartPos();
+ const AlignmentInfo &ai = edge.GetCurrTargetPhrase().GetAlignmentInfo();
+
+ OutputAlignment(out, ai, sourceOffset, targetOffset);
+
+ }
+ }
+
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
+ out << "|||";
+ OutputInput(out, edges[0]);
+ }
+
+ out << endl;
+ }
+
+
+ out <<std::flush;
+}
+
+void OutputLatticeMBRNBest(std::ostream& out, const vector<LatticeMBRSolution>& solutions,long translationId)
+{
+ for (vector<LatticeMBRSolution>::const_iterator si = solutions.begin(); si != solutions.end(); ++si) {
+ out << translationId;
+ out << " |||";
+ const vector<Word> mbrHypo = si->GetWords();
+ for (size_t i = 0 ; i < mbrHypo.size() ; i++) {
+ const Factor *factor = mbrHypo[i].GetFactor(StaticData::Instance().GetOutputFactorOrder()[0]);
+ if (i>0) out << " " << *factor;
+ else out << *factor;
+ }
+ out << " |||";
+ out << " map: " << si->GetMapScore();
+ out << " w: " << mbrHypo.size();
+ const vector<float>& ngramScores = si->GetNgramScores();
+ for (size_t i = 0; i < ngramScores.size(); ++i) {
+ out << " " << ngramScores[i];
+ }
+ out << " ||| " << si->GetScore();
+
+ out << endl;
+ }
+}
+
+
+void IOWrapper::OutputLatticeMBRNBestList(const vector<LatticeMBRSolution>& solutions,long translationId)
+{
+ OutputLatticeMBRNBest(*m_nBestStream, solutions,translationId);
+}
+
+bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source)
+{
+ delete source;
+ switch(inputType) {
+ case SentenceInput:
+ source = ioWrapper.GetInput(new Sentence);
+ break;
+ case ConfusionNetworkInput:
+ source = ioWrapper.GetInput(new ConfusionNet);
+ break;
+ case WordLatticeInput:
+ source = ioWrapper.GetInput(new WordLattice);
+ break;
+ default:
+ TRACE_ERR("Unknown input type: " << inputType << "\n");
+ }
+ return (source ? true : false);
+}
+
+
+
+IOWrapper *GetIOWrapper(const StaticData &staticData)
+{
+ IOWrapper *ioWrapper;
+ const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
+ ,&outputFactorOrder = staticData.GetOutputFactorOrder();
+ FactorMask inputFactorUsed(inputFactorOrder);
+
+ // io
+ if (staticData.GetParam("input-file").size() == 1) {
+ VERBOSE(2,"IO from File" << endl);
+ string filePath = staticData.GetParam("input-file")[0];
+
+ ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
+ , staticData.GetNBestSize()
+ , staticData.GetNBestFilePath()
+ , filePath);
+ } else {
+ VERBOSE(1,"IO from STDOUT/STDIN" << endl);
+ ioWrapper = new IOWrapper(inputFactorOrder, outputFactorOrder, inputFactorUsed
+ , staticData.GetNBestSize()
+ , staticData.GetNBestFilePath());
+ }
+ ioWrapper->ResetTranslationId();
+
+ IFVERBOSE(1)
+ PrintUserTime("Created input-output object");
+
+ return ioWrapper;
+}
+
+}
+
diff --git a/contrib/relent-filter/src/IOWrapper.h b/contrib/relent-filter/src/IOWrapper.h
new file mode 100755
index 000000000..e44208002
--- /dev/null
+++ b/contrib/relent-filter/src/IOWrapper.h
@@ -0,0 +1,142 @@
+// $Id$
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2006 University of Edinburgh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the University of Edinburgh nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+// example file on how to use moses library
+
+#ifndef moses_cmd_IOWrapper_h
+#define moses_cmd_IOWrapper_h
+
+#include <cassert>
+#include <fstream>
+#include <ostream>
+#include <vector>
+#include "util/check.hh"
+
+#include "TypeDef.h"
+#include "Sentence.h"
+#include "FactorTypeSet.h"
+#include "FactorCollection.h"
+#include "Hypothesis.h"
+#include "OutputCollector.h"
+#include "TrellisPathList.h"
+#include "InputFileStream.h"
+#include "InputType.h"
+#include "WordLattice.h"
+#include "LatticeMBR.h"
+
+namespace MosesCmd
+{
+
+/** Helper class that holds misc variables to write data out to command line.
+ */
+class IOWrapper
+{
+protected:
+ long m_translationId;
+
+ const std::vector<Moses::FactorType> &m_inputFactorOrder;
+ const std::vector<Moses::FactorType> &m_outputFactorOrder;
+ const Moses::FactorMask &m_inputFactorUsed;
+ std::string m_inputFilePath;
+ Moses::InputFileStream *m_inputFile;
+ std::istream *m_inputStream;
+ std::ostream *m_nBestStream
+ ,*m_outputWordGraphStream,*m_outputSearchGraphStream;
+ std::ostream *m_detailedTranslationReportingStream;
+ std::ofstream *m_alignmentOutputStream;
+ bool m_surpressSingleBestOutput;
+
+ void Initialization(const std::vector<Moses::FactorType> &inputFactorOrder
+ , const std::vector<Moses::FactorType> &outputFactorOrder
+ , const Moses::FactorMask &inputFactorUsed
+ , size_t nBestSize
+ , const std::string &nBestFilePath);
+
+public:
+ IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
+ , const std::vector<Moses::FactorType> &outputFactorOrder
+ , const Moses::FactorMask &inputFactorUsed
+ , size_t nBestSize
+ , const std::string &nBestFilePath);
+
+ IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
+ , const std::vector<Moses::FactorType> &outputFactorOrder
+ , const Moses::FactorMask &inputFactorUsed
+ , size_t nBestSize
+ , const std::string &nBestFilePath
+ , const std::string &infilePath);
+ ~IOWrapper();
+
+ Moses::InputType* GetInput(Moses::InputType *inputType);
+
+ void OutputBestHypo(const Moses::Hypothesis *hypo, long translationId, bool reportSegmentation, bool reportAllFactors);
+ void OutputLatticeMBRNBestList(const std::vector<LatticeMBRSolution>& solutions,long translationId);
+ void Backtrack(const Moses::Hypothesis *hypo);
+
+ void ResetTranslationId() {
+ m_translationId = 0;
+ }
+
+ std::ofstream *GetAlignmentOutputStream() {
+ return m_alignmentOutputStream;
+ }
+
+ std::ostream &GetOutputWordGraphStream() {
+ return *m_outputWordGraphStream;
+ }
+ std::ostream &GetOutputSearchGraphStream() {
+ return *m_outputSearchGraphStream;
+ }
+
+ std::ostream &GetDetailedTranslationReportingStream() {
+ assert (m_detailedTranslationReportingStream);
+ return *m_detailedTranslationReportingStream;
+ }
+};
+
+IOWrapper *GetIOWrapper(const Moses::StaticData &staticData);
+bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
+void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
+void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
+ const Moses::TranslationSystem* system, long translationId, bool reportSegmentation);
+void OutputLatticeMBRNBest(std::ostream& out, const std::vector<LatticeMBRSolution>& solutions,long translationId);
+void OutputBestHypo(const std::vector<Moses::Word>& mbrBestHypo, long /*translationId*/,
+ bool reportSegmentation, bool reportAllFactors, std::ostream& out);
+void OutputBestHypo(const Moses::TrellisPath &path, long /*translationId*/,bool reportSegmentation, bool reportAllFactors, std::ostream &out);
+void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
+void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
+void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
+
+
+}
+
+#endif
diff --git a/contrib/relent-filter/src/Jamfile b/contrib/relent-filter/src/Jamfile
new file mode 100755
index 000000000..c0aa6160d
--- /dev/null
+++ b/contrib/relent-filter/src/Jamfile
@@ -0,0 +1,6 @@
+alias deps : ../../../moses/src//moses ;
+
+exe calcDivergence : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp LatticeMBR.cpp RelativeEntropyCalc.cpp deps ;
+
+alias programs : calcDivergence ;
+
diff --git a/contrib/relent-filter/src/LatticeMBR.cpp b/contrib/relent-filter/src/LatticeMBR.cpp
new file mode 100755
index 000000000..2bd62747e
--- /dev/null
+++ b/contrib/relent-filter/src/LatticeMBR.cpp
@@ -0,0 +1,669 @@
+/*
+ * LatticeMBR.cpp
+ * moses-cmd
+ *
+ * Created by Abhishek Arun on 26/01/2010.
+ * Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#include "LatticeMBR.h"
+#include "StaticData.h"
+#include <algorithm>
+#include <set>
+
+using namespace std;
+using namespace Moses;
+
+namespace MosesCmd
+{
+
+size_t bleu_order = 4;
+float UNKNGRAMLOGPROB = -20;
+void GetOutputWords(const TrellisPath &path, vector <Word> &translation)
+{
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+ // print the surface factor of the translation
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const Phrase &phrase = edge.GetCurrTargetPhrase();
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ translation.push_back(phrase.GetWord(pos));
+ }
+ }
+}
+
+
+void extract_ngrams(const vector<Word >& sentence, map < Phrase, int > & allngrams)
+{
+ for (int k = 0; k < (int)bleu_order; k++) {
+ for(int i =0; i < max((int)sentence.size()-k,0); i++) {
+ Phrase ngram( k+1);
+ for ( int j = i; j<= i+k; j++) {
+ ngram.AddWord(sentence[j]);
+ }
+ ++allngrams[ngram];
+ }
+ }
+}
+
+
+
+void NgramScores::addScore(const Hypothesis* node, const Phrase& ngram, float score)
+{
+ set<Phrase>::const_iterator ngramIter = m_ngrams.find(ngram);
+ if (ngramIter == m_ngrams.end()) {
+ ngramIter = m_ngrams.insert(ngram).first;
+ }
+ map<const Phrase*,float>& ngramScores = m_scores[node];
+ map<const Phrase*,float>::iterator scoreIter = ngramScores.find(&(*ngramIter));
+ if (scoreIter == ngramScores.end()) {
+ ngramScores[&(*ngramIter)] = score;
+ } else {
+ ngramScores[&(*ngramIter)] = log_sum(score,scoreIter->second);
+ }
+}
+
+NgramScores::NodeScoreIterator NgramScores::nodeBegin(const Hypothesis* node)
+{
+ return m_scores[node].begin();
+}
+
+
+NgramScores::NodeScoreIterator NgramScores::nodeEnd(const Hypothesis* node)
+{
+ return m_scores[node].end();
+}
+
+LatticeMBRSolution::LatticeMBRSolution(const TrellisPath& path, bool isMap) :
+ m_score(0.0f)
+{
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const Phrase &phrase = edge.GetCurrTargetPhrase();
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+ m_words.push_back(phrase.GetWord(pos));
+ }
+ }
+ if (isMap) {
+ m_mapScore = path.GetTotalScore();
+ } else {
+ m_mapScore = 0;
+ }
+}
+
+
+void LatticeMBRSolution::CalcScore(map<Phrase, float>& finalNgramScores, const vector<float>& thetas, float mapWeight)
+{
+ m_ngramScores.assign(thetas.size()-1, -10000);
+
+ map < Phrase, int > counts;
+ extract_ngrams(m_words,counts);
+
+ //Now score this translation
+ m_score = thetas[0] * m_words.size();
+
+ //Calculate the ngramScores, working in log space at first
+ for (map < Phrase, int >::iterator ngrams = counts.begin(); ngrams != counts.end(); ++ngrams) {
+ float ngramPosterior = UNKNGRAMLOGPROB;
+ map<Phrase,float>::const_iterator ngramPosteriorIt = finalNgramScores.find(ngrams->first);
+ if (ngramPosteriorIt != finalNgramScores.end()) {
+ ngramPosterior = ngramPosteriorIt->second;
+ }
+ size_t ngramSize = ngrams->first.GetSize();
+ m_ngramScores[ngramSize-1] = log_sum(log((float)ngrams->second) + ngramPosterior,m_ngramScores[ngramSize-1]);
+ }
+
+ //convert from log to probability and create weighted sum
+ for (size_t i = 0; i < m_ngramScores.size(); ++i) {
+ m_ngramScores[i] = exp(m_ngramScores[i]);
+ m_score += thetas[i+1] * m_ngramScores[i];
+ }
+
+
+ //The map score
+ m_score += m_mapScore*mapWeight;
+}
+
+
+void pruneLatticeFB(Lattice & connectedHyp, map < const Hypothesis*, set <const Hypothesis* > > & outgoingHyps, map<const Hypothesis*, vector<Edge> >& incomingEdges,
+ const vector< float> & estimatedScores, const Hypothesis* bestHypo, size_t edgeDensity, float scale)
+{
+
+ //Need hyp 0 in connectedHyp - Find empty hypothesis
+ VERBOSE(2,"Pruning lattice to edge density " << edgeDensity << endl);
+ const Hypothesis* emptyHyp = connectedHyp.at(0);
+ while (emptyHyp->GetId() != 0) {
+ emptyHyp = emptyHyp->GetPrevHypo();
+ }
+ connectedHyp.push_back(emptyHyp); //Add it to list of hyps
+
+ //Need hyp 0's outgoing Hyps
+ for (size_t i = 0; i < connectedHyp.size(); ++i) {
+ if (connectedHyp[i]->GetId() > 0 && connectedHyp[i]->GetPrevHypo()->GetId() == 0)
+ outgoingHyps[emptyHyp].insert(connectedHyp[i]);
+ }
+
+ //sort hyps based on estimated scores - do so by copying to multimap
+ multimap<float, const Hypothesis*> sortHypsByVal;
+ for (size_t i =0; i < estimatedScores.size(); ++i) {
+ sortHypsByVal.insert(make_pair(estimatedScores[i], connectedHyp[i]));
+ }
+
+ multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end();
+ float bestScore = it->first;
+ //store best score as score of hyp 0
+ sortHypsByVal.insert(make_pair(bestScore, emptyHyp));
+
+
+ IFVERBOSE(3) {
+ for (multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end(); it != --sortHypsByVal.begin(); --it) {
+ const Hypothesis* currHyp = it->second;
+ cerr << "Hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl;
+ }
+ }
+
+
+ set <const Hypothesis*> survivingHyps; //store hyps that make the cut in this
+
+ VERBOSE(2, "BEST HYPO TARGET LENGTH : " << bestHypo->GetSize() << endl)
+ size_t numEdgesTotal = edgeDensity * bestHypo->GetSize(); //as per Shankar, aim for (density * target length of MAP solution) arcs
+ size_t numEdgesCreated = 0;
+ VERBOSE(2, "Target edge count: " << numEdgesTotal << endl);
+
+ float prevScore = -999999;
+
+ //now iterate over multimap
+ for (multimap<float, const Hypothesis*>::const_iterator it = --sortHypsByVal.end(); it != --sortHypsByVal.begin(); --it) {
+ float currEstimatedScore = it->first;
+ const Hypothesis* currHyp = it->second;
+
+ if (numEdgesCreated >= numEdgesTotal && prevScore > currEstimatedScore) //if this hyp has equal estimated score to previous, include its edges too
+ break;
+
+ prevScore = currEstimatedScore;
+ VERBOSE(3, "Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)
+ VERBOSE(3, "Considering hyp " << currHyp->GetId() << ", estimated score: " << it->first << endl)
+
+ survivingHyps.insert(currHyp); //CurrHyp made the cut
+
+ // is its best predecessor already included ?
+ if (survivingHyps.find(currHyp->GetPrevHypo()) != survivingHyps.end()) { //yes, then add an edge
+ vector <Edge>& edges = incomingEdges[currHyp];
+ Edge winningEdge(currHyp->GetPrevHypo(),currHyp,scale*(currHyp->GetScore() - currHyp->GetPrevHypo()->GetScore()),currHyp->GetCurrTargetPhrase());
+ edges.push_back(winningEdge);
+ ++numEdgesCreated;
+ }
+
+ //let's try the arcs too
+ const ArcList *arcList = currHyp->GetArcList();
+ if (arcList != NULL) {
+ ArcList::const_iterator iterArcList;
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
+ const Hypothesis *loserHypo = *iterArcList;
+ const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
+ if (survivingHyps.find(loserPrevHypo) != survivingHyps.end()) { //found it, add edge
+ double arcScore = loserHypo->GetScore() - loserPrevHypo->GetScore();
+ Edge losingEdge(loserPrevHypo, currHyp, arcScore*scale, loserHypo->GetCurrTargetPhrase());
+ vector <Edge>& edges = incomingEdges[currHyp];
+ edges.push_back(losingEdge);
+ ++numEdgesCreated;
+ }
+ }
+ }
+
+ //Now if a successor node has already been visited, add an edge connecting the two
+ map < const Hypothesis*, set < const Hypothesis* > >::const_iterator outgoingIt = outgoingHyps.find(currHyp);
+
+ if (outgoingIt != outgoingHyps.end()) {//currHyp does have successors
+ const set<const Hypothesis*> & outHyps = outgoingIt->second; //the successors
+ for (set<const Hypothesis*>::const_iterator outHypIts = outHyps.begin(); outHypIts != outHyps.end(); ++outHypIts) {
+ const Hypothesis* succHyp = *outHypIts;
+
+ if (survivingHyps.find(succHyp) == survivingHyps.end()) //Have we encountered the successor yet?
+ continue; //No, move on to next
+
+ //Curr Hyp can be : a) the best predecessor of succ b) or an arc attached to succ
+ if (succHyp->GetPrevHypo() == currHyp) { //best predecessor
+ vector <Edge>& succEdges = incomingEdges[succHyp];
+ Edge succWinningEdge(currHyp, succHyp, scale*(succHyp->GetScore() - currHyp->GetScore()), succHyp->GetCurrTargetPhrase());
+ succEdges.push_back(succWinningEdge);
+ survivingHyps.insert(succHyp);
+ ++numEdgesCreated;
+ }
+
+ //now, let's find an arc
+ const ArcList *arcList = succHyp->GetArcList();
+ if (arcList != NULL) {
+ ArcList::const_iterator iterArcList;
+ //QUESTION: What happens if there's more than one loserPrevHypo?
+ for (iterArcList = arcList->begin() ; iterArcList != arcList->end() ; ++iterArcList) {
+ const Hypothesis *loserHypo = *iterArcList;
+ const Hypothesis* loserPrevHypo = loserHypo->GetPrevHypo();
+ if (loserPrevHypo == currHyp) { //found it
+ vector <Edge>& succEdges = incomingEdges[succHyp];
+ double arcScore = loserHypo->GetScore() - currHyp->GetScore();
+ Edge losingEdge(currHyp, succHyp,scale* arcScore, loserHypo->GetCurrTargetPhrase());
+ succEdges.push_back(losingEdge);
+ ++numEdgesCreated;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ connectedHyp.clear();
+ for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
+ connectedHyp.push_back(*it);
+ }
+
+ VERBOSE(2, "Done! Num edges created : "<< numEdgesCreated << ", numEdges wanted " << numEdgesTotal << endl)
+
+ IFVERBOSE(3) {
+ cerr << "Surviving hyps: " ;
+ for (set <const Hypothesis*>::iterator it = survivingHyps.begin(); it != survivingHyps.end(); ++it) {
+ cerr << (*it)->GetId() << " ";
+ }
+ cerr << endl;
+ }
+
+
+}
+
+void calcNgramExpectations(Lattice & connectedHyp, map<const Hypothesis*, vector<Edge> >& incomingEdges,
+ map<Phrase, float>& finalNgramScores, bool posteriors)
+{
+
+ sort(connectedHyp.begin(),connectedHyp.end(),ascendingCoverageCmp); //sort by increasing source word cov
+
+ /*cerr << "Lattice:" << endl;
+ for (Lattice::const_iterator i = connectedHyp.begin(); i != connectedHyp.end(); ++i) {
+ const Hypothesis* h = *i;
+ cerr << *h << endl;
+ const vector<Edge>& edges = incomingEdges[h];
+ for (size_t e = 0; e < edges.size(); ++e) {
+ cerr << edges[e];
+ }
+ }*/
+
+ map<const Hypothesis*, float> forwardScore;
+ forwardScore[connectedHyp[0]] = 0.0f; //forward score of hyp 0 is 1 (or 0 in logprob space)
+ set< const Hypothesis *> finalHyps; //store completed hyps
+
+ NgramScores ngramScores;//ngram scores for each hyp
+
+ for (size_t i = 1; i < connectedHyp.size(); ++i) {
+ const Hypothesis* currHyp = connectedHyp[i];
+ if (currHyp->GetWordsBitmap().IsComplete()) {
+ finalHyps.insert(currHyp);
+ }
+
+ VERBOSE(3, "Processing hyp: " << currHyp->GetId() << ", num words cov= " << currHyp->GetWordsBitmap().GetNumWordsCovered() << endl)
+
+ vector <Edge> & edges = incomingEdges[currHyp];
+ for (size_t e = 0; e < edges.size(); ++e) {
+ const Edge& edge = edges[e];
+ if (forwardScore.find(currHyp) == forwardScore.end()) {
+ forwardScore[currHyp] = forwardScore[edge.GetTailNode()] + edge.GetScore();
+ VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] = fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
+ } else {
+ forwardScore[currHyp] = log_sum(forwardScore[currHyp], forwardScore[edge.GetTailNode()] + edge.GetScore());
+ VERBOSE(3, "Fwd score["<<currHyp->GetId()<<"] += fwdScore["<<edge.GetTailNode()->GetId() << "] + edge Score: " << edge.GetScore() << endl)
+ }
+ }
+
+ //Process ngrams now
+ for (size_t j =0 ; j < edges.size(); ++j) {
+ Edge& edge = edges[j];
+ const NgramHistory & incomingPhrases = edge.GetNgrams(incomingEdges);
+
+ //let's first score ngrams introduced by this edge
+ for (NgramHistory::const_iterator it = incomingPhrases.begin(); it != incomingPhrases.end(); ++it) {
+ const Phrase& ngram = it->first;
+ const PathCounts& pathCounts = it->second;
+ VERBOSE(4, "Calculating score for: " << it->first << endl)
+
+ for (PathCounts::const_iterator pathCountIt = pathCounts.begin(); pathCountIt != pathCounts.end(); ++pathCountIt) {
+ //Score of an n-gram is forward score of head node of leftmost edge + all edge scores
+ const Path& path = pathCountIt->first;
+ //cerr << "path count for " << ngram << " is " << pathCountIt->second << endl;
+ float score = forwardScore[path[0]->GetTailNode()];
+ for (size_t i = 0; i < path.size(); ++i) {
+ score += path[i]->GetScore();
+ }
+ //if we're doing expectations, then the number of times the ngram
+ //appears on the path is relevant.
+ size_t count = posteriors ? 1 : pathCountIt->second;
+ for (size_t k = 0; k < count; ++k) {
+ ngramScores.addScore(currHyp,ngram,score);
+ }
+ }
+ }
+
+ //Now score ngrams that are just being propagated from the history
+ for (NgramScores::NodeScoreIterator it = ngramScores.nodeBegin(edge.GetTailNode());
+ it != ngramScores.nodeEnd(edge.GetTailNode()); ++it) {
+ const Phrase & currNgram = *(it->first);
+ float currNgramScore = it->second;
+ VERBOSE(4, "Calculating score for: " << currNgram << endl)
+
+ // For posteriors, don't double count ngrams
+ if (!posteriors || incomingPhrases.find(currNgram) == incomingPhrases.end()) {
+ float score = edge.GetScore() + currNgramScore;
+ ngramScores.addScore(currHyp,currNgram,score);
+ }
+ }
+
+ }
+ }
+
+ float Z = 9999999; //the total score of the lattice
+
+ //Done - Print out ngram posteriors for final hyps
+ for (set< const Hypothesis *>::iterator finalHyp = finalHyps.begin(); finalHyp != finalHyps.end(); ++finalHyp) {
+ const Hypothesis* hyp = *finalHyp;
+
+ for (NgramScores::NodeScoreIterator it = ngramScores.nodeBegin(hyp); it != ngramScores.nodeEnd(hyp); ++it) {
+ const Phrase& ngram = *(it->first);
+ if (finalNgramScores.find(ngram) == finalNgramScores.end()) {
+ finalNgramScores[ngram] = it->second;
+ } else {
+ finalNgramScores[ngram] = log_sum(it->second, finalNgramScores[ngram]);
+ }
+ }
+
+ if (Z == 9999999) {
+ Z = forwardScore[hyp];
+ } else {
+ Z = log_sum(Z, forwardScore[hyp]);
+ }
+ }
+
+ //Z *= scale; //scale the score
+
+ for (map<Phrase, float>::iterator finalScoresIt = finalNgramScores.begin(); finalScoresIt != finalNgramScores.end(); ++finalScoresIt) {
+ finalScoresIt->second = finalScoresIt->second - Z;
+ IFVERBOSE(2) {
+ VERBOSE(2,finalScoresIt->first << " [" << finalScoresIt->second << "]" << endl);
+ }
+ }
+
+}
+
+const NgramHistory& Edge::GetNgrams(map<const Hypothesis*, vector<Edge> > & incomingEdges)
+{
+
+ if (m_ngrams.size() > 0)
+ return m_ngrams;
+
+ const Phrase& currPhrase = GetWords();
+ //Extract the n-grams local to this edge
+ for (size_t start = 0; start < currPhrase.GetSize(); ++start) {
+ for (size_t end = start; end < start + bleu_order; ++end) {
+ if (end < currPhrase.GetSize()) {
+ Phrase edgeNgram(end-start+1);
+ for (size_t index = start; index <= end; ++index) {
+ edgeNgram.AddWord(currPhrase.GetWord(index));
+ }
+ //cout << "Inserting Phrase : " << edgeNgram << endl;
+ vector<const Edge*> edgeHistory;
+ edgeHistory.push_back(this);
+ storeNgramHistory(edgeNgram, edgeHistory);
+ } else {
+ break;
+ }
+ }
+ }
+
+ map<const Hypothesis*, vector<Edge> >::iterator it = incomingEdges.find(m_tailNode);
+ if (it != incomingEdges.end()) { //node has incoming edges
+ vector<Edge> & inEdges = it->second;
+
+ for (vector<Edge>::iterator edge = inEdges.begin(); edge != inEdges.end(); ++edge) {//add the ngrams straddling prev and curr edge
+ const NgramHistory & edgeIncomingNgrams = edge->GetNgrams(incomingEdges);
+ for (NgramHistory::const_iterator edgeInNgramHist = edgeIncomingNgrams.begin(); edgeInNgramHist != edgeIncomingNgrams.end(); ++edgeInNgramHist) {
+ const Phrase& edgeIncomingNgram = edgeInNgramHist->first;
+ const PathCounts & edgeIncomingNgramPaths = edgeInNgramHist->second;
+ size_t back = min(edgeIncomingNgram.GetSize(), edge->GetWordsSize());
+ const Phrase& edgeWords = edge->GetWords();
+ IFVERBOSE(3) {
+ cerr << "Edge: "<< *edge <<endl;
+ cerr << "edgeWords: " << edgeWords << endl;
+ cerr << "edgeInNgram: " << edgeIncomingNgram << endl;
+ }
+
+ Phrase edgeSuffix(ARRAY_SIZE_INCR);
+ Phrase ngramSuffix(ARRAY_SIZE_INCR);
+ GetPhraseSuffix(edgeWords,back,edgeSuffix);
+ GetPhraseSuffix(edgeIncomingNgram,back,ngramSuffix);
+
+ if (ngramSuffix == edgeSuffix) { //we've got the suffix of previous edge
+ size_t edgeInNgramSize = edgeIncomingNgram.GetSize();
+
+ for (size_t i = 0; i < GetWordsSize() && i + edgeInNgramSize < bleu_order ; ++i) {
+ Phrase newNgram(edgeIncomingNgram);
+ for (size_t j = 0; j <= i ; ++j) {
+ newNgram.AddWord(GetWords().GetWord(j));
+ }
+ VERBOSE(3, "Inserting New Phrase : " << newNgram << endl)
+
+ for (PathCounts::const_iterator pathIt = edgeIncomingNgramPaths.begin(); pathIt != edgeIncomingNgramPaths.end(); ++pathIt) {
+ Path newNgramPath = pathIt->first;
+ newNgramPath.push_back(this);
+ storeNgramHistory(newNgram, newNgramPath, pathIt->second);
+ }
+ }
+ }
+ }
+ }
+ }
+ return m_ngrams;
+}
+
+//Add the last lastN words of origPhrase to targetPhrase
+void Edge::GetPhraseSuffix(const Phrase& origPhrase, size_t lastN, Phrase& targetPhrase) const
+{
+ size_t origSize = origPhrase.GetSize();
+ size_t startIndex = origSize - lastN;
+ for (size_t index = startIndex; index < origPhrase.GetSize(); ++index) {
+ targetPhrase.AddWord(origPhrase.GetWord(index));
+ }
+}
+
+bool Edge::operator< (const Edge& compare ) const
+{
+ if (m_headNode->GetId() < compare.m_headNode->GetId())
+ return true;
+ if (compare.m_headNode->GetId() < m_headNode->GetId())
+ return false;
+ if (m_tailNode->GetId() < compare.m_tailNode->GetId())
+ return true;
+ if (compare.m_tailNode->GetId() < m_tailNode->GetId())
+ return false;
+ return GetScore() < compare.GetScore();
+}
+
+ostream& operator<< (ostream& out, const Edge& edge)
+{
+ out << "Head: " << edge.m_headNode->GetId() << ", Tail: " << edge.m_tailNode->GetId() << ", Score: " << edge.m_score << ", Phrase: " << edge.m_targetPhrase << endl;
+ return out;
+}
+
+bool ascendingCoverageCmp(const Hypothesis* a, const Hypothesis* b)
+{
+ return a->GetWordsBitmap().GetNumWordsCovered() < b->GetWordsBitmap().GetNumWordsCovered();
+}
+
+void getLatticeMBRNBest(Manager& manager, TrellisPathList& nBestList,
+ vector<LatticeMBRSolution>& solutions, size_t n)
+{
+ const StaticData& staticData = StaticData::Instance();
+ std::map < int, bool > connected;
+ std::vector< const Hypothesis *> connectedList;
+ map<Phrase, float> ngramPosteriors;
+ std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
+ map<const Hypothesis*, vector<Edge> > incomingEdges;
+ vector< float> estimatedScores;
+ manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores);
+ pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores, manager.GetBestHypothesis(), staticData.GetLatticeMBRPruningFactor(),staticData.GetMBRScale());
+ calcNgramExpectations(connectedList, incomingEdges, ngramPosteriors,true);
+
+ vector<float> mbrThetas = staticData.GetLatticeMBRThetas();
+ float p = staticData.GetLatticeMBRPrecision();
+ float r = staticData.GetLatticeMBRPRatio();
+ float mapWeight = staticData.GetLatticeMBRMapWeight();
+ if (mbrThetas.size() == 0) { //thetas not specified on the command line, use p and r instead
+ mbrThetas.push_back(-1); //Theta 0
+ mbrThetas.push_back(1/(bleu_order*p));
+ for (size_t i = 2; i <= bleu_order; ++i) {
+ mbrThetas.push_back(mbrThetas[i-1] / r);
+ }
+ }
+ IFVERBOSE(2) {
+ VERBOSE(2,"Thetas: ");
+ for (size_t i = 0; i < mbrThetas.size(); ++i) {
+ VERBOSE(2,mbrThetas[i] << " ");
+ }
+ VERBOSE(2,endl);
+ }
+ TrellisPathList::const_iterator iter;
+ size_t ctr = 0;
+ LatticeMBRSolutionComparator comparator;
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter, ++ctr) {
+ const TrellisPath &path = **iter;
+ solutions.push_back(LatticeMBRSolution(path,iter==nBestList.begin()));
+ solutions.back().CalcScore(ngramPosteriors,mbrThetas,mapWeight);
+ sort(solutions.begin(), solutions.end(), comparator);
+ while (solutions.size() > n) {
+ solutions.pop_back();
+ }
+ }
+ VERBOSE(2,"LMBR Score: " << solutions[0].GetScore() << endl);
+}
+
+vector<Word> doLatticeMBR(Manager& manager, TrellisPathList& nBestList)
+{
+
+ vector<LatticeMBRSolution> solutions;
+ getLatticeMBRNBest(manager, nBestList, solutions,1);
+ return solutions.at(0).GetWords();
+}
+
+const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestList)
+{
+ static const int BLEU_ORDER = 4;
+ static const float SMOOTH = 1;
+
+ //calculate the ngram expectations
+ const StaticData& staticData = StaticData::Instance();
+ std::map < int, bool > connected;
+ std::vector< const Hypothesis *> connectedList;
+ map<Phrase, float> ngramExpectations;
+ std::map < const Hypothesis*, set <const Hypothesis*> > outgoingHyps;
+ map<const Hypothesis*, vector<Edge> > incomingEdges;
+ vector< float> estimatedScores;
+ manager.GetForwardBackwardSearchGraph(&connected, &connectedList, &outgoingHyps, &estimatedScores);
+ pruneLatticeFB(connectedList, outgoingHyps, incomingEdges, estimatedScores, manager.GetBestHypothesis(), staticData.GetLatticeMBRPruningFactor(),staticData.GetMBRScale());
+ calcNgramExpectations(connectedList, incomingEdges, ngramExpectations,false);
+
+ //expected length is sum of expected unigram counts
+ //cerr << "Thread " << pthread_self() << " Ngram expectations size: " << ngramExpectations.size() << endl;
+ float ref_length = 0.0f;
+ for (map<Phrase,float>::const_iterator ref_iter = ngramExpectations.begin();
+ ref_iter != ngramExpectations.end(); ++ref_iter) {
+ //cerr << "Ngram: " << ref_iter->first << " score: " <<
+ // ref_iter->second << endl;
+ if (ref_iter->first.GetSize() == 1) {
+ ref_length += exp(ref_iter->second);
+ // cerr << "Expected for " << ref_iter->first << " is " << exp(ref_iter->second) << endl;
+ }
+ }
+
+ VERBOSE(2,"REF Length: " << ref_length << endl);
+
+ //use the ngram expectations to rescore the nbest list.
+ TrellisPathList::const_iterator iter;
+ TrellisPathList::const_iterator best = nBestList.end();
+ float bestScore = -100000;
+ //cerr << "nbest list size: " << nBestList.GetSize() << endl;
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+ const TrellisPath &path = **iter;
+ vector<Word> words;
+ map<Phrase,int> ngrams;
+ GetOutputWords(path,words);
+ /*for (size_t i = 0; i < words.size(); ++i) {
+ cerr << words[i].GetFactor(0)->GetString() << " ";
+ }
+ cerr << endl;
+ */
+ extract_ngrams(words,ngrams);
+
+ vector<float> comps(2*BLEU_ORDER+1);
+ float logbleu = 0.0;
+ float brevity = 0.0;
+ int hyp_length = words.size();
+ for (int i = 0; i < BLEU_ORDER; ++i) {
+ comps[2*i] = 0.0;
+ comps[2*i+1] = max(hyp_length-i,0);
+ }
+
+ for (map<Phrase,int>::const_iterator hyp_iter = ngrams.begin();
+ hyp_iter != ngrams.end(); ++hyp_iter) {
+ map<Phrase,float>::const_iterator ref_iter = ngramExpectations.find(hyp_iter->first);
+ if (ref_iter != ngramExpectations.end()) {
+ comps[2*(hyp_iter->first.GetSize()-1)] += min(exp(ref_iter->second), (float)(hyp_iter->second));
+ }
+
+ }
+ comps[comps.size()-1] = ref_length;
+ /*for (size_t i = 0; i < comps.size(); ++i) {
+ cerr << comps[i] << " ";
+ }
+ cerr << endl;
+ */
+
+ float score = 0.0f;
+ if (comps[0] != 0) {
+ for (int i=0; i<BLEU_ORDER; i++) {
+ if ( i > 0 ) {
+ logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
+ } else {
+ logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
+ }
+ }
+ logbleu /= BLEU_ORDER;
+ brevity = 1.0-(float)comps[comps.size()-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
+ if (brevity < 0.0) {
+ logbleu += brevity;
+ }
+ score = exp(logbleu);
+ }
+
+ //cerr << "score: " << score << " bestScore: " << bestScore << endl;
+ if (score > bestScore) {
+ bestScore = score;
+ best = iter;
+ VERBOSE(2,"NEW BEST: " << score << endl);
+ //for (size_t i = 0; i < comps.size(); ++i) {
+ // cerr << comps[i] << " ";
+ //}
+ //cerr << endl;
+ }
+ }
+
+ assert (best != nBestList.end());
+ return **best;
+ //vector<Word> bestWords;
+ //GetOutputWords(**best,bestWords);
+ //return bestWords;
+}
+
+}
+
+
diff --git a/contrib/relent-filter/src/LatticeMBR.h b/contrib/relent-filter/src/LatticeMBR.h
new file mode 100755
index 000000000..14a2e22da
--- /dev/null
+++ b/contrib/relent-filter/src/LatticeMBR.h
@@ -0,0 +1,153 @@
+/*
+ * LatticeMBR.h
+ * moses-cmd
+ *
+ * Created by Abhishek Arun on 26/01/2010.
+ * Copyright 2010 __MyCompanyName__. All rights reserved.
+ *
+ */
+
+#ifndef moses_cmd_LatticeMBR_h
+#define moses_cmd_LatticeMBR_h
+
+#include <map>
+#include <vector>
+#include <set>
+#include "Hypothesis.h"
+#include "Manager.h"
+#include "TrellisPathList.h"
+
+
+
+namespace MosesCmd
+{
+
+class Edge;
+
+typedef std::vector< const Moses::Hypothesis *> Lattice;
+typedef std::vector<const Edge*> Path;
+typedef std::map<Path, size_t> PathCounts;
+typedef std::map<Moses::Phrase, PathCounts > NgramHistory;
+
+class Edge
+{
+ const Moses::Hypothesis* m_tailNode;
+ const Moses::Hypothesis* m_headNode;
+ float m_score;
+ Moses::TargetPhrase m_targetPhrase;
+ NgramHistory m_ngrams;
+
+public:
+ Edge(const Moses::Hypothesis* from, const Moses::Hypothesis* to, float score, const Moses::TargetPhrase& targetPhrase) : m_tailNode(from), m_headNode(to), m_score(score), m_targetPhrase(targetPhrase) {
+ //cout << "Creating new edge from Node " << from->GetId() << ", to Node : " << to->GetId() << ", score: " << score << " phrase: " << targetPhrase << endl;
+ }
+
+ const Moses::Hypothesis* GetHeadNode() const {
+ return m_headNode;
+ }
+
+ const Moses::Hypothesis* GetTailNode() const {
+ return m_tailNode;
+ }
+
+ float GetScore() const {
+ return m_score;
+ }
+
+ size_t GetWordsSize() const {
+ return m_targetPhrase.GetSize();
+ }
+
+ const Moses::Phrase& GetWords() const {
+ return m_targetPhrase;
+ }
+
+ friend std::ostream& operator<< (std::ostream& out, const Edge& edge);
+
+ const NgramHistory& GetNgrams( std::map<const Moses::Hypothesis*, std::vector<Edge> > & incomingEdges) ;
+
+ bool operator < (const Edge & compare) const;
+
+ void GetPhraseSuffix(const Moses::Phrase& origPhrase, size_t lastN, Moses::Phrase& targetPhrase) const;
+
+ void storeNgramHistory(const Moses::Phrase& phrase, Path & path, size_t count = 1) {
+ m_ngrams[phrase][path]+= count;
+ }
+
+};
+
+/**
+* Data structure to hold the ngram scores as we traverse the lattice. Maps (hypo,ngram) to score
+*/
+class NgramScores
+{
+public:
+ NgramScores() {}
+
+ /** logsum this score to the existing score */
+ void addScore(const Moses::Hypothesis* node, const Moses::Phrase& ngram, float score);
+
+ /** Iterate through ngrams for selected node */
+ typedef std::map<const Moses::Phrase*, float>::const_iterator NodeScoreIterator;
+ NodeScoreIterator nodeBegin(const Moses::Hypothesis* node);
+ NodeScoreIterator nodeEnd(const Moses::Hypothesis* node);
+
+private:
+ std::set<Moses::Phrase> m_ngrams;
+ std::map<const Moses::Hypothesis*, std::map<const Moses::Phrase*, float> > m_scores;
+};
+
+
+/** Holds a lattice mbr solution, and its scores */
+class LatticeMBRSolution
+{
+public:
+ /** Read the words from the path */
+ LatticeMBRSolution(const Moses::TrellisPath& path, bool isMap);
+ const std::vector<float>& GetNgramScores() const {
+ return m_ngramScores;
+ }
+ const std::vector<Moses::Word>& GetWords() const {
+ return m_words;
+ }
+ float GetMapScore() const {
+ return m_mapScore;
+ }
+ float GetScore() const {
+ return m_score;
+ }
+
+ /** Initialise ngram scores */
+ void CalcScore(std::map<Moses::Phrase, float>& finalNgramScores, const std::vector<float>& thetas, float mapWeight);
+
+private:
+ std::vector<Moses::Word> m_words;
+ float m_mapScore;
+ std::vector<float> m_ngramScores;
+ float m_score;
+};
+
+struct LatticeMBRSolutionComparator {
+ bool operator()(const LatticeMBRSolution& a, const LatticeMBRSolution& b) {
+ return a.GetScore() > b.GetScore();
+ }
+};
+
+void pruneLatticeFB(Lattice & connectedHyp, std::map < const Moses::Hypothesis*, std::set <const Moses::Hypothesis* > > & outgoingHyps, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges,
+ const std::vector< float> & estimatedScores, const Moses::Hypothesis*, size_t edgeDensity,float scale);
+
+//Use the ngram scores to rerank the nbest list, return at most n solutions
+void getLatticeMBRNBest(Moses::Manager& manager, Moses::TrellisPathList& nBestList, std::vector<LatticeMBRSolution>& solutions, size_t n);
+//calculate expectated ngram counts, clipping at 1 (ie calculating posteriors) if posteriors==true.
+void calcNgramExpectations(Lattice & connectedHyp, std::map<const Moses::Hypothesis*, std::vector<Edge> >& incomingEdges, std::map<Moses::Phrase,
+ float>& finalNgramScores, bool posteriors);
+void GetOutputFactors(const Moses::TrellisPath &path, std::vector <Moses::Word> &translation);
+void extract_ngrams(const std::vector<Moses::Word >& sentence, std::map < Moses::Phrase, int > & allngrams);
+bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b);
+std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+
+}
+
+#endif
diff --git a/contrib/relent-filter/src/LatticeMBRGrid.cpp b/contrib/relent-filter/src/LatticeMBRGrid.cpp
new file mode 100755
index 000000000..71c387839
--- /dev/null
+++ b/contrib/relent-filter/src/LatticeMBRGrid.cpp
@@ -0,0 +1,213 @@
+// $Id: LatticeMBRGrid.cpp 3045 2010-04-05 13:07:29Z hieuhoang1972 $
+
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (c) 2010 University of Edinburgh
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the University of Edinburgh nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+/**
+* Lattice MBR grid search. Enables a grid search through the four parameters (p,r,scale and prune) used in lattice MBR.
+ See 'Lattice Minimum Bayes-Risk Decoding for Statistical Machine Translation by Tromble, Kumar, Och and Macherey,
+ EMNLP 2008 for details of the parameters.
+
+ The grid search is controlled by specifying comma separated lists for the lmbr parameters (-lmbr-p, -lmbr-r,
+ -lmbr-pruning-factor and -mbr-scale). All other parameters are passed through to moses. If any of the lattice mbr
+ parameters are missing, then they are set to their default values. Output is of the form:
+ sentence-id ||| p r prune scale ||| translation-hypothesis
+**/
+
+#include <cstdlib>
+#include <iostream>
+#include <map>
+#include <stdexcept>
+#include <set>
+
+#include "IOWrapper.h"
+#include "LatticeMBR.h"
+#include "Manager.h"
+#include "StaticData.h"
+
+
+using namespace std;
+using namespace Moses;
+using namespace MosesCmd;
+
+//keys
+enum gridkey {lmbr_p,lmbr_r,lmbr_prune,lmbr_scale};
+
+namespace MosesCmd
+{
+
+class Grid
+{
+public:
+ /** Add a parameter with key, command line argument, and default value */
+ void addParam(gridkey key, const string& arg, float defaultValue) {
+ m_args[arg] = key;
+ CHECK(m_grid.find(key) == m_grid.end());
+ m_grid[key].push_back(defaultValue);
+ }
+
+ /** Parse the arguments, removing those that define the grid and returning a copy of the rest */
+ void parseArgs(int& argc, char**& argv) {
+ char** newargv = new char*[argc+1]; //Space to add mbr parameter
+ int newargc = 0;
+ for (int i = 0; i < argc; ++i) {
+ bool consumed = false;
+ for (map<string,gridkey>::const_iterator argi = m_args.begin(); argi != m_args.end(); ++argi) {
+ if (!strcmp(argv[i], argi->first.c_str())) {
+ ++i;
+ if (i >= argc) {
+ cerr << "Error: missing parameter for " << argi->first << endl;
+ throw runtime_error("Missing parameter");
+ } else {
+ string value = argv[i];
+ gridkey key = argi->second;
+ if (m_grid[key].size() != 1) {
+ throw runtime_error("Duplicate grid argument");
+ }
+ m_grid[key].clear();
+ char delim = ',';
+ string::size_type lastpos = value.find_first_not_of(delim);
+ string::size_type pos = value.find_first_of(delim,lastpos);
+ while (string::npos != pos || string::npos != lastpos) {
+ float param = atof(value.substr(lastpos, pos-lastpos).c_str());
+ if (!param) {
+ cerr << "Error: Illegal grid parameter for " << argi->first << endl;
+ throw runtime_error("Illegal grid parameter");
+ }
+ m_grid[key].push_back(param);
+ lastpos = value.find_first_not_of(delim,pos);
+ pos = value.find_first_of(delim,lastpos);
+ }
+ consumed = true;
+ }
+ if (consumed) break;
+ }
+ }
+ if (!consumed) {
+ newargv[newargc] = new char[strlen(argv[i]) + 1];
+ strcpy(newargv[newargc],argv[i]);
+ ++newargc;
+ }
+ }
+ argc = newargc;
+ argv = newargv;
+ }
+
+ /** Get the grid for a particular key.*/
+ const vector<float>& getGrid(gridkey key) const {
+ map<gridkey,vector<float> >::const_iterator iter = m_grid.find(key);
+ assert (iter != m_grid.end());
+ return iter->second;
+
+ }
+
+private:
+ map<gridkey,vector<float> > m_grid;
+ map<string,gridkey> m_args;
+};
+
+} // namespace
+
+int main(int argc, char* argv[])
+{
+ cerr << "Lattice MBR Grid search" << endl;
+
+ Grid grid;
+ grid.addParam(lmbr_p, "-lmbr-p", 0.5);
+ grid.addParam(lmbr_r, "-lmbr-r", 0.5);
+ grid.addParam(lmbr_prune, "-lmbr-pruning-factor",30.0);
+ grid.addParam(lmbr_scale, "-mbr-scale",1.0);
+
+ grid.parseArgs(argc,argv);
+
+ Parameter* params = new Parameter();
+ if (!params->LoadParam(argc,argv)) {
+ params->Explain();
+ exit(1);
+ }
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
+ exit(1);
+ }
+
+ StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
+ staticData.SetUseLatticeMBR(true);
+ IOWrapper* ioWrapper = GetIOWrapper(staticData);
+
+ if (!ioWrapper) {
+ throw runtime_error("Failed to initialise IOWrapper");
+ }
+ size_t nBestSize = staticData.GetMBRSize();
+
+ if (nBestSize <= 0) {
+ throw new runtime_error("Non-positive size specified for n-best list");
+ }
+
+ size_t lineCount = 0;
+ InputType* source = NULL;
+
+ const vector<float>& pgrid = grid.getGrid(lmbr_p);
+ const vector<float>& rgrid = grid.getGrid(lmbr_r);
+ const vector<float>& prune_grid = grid.getGrid(lmbr_prune);
+ const vector<float>& scale_grid = grid.getGrid(lmbr_scale);
+
+ while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
+ ++lineCount;
+ Sentence sentence;
+ const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
+ Manager manager(*source,staticData.GetSearchAlgorithm(), &system);
+ manager.ProcessSentence();
+ TrellisPathList nBestList;
+ manager.CalcNBest(nBestSize, nBestList,true);
+ //grid search
+ for (vector<float>::const_iterator pi = pgrid.begin(); pi != pgrid.end(); ++pi) {
+ float p = *pi;
+ staticData.SetLatticeMBRPrecision(p);
+ for (vector<float>::const_iterator ri = rgrid.begin(); ri != rgrid.end(); ++ri) {
+ float r = *ri;
+ staticData.SetLatticeMBRPRatio(r);
+ for (vector<float>::const_iterator prune_i = prune_grid.begin(); prune_i != prune_grid.end(); ++prune_i) {
+ size_t prune = (size_t)(*prune_i);
+ staticData.SetLatticeMBRPruningFactor(prune);
+ for (vector<float>::const_iterator scale_i = scale_grid.begin(); scale_i != scale_grid.end(); ++scale_i) {
+ float scale = *scale_i;
+ staticData.SetMBRScale(scale);
+ cout << lineCount << " ||| " << p << " " << r << " " << prune << " " << scale << " ||| ";
+ vector<Word> mbrBestHypo = doLatticeMBR(manager,nBestList);
+ OutputBestHypo(mbrBestHypo, lineCount, staticData.GetReportSegmentation(),
+ staticData.GetReportAllFactors(),cout);
+ }
+ }
+
+ }
+ }
+
+
+ }
+
+}
diff --git a/contrib/relent-filter/src/Main.cpp b/contrib/relent-filter/src/Main.cpp
new file mode 100755
index 000000000..1f86e2cc7
--- /dev/null
+++ b/contrib/relent-filter/src/Main.cpp
@@ -0,0 +1,282 @@
+/***********************************************************************
+Relative Entropy-based Phrase table Pruning
+Copyright (C) 2012 Wang Ling
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+/**
+ * Moses main, for single-threaded and multi-threaded.
+ **/
+
+#include <exception>
+#include <fstream>
+#include <sstream>
+#include <vector>
+
+#ifdef WIN32
+// Include Visual Leak Detector
+//#include <vld.h>
+#endif
+
+#include "Hypothesis.h"
+#include "Manager.h"
+#include "IOWrapper.h"
+#include "StaticData.h"
+#include "Util.h"
+#include "ThreadPool.h"
+#include "TranslationAnalysis.h"
+#include "OutputCollector.h"
+#include "RelativeEntropyCalc.h"
+#include "LexicalReordering.h"
+#include "LexicalReorderingState.h"
+
+#ifdef HAVE_PROTOBUF
+#include "hypergraph.pb.h"
+#endif
+
+using namespace std;
+using namespace Moses;
+using namespace MosesCmd;
+
+namespace MosesCmd
+{
+// output floats with three significant digits
+static const size_t PRECISION = 3;
+
+/** Enforce rounding */
+void fix(std::ostream& stream, size_t size)
+{
+ stream.setf(std::ios::fixed);
+ stream.precision(size);
+}
+
+/** Translates a sentence.
+ * - calls the search (Manager)
+ * - applies the decision rule
+ * - outputs best translation and additional reporting
+ **/
+class TranslationTask : public Task
+{
+
+public:
+
+ TranslationTask(size_t lineNumber,
+ InputType* source, OutputCollector* searchGraphCollector) :
+ m_source(source), m_lineNumber(lineNumber),
+ m_searchGraphCollector(searchGraphCollector) {}
+
+ /** Translate one sentence
+ * gets called by main function implemented at end of this source file */
+ void Run() {
+
+ // report thread number
+#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
+ TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
+#endif
+
+ // shorthand for "global data"
+ const StaticData &staticData = StaticData::Instance();
+ // input sentence
+ Sentence sentence();
+ // set translation system
+ const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
+
+ // execute the translation
+ // note: this executes the search, resulting in a search graph
+ // we still need to apply the decision rule (MAP, MBR, ...)
+ Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm(), &system);
+ manager.ProcessSentence();
+
+ // output search graph
+ if (m_searchGraphCollector) {
+ ostringstream out;
+ fix(out,PRECISION);
+
+ vector<SearchGraphNode> searchGraph;
+ manager.GetSearchGraph(searchGraph);
+ out << RelativeEntropyCalc::CalcRelativeEntropy(m_lineNumber,searchGraph) << endl;
+ m_searchGraphCollector->Write(m_lineNumber, out.str());
+
+ }
+ manager.CalcDecoderStatistics();
+ }
+
+ ~TranslationTask() {
+ delete m_source;
+ }
+
+private:
+ InputType* m_source;
+ size_t m_lineNumber;
+ OutputCollector* m_searchGraphCollector;
+ std::ofstream *m_alignmentStream;
+
+};
+
+static void PrintFeatureWeight(const FeatureFunction* ff)
+{
+
+ size_t weightStart = StaticData::Instance().GetScoreIndexManager().GetBeginIndex(ff->GetScoreBookkeepingID());
+ size_t weightEnd = StaticData::Instance().GetScoreIndexManager().GetEndIndex(ff->GetScoreBookkeepingID());
+ for (size_t i = weightStart; i < weightEnd; ++i) {
+ cout << ff->GetScoreProducerDescription(i-weightStart) << " " << ff->GetScoreProducerWeightShortName(i-weightStart) << " "
+ << StaticData::Instance().GetAllWeights()[i] << endl;
+ }
+}
+
+
+static void ShowWeights()
+{
+ fix(cout,6);
+ const StaticData& staticData = StaticData::Instance();
+ const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
+ const vector<const StatelessFeatureFunction*>& slf =system.GetStatelessFeatureFunctions();
+ const vector<const StatefulFeatureFunction*>& sff = system.GetStatefulFeatureFunctions();
+ const vector<PhraseDictionaryFeature*>& pds = system.GetPhraseDictionaries();
+ const vector<GenerationDictionary*>& gds = system.GetGenerationDictionaries();
+ for (size_t i = 0; i < sff.size(); ++i) {
+ PrintFeatureWeight(sff[i]);
+ }
+ for (size_t i = 0; i < slf.size(); ++i) {
+ PrintFeatureWeight(slf[i]);
+ }
+ for (size_t i = 0; i < pds.size(); ++i) {
+ PrintFeatureWeight(pds[i]);
+ }
+ for (size_t i = 0; i < gds.size(); ++i) {
+ PrintFeatureWeight(gds[i]);
+ }
+}
+
+} //namespace
+
+/** main function of the command line version of the decoder **/
+int main(int argc, char** argv)
+{
+ try {
+
+ // echo command line, if verbose
+ IFVERBOSE(1) {
+ TRACE_ERR("command: ");
+ for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
+ TRACE_ERR(endl);
+ }
+
+ // set number of significant decimals in output
+ fix(cout,PRECISION);
+ fix(cerr,PRECISION);
+
+ // load all the settings into the Parameter class
+ // (stores them as strings, or array of strings)
+ Parameter* params = new Parameter();
+ if (!params->LoadParam(argc,argv)) {
+ params->Explain();
+ exit(1);
+ }
+
+
+ // initialize all "global" variables, which are stored in StaticData
+ // note: this also loads models such as the language model, etc.
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
+ exit(1);
+ }
+
+ // setting "-show-weights" -> just dump out weights and exit
+ if (params->isParamSpecified("show-weights")) {
+ ShowWeights();
+ exit(0);
+ }
+
+ // shorthand for accessing information in StaticData
+ const StaticData& staticData = StaticData::Instance();
+
+
+ //initialise random numbers
+ srand(time(NULL));
+
+ // set up read/writing class
+ IOWrapper* ioWrapper = GetIOWrapper(staticData);
+ if (!ioWrapper) {
+ cerr << "Error; Failed to create IO object" << endl;
+ exit(1);
+ }
+
+ // check on weights
+ vector<float> weights = staticData.GetAllWeights();
+ IFVERBOSE(2) {
+ TRACE_ERR("The score component vector looks like this:\n" << staticData.GetScoreIndexManager());
+ TRACE_ERR("The global weight vector looks like this:");
+ for (size_t j=0; j<weights.size(); j++) {
+ TRACE_ERR(" " << weights[j]);
+ }
+ TRACE_ERR("\n");
+ }
+ // every score must have a weight! check that here:
+ if(weights.size() != staticData.GetScoreIndexManager().GetTotalNumberOfScores()) {
+ TRACE_ERR("ERROR: " << staticData.GetScoreIndexManager().GetTotalNumberOfScores() << " score components, but " << weights.size() << " weights defined" << std::endl);
+ exit(1);
+ }
+
+ // setting lexicalized reordering setup
+ PhraseBasedReorderingState::m_useFirstBackwardScore = false;
+
+
+ auto_ptr<OutputCollector> outputCollector;
+ outputCollector.reset(new OutputCollector());
+
+#ifdef WITH_THREADS
+ ThreadPool pool(staticData.ThreadCount());
+#endif
+
+ // main loop over set of input sentences
+ InputType* source = NULL;
+ size_t lineCount = 0;
+ while(ReadInput(*ioWrapper,staticData.GetInputType(),source)) {
+ IFVERBOSE(1) {
+ ResetUserTime();
+ }
+ // set up task of translating one sentence
+ TranslationTask* task =
+ new TranslationTask(lineCount,source, outputCollector.get());
+ // execute task
+#ifdef WITH_THREADS
+ pool.Submit(task);
+#else
+ task->Run();
+ delete task;
+#endif
+
+ source = NULL; //make sure it doesn't get deleted
+ ++lineCount;
+ }
+
+ // we are done, finishing up
+#ifdef WITH_THREADS
+ pool.Stop(true); //flush remaining jobs
+#endif
+
+ } catch (const std::exception &e) {
+ std::cerr << "Exception: " << e.what() << std::endl;
+ return EXIT_FAILURE;
+ }
+
+#ifndef EXIT_RETURN
+ //This avoids that destructors are called (it can take a long time)
+ exit(EXIT_SUCCESS);
+#else
+ return EXIT_SUCCESS;
+#endif
+}
diff --git a/contrib/relent-filter/src/Main.h b/contrib/relent-filter/src/Main.h
new file mode 100755
index 000000000..f0782144e
--- /dev/null
+++ b/contrib/relent-filter/src/Main.h
@@ -0,0 +1,39 @@
+/*********************************************************************
+Relative Entropy-based Phrase table Pruning
+Copyright (C) 2012 Wang Ling
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the University of Edinburgh nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#ifndef moses_cmd_Main_h
+#define moses_cmd_Main_h
+
+#include "StaticData.h"
+
+class IOWrapper;
+
+int main(int argc, char* argv[]);
+#endif
diff --git a/contrib/relent-filter/src/RelativeEntropyCalc.cpp b/contrib/relent-filter/src/RelativeEntropyCalc.cpp
new file mode 100755
index 000000000..212eedf87
--- /dev/null
+++ b/contrib/relent-filter/src/RelativeEntropyCalc.cpp
@@ -0,0 +1,83 @@
+/***********************************************************************
+Relative Entropy-based Phrase table Pruning
+Copyright (C) 2012 Wang Ling
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <vector>
+#include "Hypothesis.h"
+#include "StaticData.h"
+#include "RelativeEntropyCalc.h"
+#include "Manager.h"
+
+using namespace std;
+using namespace Moses;
+using namespace MosesCmd;
+
+namespace MosesCmd
+{
+ double RelativeEntropyCalc::CalcRelativeEntropy(int translationId, std::vector<SearchGraphNode>& searchGraph){
+ const StaticData &staticData = StaticData::Instance();
+ const Phrase *m_constraint = staticData.GetConstrainingPhrase(translationId);
+
+ double prunedScore = -numeric_limits<double>::max();
+ double unprunedScore = -numeric_limits<double>::max();
+ for (size_t i = 0; i < searchGraph.size(); ++i) {
+ const SearchGraphNode& searchNode = searchGraph[i];
+ int nodeId = searchNode.hypo->GetId();
+ if(nodeId == 0) continue; // initial hypothesis
+
+ int forwardId = searchNode.forward;
+ if(forwardId == -1){ // is final hypothesis
+ Phrase catOutput(0);
+ ConcatOutputPhraseRecursive(catOutput, searchNode.hypo);
+ if(catOutput == *m_constraint){ // is the output actually the same as the constraint (forced decoding does not always force the output)
+ const Hypothesis *prevHypo = searchNode.hypo->GetPrevHypo();
+ int backId = prevHypo->GetId();
+ double derivationScore = searchNode.hypo->GetScore();
+ if(backId != 0){ // derivation using smaller units
+ if(prunedScore < derivationScore){
+ prunedScore = derivationScore;
+ }
+ }
+ if(unprunedScore < derivationScore){
+ unprunedScore = derivationScore;
+ }
+ }
+ }
+ }
+
+ double neg_log_div = 0;
+ if( unprunedScore == -numeric_limits<double>::max()){
+ neg_log_div = numeric_limits<double>::max(); // could not find phrase pair, give it a low score so that it doesnt get pruned
+ }
+ else{
+ neg_log_div = unprunedScore - prunedScore;
+ }
+ if (neg_log_div > 100){
+ return 100;
+ }
+ return neg_log_div;
+ }
+
+ void RelativeEntropyCalc::ConcatOutputPhraseRecursive(Phrase& phrase, const Hypothesis *hypo){
+ int nodeId = hypo->GetId();
+ if(nodeId == 0) return; // initial hypothesis
+ ConcatOutputPhraseRecursive(phrase, hypo->GetPrevHypo());
+ const Phrase &endPhrase = hypo->GetCurrTargetPhrase();
+ phrase.Append(endPhrase);
+ }
+}
diff --git a/contrib/relent-filter/src/RelativeEntropyCalc.h b/contrib/relent-filter/src/RelativeEntropyCalc.h
new file mode 100755
index 000000000..efe8ba495
--- /dev/null
+++ b/contrib/relent-filter/src/RelativeEntropyCalc.h
@@ -0,0 +1,51 @@
+/*********************************************************************
+Relative Entropy-based Phrase table Pruning
+Copyright (C) 2012 Wang Ling
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+ * Neither the name of the University of Edinburgh nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
+BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
+IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+***********************************************************************/
+
+#include <vector>
+#include "Hypothesis.h"
+#include "StaticData.h"
+#include "Manager.h"
+
+using namespace std;
+using namespace Moses;
+
+namespace MosesCmd
+{
+
+class RelativeEntropyCalc
+{
+public:
+ static double CalcRelativeEntropy(int translationId, std::vector<SearchGraphNode>& searchGraph);
+
+protected:
+ static void ConcatOutputPhraseRecursive(Phrase& phrase, const Hypothesis *hypo);
+};
+
+}
diff --git a/contrib/relent-filter/src/TranslationAnalysis.cpp b/contrib/relent-filter/src/TranslationAnalysis.cpp
new file mode 100755
index 000000000..89da48301
--- /dev/null
+++ b/contrib/relent-filter/src/TranslationAnalysis.cpp
@@ -0,0 +1,126 @@
+// $Id$
+
+#include <iostream>
+#include <sstream>
+#include <algorithm>
+#include "StaticData.h"
+#include "Hypothesis.h"
+#include "TranslationAnalysis.h"
+
+using namespace Moses;
+
+namespace TranslationAnalysis
+{
+
+void PrintTranslationAnalysis(const TranslationSystem* system, std::ostream &os, const Hypothesis* hypo)
+{
+ os << std::endl << "TRANSLATION HYPOTHESIS DETAILS:" << std::endl;
+ std::vector<const Hypothesis*> translationPath;
+
+ while (hypo) {
+ translationPath.push_back(hypo);
+ hypo = hypo->GetPrevHypo();
+ }
+
+ std::reverse(translationPath.begin(), translationPath.end());
+ std::vector<std::string> droppedWords;
+ std::vector<const Hypothesis*>::iterator tpi = translationPath.begin();
+ if(tpi == translationPath.end())
+ return;
+ ++tpi; // skip initial translation state
+ std::vector<std::string> sourceMap;
+ std::vector<std::string> targetMap;
+ std::vector<unsigned int> lmAcc(0);
+ size_t lmCalls = 0;
+ bool doLMStats = ((*tpi)->GetLMStats() != 0);
+ if (doLMStats)
+ lmAcc.resize((*tpi)->GetLMStats()->size(), 0);
+ for (; tpi != translationPath.end(); ++tpi) {
+ std::ostringstream sms;
+ std::ostringstream tms;
+ std::string target = (*tpi)->GetTargetPhraseStringRep();
+ std::string source = (*tpi)->GetSourcePhraseStringRep();
+ WordsRange twr = (*tpi)->GetCurrTargetWordsRange();
+ WordsRange swr = (*tpi)->GetCurrSourceWordsRange();
+ const AlignmentInfo &alignmentInfo = (*tpi)->GetCurrTargetPhrase().GetAlignmentInfo();
+ // language model backoff stats,
+ if (doLMStats) {
+ std::vector<std::vector<unsigned int> >& lmstats = *(*tpi)->GetLMStats();
+ std::vector<std::vector<unsigned int> >::iterator i = lmstats.begin();
+ std::vector<unsigned int>::iterator acc = lmAcc.begin();
+
+ for (; i != lmstats.end(); ++i, ++acc) {
+ std::vector<unsigned int>::iterator j = i->begin();
+ lmCalls += i->size();
+ for (; j != i->end(); ++j) {
+ (*acc) += *j;
+ }
+ }
+ }
+
+ bool epsilon = false;
+ if (target == "") {
+ target="<EPSILON>";
+ epsilon = true;
+ droppedWords.push_back(source);
+ }
+ os << " SOURCE: " << swr << " " << source << std::endl
+ << " TRANSLATED AS: " << target << std::endl
+ << " WORD ALIGNED: " << alignmentInfo << std::endl;
+ size_t twr_i = twr.GetStartPos();
+ size_t swr_i = swr.GetStartPos();
+ if (!epsilon) {
+ sms << twr_i;
+ }
+ if (epsilon) {
+ tms << "del(" << swr_i << ")";
+ } else {
+ tms << swr_i;
+ }
+ swr_i++;
+ twr_i++;
+ for (; twr_i <= twr.GetEndPos() && twr.GetEndPos() != NOT_FOUND; twr_i++) {
+ sms << '-' << twr_i;
+ }
+ for (; swr_i <= swr.GetEndPos() && swr.GetEndPos() != NOT_FOUND; swr_i++) {
+ tms << '-' << swr_i;
+ }
+ if (!epsilon) targetMap.push_back(sms.str());
+ sourceMap.push_back(tms.str());
+ }
+ std::vector<std::string>::iterator si = sourceMap.begin();
+ std::vector<std::string>::iterator ti = targetMap.begin();
+ os << std::endl << "SOURCE/TARGET SPANS:";
+ os << std::endl << " SOURCE:";
+ for (; si != sourceMap.end(); ++si) {
+ os << " " << *si;
+ }
+ os << std::endl << " TARGET:";
+ for (; ti != targetMap.end(); ++ti) {
+ os << " " << *ti;
+ }
+ os << std::endl << std::endl;
+ if (doLMStats && lmCalls > 0) {
+ std::vector<unsigned int>::iterator acc = lmAcc.begin();
+ const LMList& lmlist = system->GetLanguageModels();
+ LMList::const_iterator i = lmlist.begin();
+ for (; acc != lmAcc.end(); ++acc, ++i) {
+ char buf[256];
+ sprintf(buf, "%.4f", (float)(*acc)/(float)lmCalls);
+ os << (*i)->GetScoreProducerDescription() <<", AVG N-GRAM LENGTH: " << buf << std::endl;
+ }
+ }
+
+ if (droppedWords.size() > 0) {
+ std::vector<std::string>::iterator dwi = droppedWords.begin();
+ os << std::endl << "WORDS/PHRASES DROPPED:" << std::endl;
+ for (; dwi != droppedWords.end(); ++dwi) {
+ os << "\tdropped=" << *dwi << std::endl;
+ }
+ }
+ os << std::endl << "SCORES (UNWEIGHTED/WEIGHTED): ";
+ StaticData::Instance().GetScoreIndexManager().PrintLabeledWeightedScores(os, translationPath.back()->GetScoreBreakdown(), StaticData::Instance().GetAllWeights());
+ os << std::endl;
+}
+
+}
diff --git a/contrib/relent-filter/src/TranslationAnalysis.h b/contrib/relent-filter/src/TranslationAnalysis.h
new file mode 100755
index 000000000..1eb7a04fd
--- /dev/null
+++ b/contrib/relent-filter/src/TranslationAnalysis.h
@@ -0,0 +1,25 @@
+// $Id$
+
+/*
+ * also see moses/SentenceStats
+ */
+
+#ifndef moses_cmd_TranslationAnalysis_h
+#define moses_cmd_TranslationAnalysis_h
+
+#include <iostream>
+#include "Hypothesis.h"
+#include "TranslationSystem.h"
+
+namespace TranslationAnalysis
+{
+
+/***
+ * print details about the translation represented in hypothesis to
+ * os. Included information: phrase alignment, words dropped, scores
+ */
+void PrintTranslationAnalysis(const Moses::TranslationSystem* system, std::ostream &os, const Moses::Hypothesis* hypo);
+
+}
+
+#endif
diff --git a/contrib/relent-filter/src/mbr.cpp b/contrib/relent-filter/src/mbr.cpp
new file mode 100755
index 000000000..7462d3fc6
--- /dev/null
+++ b/contrib/relent-filter/src/mbr.cpp
@@ -0,0 +1,178 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
+#include <iomanip>
+#include <vector>
+#include <map>
+#include <stdlib.h>
+#include <math.h>
+#include <algorithm>
+#include <stdio.h>
+#include "TrellisPathList.h"
+#include "TrellisPath.h"
+#include "StaticData.h"
+#include "Util.h"
+#include "mbr.h"
+
+using namespace std ;
+using namespace Moses;
+
+
+/* Input :
+ 1. a sorted n-best list, with duplicates filtered out in the following format
+ 0 ||| amr moussa is currently on a visit to libya , tomorrow , sunday , to hold talks with regard to the in sudan . ||| 0 -4.94418 0 0 -2.16036 0 0 -81.4462 -106.593 -114.43 -105.55 -12.7873 -26.9057 -25.3715 -52.9336 7.99917 -24 ||| -4.58432
+
+ 2. a weight vector
+ 3. bleu order ( default = 4)
+ 4. scaling factor to weigh the weight vector (default = 1.0)
+
+ Output :
+ translations that minimise the Bayes Risk of the n-best list
+
+
+*/
+
+int BLEU_ORDER = 4;
+int SMOOTH = 1;
+float min_interval = 1e-4;
+void extract_ngrams(const vector<const Factor* >& sentence, map < vector < const Factor* >, int > & allngrams)
+{
+ vector< const Factor* > ngram;
+ for (int k = 0; k < BLEU_ORDER; k++) {
+ for(int i =0; i < max((int)sentence.size()-k,0); i++) {
+ for ( int j = i; j<= i+k; j++) {
+ ngram.push_back(sentence[j]);
+ }
+ ++allngrams[ngram];
+ ngram.clear();
+ }
+ }
+}
+
+float calculate_score(const vector< vector<const Factor*> > & sents, int ref, int hyp, vector < map < vector < const Factor *>, int > > & ngram_stats )
+{
+ int comps_n = 2*BLEU_ORDER+1;
+ vector<int> comps(comps_n);
+ float logbleu = 0.0, brevity;
+
+ int hyp_length = sents[hyp].size();
+
+ for (int i =0; i<BLEU_ORDER; i++) {
+ comps[2*i] = 0;
+ comps[2*i+1] = max(hyp_length-i,0);
+ }
+
+ map< vector < const Factor * > ,int > & hyp_ngrams = ngram_stats[hyp] ;
+ map< vector < const Factor * >, int > & ref_ngrams = ngram_stats[ref] ;
+
+ for (map< vector< const Factor * >, int >::iterator it = hyp_ngrams.begin();
+ it != hyp_ngrams.end(); it++) {
+ map< vector< const Factor * >, int >::iterator ref_it = ref_ngrams.find(it->first);
+ if(ref_it != ref_ngrams.end()) {
+ comps[2* (it->first.size()-1)] += min(ref_it->second,it->second);
+ }
+ }
+ comps[comps_n-1] = sents[ref].size();
+
+ for (int i=0; i<BLEU_ORDER; i++) {
+ if (comps[0] == 0)
+ return 0.0;
+ if ( i > 0 )
+ logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH);
+ else
+ logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]);
+ }
+ logbleu /= BLEU_ORDER;
+ brevity = 1.0-(float)comps[comps_n-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length
+ if (brevity < 0.0)
+ logbleu += brevity;
+ return exp(logbleu);
+}
+
+const TrellisPath doMBR(const TrellisPathList& nBestList)
+{
+ float marginal = 0;
+
+ vector<float> joint_prob_vec;
+ vector< vector<const Factor*> > translations;
+ float joint_prob;
+ vector< map < vector <const Factor *>, int > > ngram_stats;
+
+ TrellisPathList::const_iterator iter;
+
+ // get max score to prevent underflow
+ float maxScore = -1e20;
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+ const TrellisPath &path = **iter;
+ float score = StaticData::Instance().GetMBRScale()
+ * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights());
+ if (maxScore < score) maxScore = score;
+ }
+
+ for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
+ const TrellisPath &path = **iter;
+ joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().InnerProduct(StaticData::Instance().GetAllWeights()) - maxScore);
+ marginal += joint_prob;
+ joint_prob_vec.push_back(joint_prob);
+
+ // get words in translation
+ vector<const Factor*> translation;
+ GetOutputFactors(path, translation);
+
+ // collect n-gram counts
+ map < vector < const Factor *>, int > counts;
+ extract_ngrams(translation,counts);
+
+ ngram_stats.push_back(counts);
+ translations.push_back(translation);
+ }
+
+ vector<float> mbr_loss;
+ float bleu, weightedLoss;
+ float weightedLossCumul = 0;
+ float minMBRLoss = 1000000;
+ int minMBRLossIdx = -1;
+
+ /* Main MBR computation done here */
+ iter = nBestList.begin();
+ for (unsigned int i = 0; i < nBestList.GetSize(); i++) {
+ weightedLossCumul = 0;
+ for (unsigned int j = 0; j < nBestList.GetSize(); j++) {
+ if ( i != j) {
+ bleu = calculate_score(translations, j, i,ngram_stats );
+ weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal);
+ weightedLossCumul += weightedLoss;
+ if (weightedLossCumul > minMBRLoss)
+ break;
+ }
+ }
+ if (weightedLossCumul < minMBRLoss) {
+ minMBRLoss = weightedLossCumul;
+ minMBRLossIdx = i;
+ }
+ iter++;
+ }
+ /* Find sentence that minimises Bayes Risk under 1- BLEU loss */
+ return nBestList.at(minMBRLossIdx);
+ //return translations[minMBRLossIdx];
+}
+
+void GetOutputFactors(const TrellisPath &path, vector <const Factor*> &translation)
+{
+ const std::vector<const Hypothesis *> &edges = path.GetEdges();
+ const std::vector<FactorType>& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+ assert (outputFactorOrder.size() == 1);
+
+ // print the surface factor of the translation
+ for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) {
+ const Hypothesis &edge = *edges[currEdge];
+ const Phrase &phrase = edge.GetCurrTargetPhrase();
+ size_t size = phrase.GetSize();
+ for (size_t pos = 0 ; pos < size ; pos++) {
+
+ const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]);
+ translation.push_back(factor);
+ }
+ }
+}
+
diff --git a/moses/src/OnlineCommand.h b/contrib/relent-filter/src/mbr.h
index 90f778ea4..d08b11a98 100644..100755
--- a/moses/src/OnlineCommand.h
+++ b/contrib/relent-filter/src/mbr.h
@@ -1,4 +1,4 @@
-// $Id: OnlineCommand.h 3428 2010-09-13 17:55:23Z nicolabertoldi $
+// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
@@ -19,41 +19,10 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-#ifndef moses_OnlineCommand_h
-#define moses_OnlineCommand_h
-
-#include <string>
-#include "InputType.h"
-#include "StaticData.h"
-
-namespace Moses
-{
-
-/***
- * A class used specifically to read online commnds to modify on the fly system' parameters
- */
-class OnlineCommand
-{
-
- private:
-
- std::string command_type;
- std::string command_value;
- PARAM_VEC accepted_commands;
-
- public:
- OnlineCommand();
-
- bool Parse(std::string& str);
- void Print(std::ostream& out = std::cerr) const;
- void Execute() const;
- void Clean();
-
- inline std::string GetType(){ return command_type; };
- inline std::string GetValue(){ return command_value; };
-};
-
-
-}
+#ifndef moses_cmd_mbr_h
+#define moses_cmd_mbr_h
+const Moses::TrellisPath doMBR(const Moses::TrellisPathList& nBestList);
+void GetOutputFactors(const Moses::TrellisPath &path, std::vector <const Moses::Factor*> &translation);
+float calculate_score(const std::vector< std::vector<const Moses::Factor*> > & sents, int ref, int hyp, std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats );
#endif
diff --git a/contrib/reranking/data/README b/contrib/reranking/data/README
deleted file mode 100644
index 59b20b32d..000000000
--- a/contrib/reranking/data/README
+++ /dev/null
@@ -1,5 +0,0 @@
-
-sample usage:
-
-../src/nbest -input-file nbest.small -output-file nbest.1best 1 -sort -weights weights
-
diff --git a/contrib/reranking/data/nbest.small b/contrib/reranking/data/nbest.small
deleted file mode 100644
index 0fcbc44ce..000000000
--- a/contrib/reranking/data/nbest.small
+++ /dev/null
@@ -1,7 +0,0 @@
-0 ||| Once a major milestone in the Balkans ||| d: 0 -0.608213 0 0 -0.512647 0 0 lm: -35.7187 tm: -3.97053 -17.5137 -3.24082 -15.8638 2.99969 w: -7 ||| -3.92049
-0 ||| Once a crucial period in the Balkans ||| d: 0 -0.944329 0 0 -1.06468 0 0 lm: -37.5341 tm: -4.27619 -19.441 -3.81074 -14.767 3.99959 w: -7 ||| -4.00353
-1 ||| Since the world is focused on Iraq , North Korea and a possible crisis with Iran on nuclear weapons , Kosovo is somewhat unnoticed . ||| d: -6 -5.80589 -0.65383 -1.29291 -6.19413 -0.0861354 -0.993748 lm: -112.868 tm: -42.7841 -61.6487 -16.5351 -23.8061 21.9977 w: -25 ||| -13.0796
-2 ||| The public will soon turn its attention back to that province during a decision regarding his fate . ||| d: -8 -4.61691 0 -3.62979 -4.85916 0 -4.43407 lm: -81.3478 tm: -46.0407 -63.79 -23.7663 -25.175 14.9984 w: -18 ||| -12.1226
-2 ||| The public will soon be able to turn its attention back into this province during a decision on his fate . ||| d: -8 -5.53064 0 -3.51999 -3.26708 0 -4.44003 lm: -84.7939 tm: -36.2621 -66.32 -21.0804 -33.9136 13.9985 w: -21 ||| -12.1227
-2 ||| The public will soon turn his attention to them at a decision on his destiny . ||| d: -8 -5.3448 0 -2.65118 -4.35949 0 -3.95447 lm: -67.451 tm: -54.851 -89.0503 -17.9389 -22.9488 12.9986 w: -16 ||| -12.1234
-2 ||| The public will soon turn his attention to them at a decision on his destiny . ||| d: -8 -5.3448 0 -2.65118 -4.35949 0 -3.95447 lm: -67.451 tm: -54.851 -89.0503 -17.9389 -22.9488 12.9986 w: -16 ||| -12.1234
diff --git a/contrib/reranking/data/weights b/contrib/reranking/data/weights
deleted file mode 100644
index c6b6c1ac0..000000000
--- a/contrib/reranking/data/weights
+++ /dev/null
@@ -1,11 +0,0 @@
-0
-1 2 3
-4
-5
-6
-7
-8
-9
-10
-11
-12 13
diff --git a/contrib/reranking/src/Hypo.cpp b/contrib/reranking/src/Hypo.cpp
deleted file mode 100644
index 0ceb21abd..000000000
--- a/contrib/reranking/src/Hypo.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: Hypo.cpp
- * basic functions to process one hypothesis
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-
-#include "Hypo.h"
-#include <iostream>
-
-//const char* NBEST_DELIM = "|||";
-
-Hypo::Hypo()
-{
- //cerr << "Hypo: constructor called" << endl;
-}
-
-Hypo::~Hypo()
-{
- //cerr << "Hypo: destructor called" << endl;
-}
-
-void Hypo::Write(ofstream &outf)
-{
- outf << id << NBEST_DELIM2 << trg << NBEST_DELIM2;
- for (vector<float>::iterator i = f.begin(); i != f.end(); i++)
- outf << (*i) << " ";
- outf << NBEST_DELIM << " " << s << endl;
-
-}
-
-float Hypo::CalcGlobal(Weights &w)
-{
- //cerr << " HYP: calc global" << endl;
- int sz=w.val.size();
- if (sz<f.size()) {
- cerr << " - NOTE: padding weight vector with " << f.size()-sz << " zeros" << endl;
- w.val.resize(f.size());
- }
-
- s=0;
- for (int i=0; i<f.size(); i++) {
- //cerr << "i=" << i << ", " << w.val[i] << ", " << f[i] << endl;
- s+=w.val[i]*f[i];
- }
- //cerr << "s=" << s << endl;
- return s;
-}
-
-// this is actually a "greater than" since we want to sort in descending order
-bool Hypo::operator< (const Hypo &h2) const
-{
- return (this->s > h2.s);
-}
-
diff --git a/contrib/reranking/src/Hypo.h b/contrib/reranking/src/Hypo.h
deleted file mode 100644
index a85410289..000000000
--- a/contrib/reranking/src/Hypo.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: Hypo.h
- * basic functions to process one hypothesis
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-
-#ifndef _HYPO_H_
-#define _HYPO_H_
-
-using namespace std;
-
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-
-#include "Tools.h"
-
-#define NBEST_DELIM "|||"
-#define NBEST_DELIM2 " ||| "
-
-class Hypo
-{
- int id;
- string trg; // translation
- vector<float> f; // feature function scores
- float s; // global score
- // segmentation
-public:
- Hypo();
- Hypo(int p_id,string &p_trg, vector<float> &p_f, float p_s) : id(p_id),trg(p_trg),f(p_f),s(p_s) {};
- ~Hypo();
- float CalcGlobal(Weights&);
- void Write(ofstream&);
- bool operator< (const Hypo&) const;
- // bool CompareLikelihoods (const Hypo&, const Hypo&) const;
-};
-
-#endif
diff --git a/contrib/reranking/src/Main.cpp b/contrib/reranking/src/Main.cpp
deleted file mode 100644
index 4a20b013c..000000000
--- a/contrib/reranking/src/Main.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: Main.cpp
- * command line interface
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-#include <iostream>
-#include <fstream>
-#include "ParameterNBest.h"
-#include "NBest.h"
-#include "Tools.h"
-
-#include "../../../moses/src/Util.h"
-
-
-using namespace std;
-
-int main (int argc, char *argv[])
-{
- // parse parameters
- ParameterNBest *parameter = new ParameterNBest();
- if (!parameter->LoadParam(argc, argv)) {
- parameter->Explain();
- delete parameter;
- return 1;
- }
-
- // read input
- ifstream inpf;
- PARAM_VEC p=parameter->GetParam("input-file");
- if (p.size()<1 || p.size()>2) Error("The option -input-file requires one or two arguments");
- int in_n=p.size()>1 ? Moses::Scan<int>(p[1]) : 0;
- cout << "NBest version 0.1, written by Holger.Schwenk@lium.univ-lemans.fr" << endl
- << " - reading input from file '" << p[0] << "'";
- if (in_n>0) cout << " (limited to the first " << in_n << " hypothesis)";
- cout << endl;
- inpf.open(p[0].c_str());
- if (inpf.fail()) {
- perror ("ERROR");
- exit(1);
- }
-
- // open output
- ofstream outf;
- p=parameter->GetParam("output-file");
- if (p.size()<1 || p.size()>2) Error("The option -output-file requires one or two arguments");
- int out_n=p.size()>1 ? Moses::Scan<int>(p[1]) : 0;
- cout << " - writing output to file '" << p[0] << "'";
- if (out_n>0) cout << " (limited to the first " << out_n << " hypothesis)";
- cout << endl;
- outf.open(p[0].c_str());
- if (outf.fail()) {
- perror ("ERROR");
- exit(1);
- }
-
- // eventually read weights
- Weights w;
- int do_calc=false;
- if (parameter->isParamSpecified("weights")) {
- p=parameter->GetParam("weights");
- if (p.size()<1) Error("The option -weights requires one argument");
- cout << " - reading weights from file '" << p[0] << "'";
- int n=w.Read(p[0].c_str());
- cout << " (found " << n << " values)" << endl;
- do_calc=true;
- cout << " - recalculating global scores" << endl;
- }
-
- // shall we sort ?
- bool do_sort = parameter->isParamSpecified("sort");
- if (do_sort) cout << " - sorting global scores" << endl;
-
- // main loop
- int nb_sent=0, nb_nbest=0;
- while (!inpf.eof()) {
- NBest nbest(inpf, in_n);
-
- if (do_calc) nbest.CalcGlobal(w);
- if (do_sort) nbest.Sort();
- nbest.Write(outf, out_n);
-
- nb_sent++;
- nb_nbest+=nbest.NbNBest();
- }
- inpf.close();
- outf.close();
-
- // display final statistics
- cout << " - processed " << nb_nbest << " n-best hypotheses in " << nb_sent << " sentences"
- << " (average " << (float) nb_nbest/nb_sent << ")" << endl;
-
- return 0;
-}
diff --git a/contrib/reranking/src/Makefile b/contrib/reranking/src/Makefile
deleted file mode 100644
index c2711741e..000000000
--- a/contrib/reranking/src/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-
-# where to find include files and libraries from Moses
-MOSES_INC=../../../moses/src ../../..
-LIB_DIR=../../../moses/src/
-
-LIBS=-lmoses -lz
-OBJS=Main.o NBest.o Hypo.o Tools.o ParameterNBest.o
-
-CFLAGS=-I$(MOSES_INC)
-
-nbest-tool: $(OBJS)
- $(CXX) -o nbest $(OBJS) -L$(LIB_DIR) $(LIBS)
-
-%.o: %.cpp
- $(CXX) $(CFLAGS) -o $@ -c $<
-
-clean:
- -rm $(OBJS) nbest
diff --git a/contrib/reranking/src/NBest.cpp b/contrib/reranking/src/NBest.cpp
deleted file mode 100644
index 24a0f60c3..000000000
--- a/contrib/reranking/src/NBest.cpp
+++ /dev/null
@@ -1,131 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: NBest.cpp
- * basic functions on n-best lists
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-
-#include "NBest.h"
-
-#include "Util.h" // from Moses
-
-#include <sstream>
-#include <algorithm>
-
-//NBest::NBest() {
-//cerr << "NBEST: constructor called" << endl;
-//}
-
-
-bool NBest::ParseLine(ifstream &inpf, const int n)
-{
- static string line; // used internally to buffer an input line
- static int prev_id=-1; // used to detect a change of the n-best ID
- int id;
- vector<float> f;
- float s;
- int pos=0, epos;
- vector<string> blocks;
-
-
- if (line.empty()) {
- getline(inpf,line);
- if (inpf.eof()) return false;
- }
-
- // split line into blocks
- //cerr << "PARSE line: " << line << endl;
- while ((epos=line.find(NBEST_DELIM,pos))!=string::npos) {
- blocks.push_back(line.substr(pos,epos-pos));
- // cerr << " block: " << blocks.back() << endl;
- pos=epos+strlen(NBEST_DELIM);
- }
- blocks.push_back(line.substr(pos,line.size()));
- // cerr << " block: " << blocks.back() << endl;
-
- if (blocks.size()<4) {
- cerr << line << endl;
- Error("can't parse the above line");
- }
-
- // parse ID
- id=Scan<int>(blocks[0]);
- if (prev_id>=0 && id!=prev_id) {
- prev_id=id; // new nbest list has started
- return false;
- }
- prev_id=id;
- //cerr << "same ID " << id << endl;
-
- if (n>0 && nbest.size() >= n) {
- //cerr << "skipped" << endl;
- line.clear();
- return true; // skip parsing of unused hypos
- }
-
- // parse feature function scores
- //cerr << "PARSE features: '" << blocks[2] << "' size: " << blocks[2].size() << endl;
- pos=blocks[2].find_first_not_of(' ');
- while (pos<blocks[2].size() && (epos=blocks[2].find(" ",pos))!=string::npos) {
- string feat=blocks[2].substr(pos,epos-pos);
- //cerr << " feat: '" << feat << "', pos: " << pos << ", " << epos << endl;
- if (feat.find(":",0)!=string::npos) {
- //cerr << " name: " << feat << endl;
- } else {
- f.push_back(Scan<float>(feat));
- //cerr << " value: " << f.back() << endl;
- }
- pos=epos+1;
- }
-
- // eventually parse segmentation
- if (blocks.size()>4) {
- Error("parsing segmentation not yet supported");
- }
-
- nbest.push_back(Hypo(id, blocks[1], f, Scan<float>(blocks[3])));
-
- line.clear(); // force read of new line
-
- return true;
-}
-
-
-NBest::NBest(ifstream &inpf, const int n)
-{
- //cerr << "NBEST: constructor with file called" << endl;
- while (ParseLine(inpf,n));
- //cerr << "NBEST: found " << nbest.size() << " lines" << endl;
-}
-
-
-NBest::~NBest()
-{
- //cerr << "NBEST: destructor called" << endl;
-}
-
-void NBest::Write(ofstream &outf, int n)
-{
- if (n<1 || n>nbest.size()) n=nbest.size();
- for (int i=0; i<n; i++) nbest[i].Write(outf);
-}
-
-
-float NBest::CalcGlobal(Weights &w)
-{
- //cerr << "NBEST: calc global of size " << nbest.size() << endl;
- for (vector<Hypo>::iterator i = nbest.begin(); i != nbest.end(); i++) {
- (*i).CalcGlobal(w);
- }
-}
-
-
-void NBest::Sort()
-{
- sort(nbest.begin(),nbest.end());
-}
-
diff --git a/contrib/reranking/src/NBest.h b/contrib/reranking/src/NBest.h
deleted file mode 100644
index 9a4aa9447..000000000
--- a/contrib/reranking/src/NBest.h
+++ /dev/null
@@ -1,44 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: NBest.h
- * basic functions on n-best lists
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-
-#ifndef _NBEST_H_
-#define _NBEST_H_
-
-using namespace std;
-
-#include <iostream>
-#include <fstream>
-#include <string>
-#include <vector>
-
-#include "Tools.h"
-#include "Hypo.h"
-
-class NBest
-{
- int id;
- string src;
- vector<Hypo> nbest;
- bool ParseLine(ifstream &inpf, const int n);
-public:
- NBest(ifstream&, const int=0);
- ~NBest();
- int NbNBest() {
- return nbest.size();
- };
- float CalcGlobal(Weights&);
- void Sort(); // largest values first
- void Write(ofstream&, int=0);
-};
-
-void Error(char *msg);
-
-#endif
diff --git a/contrib/reranking/src/ParameterNBest.cpp b/contrib/reranking/src/ParameterNBest.cpp
deleted file mode 100644
index 005f3890c..000000000
--- a/contrib/reranking/src/ParameterNBest.cpp
+++ /dev/null
@@ -1,337 +0,0 @@
-// $Id: $
-
-/***********************************************************************
-nbest - tool to process Moses n-best list
-Copyright (C) 2008 Holger Schwenk, University of Le Mans, France
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <iostream>
-#include <iterator>
-#include <fstream>
-#include <sstream>
-#include <algorithm>
-#include "ParameterNBest.h"
-#include "Tools.h"
-
-#include "Util.h" // from Moses
-#include "InputFileStream.h"
-#include "UserMessage.h"
-
-using namespace std;
-
-/** define allowed parameters */
-ParameterNBest::ParameterNBest()
-{
- AddParam("input-file", "i", "file name of the input n-best list");
- AddParam("output-file", "o", "file name of the output n-best list");
- AddParam("recalc", "r", "recalc global scores");
- AddParam("weights", "w", "coefficients of the feature functions");
- AddParam("sort", "s", "sort n-best list according to the global scores");
- AddParam("lexical", "l", "report number of lexically different hypothesis");
-}
-
-ParameterNBest::~ParameterNBest()
-{
-}
-
-/** initialize a parameter, sub of constructor */
-void ParameterNBest::AddParam(const string &paramName, const string &description)
-{
- m_valid[paramName] = true;
- m_description[paramName] = description;
-}
-
-/** initialize a parameter (including abbreviation), sub of constructor */
-void ParameterNBest::AddParam(const string &paramName, const string &abbrevName, const string &description)
-{
- m_valid[paramName] = true;
- m_valid[abbrevName] = true;
- m_abbreviation[paramName] = abbrevName;
- m_description[paramName] = description;
-}
-
-/** print descriptions of all parameters */
-void ParameterNBest::Explain()
-{
- cerr << "Usage:" << endl;
- for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) {
- const string paramName = iterParam->first;
- const string paramDescription = iterParam->second;
- cerr << "\t-" << paramName;
- PARAM_STRING::const_iterator iterAbbr = m_abbreviation.find( paramName );
- if ( iterAbbr != m_abbreviation.end() )
- cerr << " (" << iterAbbr->second << ")";
- cerr << ": " << paramDescription << endl;
- }
-}
-
-/** check whether an item on the command line is a switch or a value
- * \param token token on the command line to checked **/
-
-bool ParameterNBest::isOption(const char* token)
-{
- if (! token) return false;
- std::string tokenString(token);
- size_t length = tokenString.size();
- if (length > 0 && tokenString.substr(0,1) != "-") return false;
- if (length > 1 && tokenString.substr(1,1).find_first_not_of("0123456789") == 0) return true;
- return false;
-}
-
-/** load all parameters from the configuration file and the command line switches */
-bool ParameterNBest::LoadParam(const string &filePath)
-{
- const char *argv[] = {"executable", "-f", filePath.c_str() };
- return LoadParam(3, (char**) argv);
-}
-
-/** load all parameters from the configuration file and the command line switches */
-bool ParameterNBest::LoadParam(int argc, char* argv[])
-{
- // config file (-f) arg mandatory
- string configPath;
- /*
- if ( (configPath = FindParam("-f", argc, argv)) == ""
- && (configPath = FindParam("-config", argc, argv)) == "")
- {
- PrintCredit();
-
- UserMessage::Add("No configuration file was specified. Use -config or -f");
- return false;
- }
- else
- {
- if (!ReadConfigFile(configPath))
- {
- UserMessage::Add("Could not read "+configPath);
- return false;
- }
- }
- */
-
- // overwrite parameters with values from switches
- for(PARAM_STRING::const_iterator iterParam = m_description.begin(); iterParam != m_description.end(); iterParam++) {
- const string paramName = iterParam->first;
- OverwriteParam("-" + paramName, paramName, argc, argv);
- }
-
- // ... also shortcuts
- for(PARAM_STRING::const_iterator iterParam = m_abbreviation.begin(); iterParam != m_abbreviation.end(); iterParam++) {
- const string paramName = iterParam->first;
- const string paramShortName = iterParam->second;
- OverwriteParam("-" + paramShortName, paramName, argc, argv);
- }
-
- // logging of parameters that were set in either config or switch
- int verbose = 1;
- if (m_setting.find("verbose") != m_setting.end() &&
- m_setting["verbose"].size() > 0)
- verbose = Scan<int>(m_setting["verbose"][0]);
- if (verbose >= 1) { // only if verbose
- TRACE_ERR( "Defined parameters (per moses.ini or switch):" << endl);
- for(PARAM_MAP::const_iterator iterParam = m_setting.begin() ; iterParam != m_setting.end(); iterParam++) {
- TRACE_ERR( "\t" << iterParam->first << ": ");
- for ( size_t i = 0; i < iterParam->second.size(); i++ )
- TRACE_ERR( iterParam->second[i] << " ");
- TRACE_ERR( endl);
- }
- }
-
- // check for illegal parameters
- bool noErrorFlag = true;
- for (int i = 0 ; i < argc ; i++) {
- if (isOption(argv[i])) {
- string paramSwitch = (string) argv[i];
- string paramName = paramSwitch.substr(1);
- if (m_valid.find(paramName) == m_valid.end()) {
- UserMessage::Add("illegal switch: " + paramSwitch);
- noErrorFlag = false;
- }
- }
- }
-
- // check if parameters make sense
- return Validate() && noErrorFlag;
-}
-
-/** check that parameter settings make sense */
-bool ParameterNBest::Validate()
-{
- bool noErrorFlag = true;
-
- // required parameters
- if (m_setting["input-file"].size() == 0) {
- UserMessage::Add("No input-file");
- noErrorFlag = false;
- }
-
- if (m_setting["output-file"].size() == 0) {
- UserMessage::Add("No output-file");
- noErrorFlag = false;
- }
-
- if (m_setting["recalc"].size() > 0 && m_setting["weights"].size()==0) {
- UserMessage::Add("you need to spezify weight when recalculating global scores");
- noErrorFlag = false;
- }
-
-
- return noErrorFlag;
-}
-
-/** check whether a file exists */
-bool ParameterNBest::FilesExist(const string &paramName, size_t tokenizeIndex,std::vector<std::string> const& extensions)
-{
- typedef std::vector<std::string> StringVec;
- StringVec::const_iterator iter;
-
- PARAM_MAP::const_iterator iterParam = m_setting.find(paramName);
- if (iterParam == m_setting.end()) {
- // no param. therefore nothing to check
- return true;
- }
- const StringVec &pathVec = (*iterParam).second;
- for (iter = pathVec.begin() ; iter != pathVec.end() ; ++iter) {
- StringVec vec = Tokenize(*iter);
- if (tokenizeIndex >= vec.size()) {
- stringstream errorMsg("");
- errorMsg << "Expected at least " << (tokenizeIndex+1) << " tokens per emtry in '"
- << paramName << "', but only found "
- << vec.size();
- UserMessage::Add(errorMsg.str());
- return false;
- }
- const string &pathStr = vec[tokenizeIndex];
-
- bool fileFound=0;
- for(size_t i=0; i<extensions.size() && !fileFound; ++i) {
- fileFound|=FileExists(pathStr + extensions[i]);
- }
- if(!fileFound) {
- stringstream errorMsg("");
- errorMsg << "File " << pathStr << " does not exist";
- UserMessage::Add(errorMsg.str());
- return false;
- }
- }
- return true;
-}
-
-/** look for a switch in arg, update parameter */
-// TODO arg parsing like this does not belong in the library, it belongs
-// in moses-cmd
-string ParameterNBest::FindParam(const string &paramSwitch, int argc, char* argv[])
-{
- for (int i = 0 ; i < argc ; i++) {
- if (string(argv[i]) == paramSwitch) {
- if (i+1 < argc) {
- return argv[i+1];
- } else {
- stringstream errorMsg("");
- errorMsg << "Option " << paramSwitch << " requires a parameter!";
- UserMessage::Add(errorMsg.str());
- // TODO return some sort of error, not the empty string
- }
- }
- }
- return "";
-}
-
-/** update parameter settings with command line switches
- * \param paramSwitch (potentially short) name of switch
- * \param paramName full name of parameter
- * \param argc number of arguments on command line
- * \param argv values of paramters on command line */
-void ParameterNBest::OverwriteParam(const string &paramSwitch, const string &paramName, int argc, char* argv[])
-{
- int startPos = -1;
- for (int i = 0 ; i < argc ; i++) {
- if (string(argv[i]) == paramSwitch) {
- startPos = i+1;
- break;
- }
- }
- if (startPos < 0)
- return;
-
- int index = 0;
- m_setting[paramName]; // defines the parameter, important for boolean switches
- while (startPos < argc && (!isOption(argv[startPos]))) {
- if (m_setting[paramName].size() > (size_t)index)
- m_setting[paramName][index] = argv[startPos];
- else
- m_setting[paramName].push_back(argv[startPos]);
- index++;
- startPos++;
- }
-}
-
-
-/** read parameters from a configuration file */
-bool ParameterNBest::ReadConfigFile( string filePath )
-{
- InputFileStream inFile(filePath);
- string line, paramName;
- while(getline(inFile, line)) {
- // comments
- size_t comPos = line.find_first_of("#");
- if (comPos != string::npos)
- line = line.substr(0, comPos);
- // trim leading and trailing spaces/tabs
- line = Trim(line);
-
- if (line[0]=='[') {
- // new parameter
- for (size_t currPos = 0 ; currPos < line.size() ; currPos++) {
- if (line[currPos] == ']') {
- paramName = line.substr(1, currPos - 1);
- break;
- }
- }
- } else if (line != "") {
- // add value to parameter
- m_setting[paramName].push_back(line);
- }
- }
- return true;
-}
-
-
-void ParameterNBest::PrintCredit()
-{
- cerr << "NBest - A tool to process Moses n-best lists" << endl
- << "Copyright (C) 2008 Holger Schwenk" << endl << endl
-
- << "This library is free software; you can redistribute it and/or" << endl
- << "modify it under the terms of the GNU Lesser General Public" << endl
- << "License as published by the Free Software Foundation; either" << endl
- << "version 2.1 of the License, or (at your option) any later version." << endl << endl
-
- << "This library is distributed in the hope that it will be useful," << endl
- << "but WITHOUT ANY WARRANTY; without even the implied warranty of" << endl
- << "MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU" << endl
- << "Lesser General Public License for more details." << endl << endl
-
- << "You should have received a copy of the GNU Lesser General Public" << endl
- << "License along with this library; if not, write to the Free Software" << endl
- << "Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA" << endl << endl
- << "***********************************************************************" << endl << endl
- << "Built on " << __DATE__ << endl << endl
-
- << "Written by Holger Schwenk, Holger.Schwenk@lium.univ-lemans.fr" << endl << endl;
-}
-
diff --git a/contrib/reranking/src/ParameterNBest.h b/contrib/reranking/src/ParameterNBest.h
deleted file mode 100644
index bc554d4b9..000000000
--- a/contrib/reranking/src/ParameterNBest.h
+++ /dev/null
@@ -1,76 +0,0 @@
-// $Id: $
-
-/***********************************************************************
-nbest - tool to process Moses n-best list
-Copyright (C) 2008 Holger Schwenk, University of Le Mans, France
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#ifndef _PARAMETER_NBEST_H_
-#define _PARAMETER_NBEST_H_
-
-#include <string>
-#include <map>
-#include <vector>
-#include "TypeDef.h"
-
-typedef std::vector<std::string> PARAM_VEC;
-typedef std::map<std::string, PARAM_VEC > PARAM_MAP;
-typedef std::map<std::string, bool> PARAM_BOOL;
-typedef std::map<std::string, std::string > PARAM_STRING;
-
-/** Handles parameter values set in config file or on command line.
- * Process raw parameter data (names and values as strings) for StaticData
- * to parse; to get useful values, see StaticData. */
-class ParameterNBest
-{
-protected:
- PARAM_MAP m_setting;
- PARAM_BOOL m_valid;
- PARAM_STRING m_abbreviation;
- PARAM_STRING m_description;
-
- std::string FindParam(const std::string &paramSwitch, int argc, char* argv[]);
- void OverwriteParam(const std::string &paramSwitch, const std::string &paramName, int argc, char* argv[]);
- bool ReadConfigFile( std::string filePath );
- bool FilesExist(const std::string &paramName, size_t tokenizeIndex,std::vector<std::string> const& fileExtension=std::vector<std::string>(1,""));
- bool isOption(const char* token);
- bool Validate();
-
- void AddParam(const std::string &paramName, const std::string &description);
- void AddParam(const std::string &paramName, const std::string &abbrevName, const std::string &description);
-
- void PrintCredit();
-
-public:
- ParameterNBest();
- ~ParameterNBest();
- bool LoadParam(int argc, char* argv[]);
- bool LoadParam(const std::string &filePath);
- void Explain();
-
- /** return a vector of strings holding the whitespace-delimited values on the ini-file line corresponding to the given parameter name */
- const PARAM_VEC &GetParam(const std::string &paramName) {
- return m_setting[paramName];
- }
- /** check if parameter is defined (either in moses.ini or as switch) */
- bool isParamSpecified(const std::string &paramName) {
- return m_setting.find( paramName ) != m_setting.end();
- }
-
-};
-
-#endif
diff --git a/contrib/reranking/src/Tools.cpp b/contrib/reranking/src/Tools.cpp
deleted file mode 100644
index 8312c3370..000000000
--- a/contrib/reranking/src/Tools.cpp
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: Tools.cpp
- * basic utility functions
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-#include "Tools.h"
-
-int Weights::Read(const char *fname)
-{
- ifstream inpf;
-
- inpf.open(fname);
- if (inpf.fail()) {
- perror ("ERROR");
- exit(1);
- }
-
- float f;
- while (inpf >> f) val.push_back(f);
-
- inpf.close();
- return val.size();
-}
-
diff --git a/contrib/reranking/src/Tools.h b/contrib/reranking/src/Tools.h
deleted file mode 100644
index eb71746b0..000000000
--- a/contrib/reranking/src/Tools.h
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * nbest: tool to process moses n-best lists
- *
- * File: Tools.cpp
- * basic utility functions
- *
- * Created by Holger Schwenk, University of Le Mans, 05/16/2008
- *
- */
-
-
-#ifndef _TOOLS_H_
-#define _TOOLS_H_
-
-using namespace std;
-
-#include <iostream>
-#include <fstream>
-#include <vector>
-
-class Weights
-{
- vector<float> val;
-public:
- Weights() {};
- ~Weights() {};
- int Read(const char *);
- friend class Hypo;
-};
-
-//******************************************************
-
-/*
-template<typename T>
-inline T Scan(const std::string &input)
-{
- std::stringstream stream(input);
- T ret;
- stream >> ret;
- return ret;
-}
-*/
-
-//******************************************************
-
-inline void Error (char *msg)
-{
- cerr << "ERROR: " << msg << endl;
- exit(1);
-}
-
-//******************************************************
-// From Moses code:
-
-
-/*
- * Outputting debugging/verbose information to stderr.
- * Use TRACE_ENABLE flag to redirect tracing output into oblivion
- * so that you can output your own ad-hoc debugging info.
- * However, if you use stderr diretly, please delete calls to it once
- * you finished debugging so that it won't clutter up.
- * Also use TRACE_ENABLE to turn off output of any debugging info
- * when compiling for a gui front-end so that running gui won't generate
- * output on command line
- * */
-#ifdef TRACE_ENABLE
-#define TRACE_ERR(str) std::cerr << str
-#else
-#define TRACE_ERR(str) {}
-#endif
-
-#endif
-
diff --git a/contrib/server/Translation-web/src/conf/MANIFEST.MF b/contrib/server/Translation-web/src/conf/MANIFEST.MF
new file mode 100755
index 000000000..58630c02e
--- /dev/null
+++ b/contrib/server/Translation-web/src/conf/MANIFEST.MF
@@ -0,0 +1,2 @@
+Manifest-Version: 1.0
+
diff --git a/contrib/server/Translation-web/src/java/com/hpl/mt/Translate.java b/contrib/server/Translation-web/src/java/com/hpl/mt/Translate.java
new file mode 100755
index 000000000..b0823431d
--- /dev/null
+++ b/contrib/server/Translation-web/src/java/com/hpl/mt/Translate.java
@@ -0,0 +1,129 @@
+package com.hpl.mt;
+
+/*
+ * To change this template, choose Tools | Templates
+ * and open the template in the editor.
+ */
+
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.net.URL;
+import java.util.HashMap;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+import org.apache.xmlrpc.XmlRpcException;
+import org.apache.xmlrpc.client.XmlRpcClient;
+import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
+
+/**
+ *
+ * @author ulanov
+ */
+public class Translate extends HttpServlet {
+
+ /**
+ * Processes requests for both HTTP
+ * <code>GET</code> and
+ * <code>POST</code> methods.
+ *
+ * @param request servlet request
+ * @param response servlet response
+ * @throws ServletException if a servlet-specific error occurs
+ * @throws IOException if an I/O error occurs
+ */
+ protected void processRequest(HttpServletRequest request, HttpServletResponse response)
+ throws ServletException, IOException {
+ response.setContentType("text/html;charset=UTF-8");
+ System.out.println("before" + request.getCharacterEncoding());
+ request.setCharacterEncoding("UTF-8");
+ System.out.println("after" + request.getCharacterEncoding());
+ PrintWriter out = response.getWriter();
+ try {
+ /*
+ * TODO output your page here. You may use following sample code.
+ */
+ // Create an instance of XmlRpcClient
+ String textToTranslate = request.getParameter("text");
+ XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
+ config.setServerURL(new URL("http://localhost:9008/RPC2"));
+ XmlRpcClient client = new XmlRpcClient();
+ client.setConfig(config);
+ // The XML-RPC data type used by mosesserver is <struct>. In Java, this data type can be represented using HashMap.
+ HashMap<String,String> mosesParams = new HashMap<String,String>();
+ mosesParams.put("text", textToTranslate);
+ mosesParams.put("align", "true");
+ mosesParams.put("report-all-factors", "true");
+ // The XmlRpcClient.execute method doesn't accept Hashmap (pParams). It's either Object[] or List.
+ Object[] params = new Object[] { null };
+ params[0] = mosesParams;
+ // Invoke the remote method "translate". The result is an Object, convert it to a HashMap.
+ HashMap result;
+ try {
+ result = (HashMap)client.execute("translate", params);
+ } catch (XmlRpcException ex) {
+ Logger.getLogger(Translate.class.getName()).log(Level.SEVERE, null, ex);
+ throw new IOException("XML-RPC failed");
+ }
+ // Print the returned results
+ String textTranslation = (String)result.get("text");
+ System.out.println("Input : "+textToTranslate);
+ System.out.println("Translation : "+textTranslation);
+ out.write(textTranslation);
+ if (result.get("align") != null){
+ Object[] aligns = (Object[])result.get("align");
+ System.out.println("Phrase alignments : [Source Start:Source End][Target Start]");
+ for ( Object element : aligns) {
+ HashMap align = (HashMap)element;
+ System.out.println("["+align.get("src-start")+":"+align.get("src-end")+"]["+align.get("tgt-start")+"]");
+ }
+ }
+ } finally {
+ out.close();
+ }
+ }
+
+ // <editor-fold defaultstate="collapsed" desc="HttpServlet methods. Click on the + sign on the left to edit the code.">
+ /**
+ * Handles the HTTP
+ * <code>GET</code> method.
+ *
+ * @param request servlet request
+ * @param response servlet response
+ * @throws ServletException if a servlet-specific error occurs
+ * @throws IOException if an I/O error occurs
+ */
+ @Override
+ protected void doGet(HttpServletRequest request, HttpServletResponse response)
+ throws ServletException, IOException {
+ processRequest(request, response);
+ }
+
+ /**
+ * Handles the HTTP
+ * <code>POST</code> method.
+ *
+ * @param request servlet request
+ * @param response servlet response
+ * @throws ServletException if a servlet-specific error occurs
+ * @throws IOException if an I/O error occurs
+ */
+ @Override
+ protected void doPost(HttpServletRequest request, HttpServletResponse response)
+ throws ServletException, IOException {
+ processRequest(request, response);
+ }
+
+ /**
+ * Returns a short description of the servlet.
+ *
+ * @return a String containing servlet description
+ */
+ @Override
+ public String getServletInfo() {
+ return "Short description";
+ }// </editor-fold>
+}
diff --git a/contrib/server/Translation-web/web/META-INF/context.xml b/contrib/server/Translation-web/web/META-INF/context.xml
new file mode 100755
index 000000000..9772ce4a1
--- /dev/null
+++ b/contrib/server/Translation-web/web/META-INF/context.xml
@@ -0,0 +1,2 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<Context antiJARLocking="true" path="/Translation"/>
diff --git a/contrib/server/Translation-web/web/WEB-INF/web.xml b/contrib/server/Translation-web/web/WEB-INF/web.xml
new file mode 100755
index 000000000..4147aafae
--- /dev/null
+++ b/contrib/server/Translation-web/web/WEB-INF/web.xml
@@ -0,0 +1,16 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<web-app version="3.0" xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd">
+ <servlet>
+ <servlet-name>Translate</servlet-name>
+ <servlet-class>com.hpl.mt.Translate</servlet-class>
+ </servlet>
+ <servlet-mapping>
+ <servlet-name>Translate</servlet-name>
+ <url-pattern>/Translate</url-pattern>
+ </servlet-mapping>
+ <session-config>
+ <session-timeout>
+ 30
+ </session-timeout>
+ </session-config>
+</web-app>
diff --git a/contrib/server/Translation-web/web/css/common.css b/contrib/server/Translation-web/web/css/common.css
new file mode 100755
index 000000000..c379ac161
--- /dev/null
+++ b/contrib/server/Translation-web/web/css/common.css
@@ -0,0 +1,22 @@
+/*
+ Document : common
+ Created on : Jul 31, 2012, 11:53:29 AM
+ Author : ulanov
+ Description:
+ Purpose of the stylesheet follows.
+*/
+
+root {
+ display: block;
+}
+
+body {font-size:small; font-family: Verdana,Arial,sans-serif;height:auto; width: auto;}
+span {font-size:medium;}
+
+#north_tab {height: 10%; width: 100%; float: top;}
+#south_tab {height: 80%; width: 100%; float: bottom;}
+
+#input_text {height: 50%; width: 30%; margin-right: 10px; float: left;}
+#output_text {height: 50%; width: 30%; margin-right: 10px; float: left;}
+
+#translate {float: left; margin-right: 10px;}
diff --git a/contrib/server/Translation-web/web/index.html b/contrib/server/Translation-web/web/index.html
new file mode 100755
index 000000000..dd7934739
--- /dev/null
+++ b/contrib/server/Translation-web/web/index.html
@@ -0,0 +1,47 @@
+<html lang="fr">
+<head>
+<style>
+</style>
+<link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/themes/base/jquery-ui.css" rel="stylesheet"
+ type="text/css"/>
+<script src="lib/jquery-1.6.4.js" type="text/javascript"></script>
+<script src="lib/jquery-ui-1.8.16.custom.js" type="text/javascript"></script>
+
+<link rel="stylesheet" href="css/common.css" type="text/css"/>
+<script>
+$(document).ready(function () {
+ $( "input:submit").button();
+ $( "input:submit").click(function(){
+ $.ajax({
+ url: "Translate",
+ type: "POST",
+ context: document.body,
+ data: {text: $("#input_text").val()}
+ }).done(function(data) {
+ $("#output_text").val(data);
+ });
+ })
+
+
+
+});
+</script>
+<title>Translate FR-EN</title>
+<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+</head>
+<body>
+<div id="north_tab">
+ <h2>Translate FR-EN</h2>
+</div>
+<div id="south_tab">
+ <textarea id="input_text">
+ </textarea>
+
+ <input id="translate" type="submit" value="Translate">
+
+ <textarea id="output_text" readonly="readonly">
+ </textarea>
+</div>
+
+</body>
+</html> \ No newline at end of file
diff --git a/contrib/server/Translation-web/web/lib/jquery-1.6.4.js b/contrib/server/Translation-web/web/lib/jquery-1.6.4.js
new file mode 100755
index 000000000..7a160217c
--- /dev/null
+++ b/contrib/server/Translation-web/web/lib/jquery-1.6.4.js
@@ -0,0 +1,9046 @@
+/*!
+ * jQuery JavaScript Library v1.6.4
+ * http://jquery.com/
+ *
+ * Copyright 2011, John Resig
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * Includes Sizzle.js
+ * http://sizzlejs.com/
+ * Copyright 2011, The Dojo Foundation
+ * Released under the MIT, BSD, and GPL Licenses.
+ *
+ * Date: Mon Sep 12 18:54:48 2011 -0400
+ */
+(function( window, undefined ) {
+
+// Use the correct document accordingly with window argument (sandbox)
+var document = window.document,
+ navigator = window.navigator,
+ location = window.location;
+var jQuery = (function() {
+
+// Define a local copy of jQuery
+var jQuery = function( selector, context ) {
+ // The jQuery object is actually just the init constructor 'enhanced'
+ return new jQuery.fn.init( selector, context, rootjQuery );
+ },
+
+ // Map over jQuery in case of overwrite
+ _jQuery = window.jQuery,
+
+ // Map over the $ in case of overwrite
+ _$ = window.$,
+
+ // A central reference to the root jQuery(document)
+ rootjQuery,
+
+ // A simple way to check for HTML strings or ID strings
+ // Prioritize #id over <tag> to avoid XSS via location.hash (#9521)
+ quickExpr = /^(?:[^#<]*(<[\w\W]+>)[^>]*$|#([\w\-]*)$)/,
+
+ // Check if a string has a non-whitespace character in it
+ rnotwhite = /\S/,
+
+ // Used for trimming whitespace
+ trimLeft = /^\s+/,
+ trimRight = /\s+$/,
+
+ // Check for digits
+ rdigit = /\d/,
+
+ // Match a standalone tag
+ rsingleTag = /^<(\w+)\s*\/?>(?:<\/\1>)?$/,
+
+ // JSON RegExp
+ rvalidchars = /^[\],:{}\s]*$/,
+ rvalidescape = /\\(?:["\\\/bfnrt]|u[0-9a-fA-F]{4})/g,
+ rvalidtokens = /"[^"\\\n\r]*"|true|false|null|-?\d+(?:\.\d*)?(?:[eE][+\-]?\d+)?/g,
+ rvalidbraces = /(?:^|:|,)(?:\s*\[)+/g,
+
+ // Useragent RegExp
+ rwebkit = /(webkit)[ \/]([\w.]+)/,
+ ropera = /(opera)(?:.*version)?[ \/]([\w.]+)/,
+ rmsie = /(msie) ([\w.]+)/,
+ rmozilla = /(mozilla)(?:.*? rv:([\w.]+))?/,
+
+ // Matches dashed string for camelizing
+ rdashAlpha = /-([a-z]|[0-9])/ig,
+ rmsPrefix = /^-ms-/,
+
+ // Used by jQuery.camelCase as callback to replace()
+ fcamelCase = function( all, letter ) {
+ return ( letter + "" ).toUpperCase();
+ },
+
+ // Keep a UserAgent string for use with jQuery.browser
+ userAgent = navigator.userAgent,
+
+ // For matching the engine and version of the browser
+ browserMatch,
+
+ // The deferred used on DOM ready
+ readyList,
+
+ // The ready event handler
+ DOMContentLoaded,
+
+ // Save a reference to some core methods
+ toString = Object.prototype.toString,
+ hasOwn = Object.prototype.hasOwnProperty,
+ push = Array.prototype.push,
+ slice = Array.prototype.slice,
+ trim = String.prototype.trim,
+ indexOf = Array.prototype.indexOf,
+
+ // [[Class]] -> type pairs
+ class2type = {};
+
+jQuery.fn = jQuery.prototype = {
+ constructor: jQuery,
+ init: function( selector, context, rootjQuery ) {
+ var match, elem, ret, doc;
+
+ // Handle $(""), $(null), or $(undefined)
+ if ( !selector ) {
+ return this;
+ }
+
+ // Handle $(DOMElement)
+ if ( selector.nodeType ) {
+ this.context = this[0] = selector;
+ this.length = 1;
+ return this;
+ }
+
+ // The body element only exists once, optimize finding it
+ if ( selector === "body" && !context && document.body ) {
+ this.context = document;
+ this[0] = document.body;
+ this.selector = selector;
+ this.length = 1;
+ return this;
+ }
+
+ // Handle HTML strings
+ if ( typeof selector === "string" ) {
+ // Are we dealing with HTML string or an ID?
+ if ( selector.charAt(0) === "<" && selector.charAt( selector.length - 1 ) === ">" && selector.length >= 3 ) {
+ // Assume that strings that start and end with <> are HTML and skip the regex check
+ match = [ null, selector, null ];
+
+ } else {
+ match = quickExpr.exec( selector );
+ }
+
+ // Verify a match, and that no context was specified for #id
+ if ( match && (match[1] || !context) ) {
+
+ // HANDLE: $(html) -> $(array)
+ if ( match[1] ) {
+ context = context instanceof jQuery ? context[0] : context;
+ doc = (context ? context.ownerDocument || context : document);
+
+ // If a single string is passed in and it's a single tag
+ // just do a createElement and skip the rest
+ ret = rsingleTag.exec( selector );
+
+ if ( ret ) {
+ if ( jQuery.isPlainObject( context ) ) {
+ selector = [ document.createElement( ret[1] ) ];
+ jQuery.fn.attr.call( selector, context, true );
+
+ } else {
+ selector = [ doc.createElement( ret[1] ) ];
+ }
+
+ } else {
+ ret = jQuery.buildFragment( [ match[1] ], [ doc ] );
+ selector = (ret.cacheable ? jQuery.clone(ret.fragment) : ret.fragment).childNodes;
+ }
+
+ return jQuery.merge( this, selector );
+
+ // HANDLE: $("#id")
+ } else {
+ elem = document.getElementById( match[2] );
+
+ // Check parentNode to catch when Blackberry 4.6 returns
+ // nodes that are no longer in the document #6963
+ if ( elem && elem.parentNode ) {
+ // Handle the case where IE and Opera return items
+ // by name instead of ID
+ if ( elem.id !== match[2] ) {
+ return rootjQuery.find( selector );
+ }
+
+ // Otherwise, we inject the element directly into the jQuery object
+ this.length = 1;
+ this[0] = elem;
+ }
+
+ this.context = document;
+ this.selector = selector;
+ return this;
+ }
+
+ // HANDLE: $(expr, $(...))
+ } else if ( !context || context.jquery ) {
+ return (context || rootjQuery).find( selector );
+
+ // HANDLE: $(expr, context)
+ // (which is just equivalent to: $(context).find(expr)
+ } else {
+ return this.constructor( context ).find( selector );
+ }
+
+ // HANDLE: $(function)
+ // Shortcut for document ready
+ } else if ( jQuery.isFunction( selector ) ) {
+ return rootjQuery.ready( selector );
+ }
+
+ if (selector.selector !== undefined) {
+ this.selector = selector.selector;
+ this.context = selector.context;
+ }
+
+ return jQuery.makeArray( selector, this );
+ },
+
+ // Start with an empty selector
+ selector: "",
+
+ // The current version of jQuery being used
+ jquery: "1.6.4",
+
+ // The default length of a jQuery object is 0
+ length: 0,
+
+ // The number of elements contained in the matched element set
+ size: function() {
+ return this.length;
+ },
+
+ toArray: function() {
+ return slice.call( this, 0 );
+ },
+
+ // Get the Nth element in the matched element set OR
+ // Get the whole matched element set as a clean array
+ get: function( num ) {
+ return num == null ?
+
+ // Return a 'clean' array
+ this.toArray() :
+
+ // Return just the object
+ ( num < 0 ? this[ this.length + num ] : this[ num ] );
+ },
+
+ // Take an array of elements and push it onto the stack
+ // (returning the new matched element set)
+ pushStack: function( elems, name, selector ) {
+ // Build a new jQuery matched element set
+ var ret = this.constructor();
+
+ if ( jQuery.isArray( elems ) ) {
+ push.apply( ret, elems );
+
+ } else {
+ jQuery.merge( ret, elems );
+ }
+
+ // Add the old object onto the stack (as a reference)
+ ret.prevObject = this;
+
+ ret.context = this.context;
+
+ if ( name === "find" ) {
+ ret.selector = this.selector + (this.selector ? " " : "") + selector;
+ } else if ( name ) {
+ ret.selector = this.selector + "." + name + "(" + selector + ")";
+ }
+
+ // Return the newly-formed element set
+ return ret;
+ },
+
+ // Execute a callback for every element in the matched set.
+ // (You can seed the arguments with an array of args, but this is
+ // only used internally.)
+ each: function( callback, args ) {
+ return jQuery.each( this, callback, args );
+ },
+
+ ready: function( fn ) {
+ // Attach the listeners
+ jQuery.bindReady();
+
+ // Add the callback
+ readyList.done( fn );
+
+ return this;
+ },
+
+ eq: function( i ) {
+ return i === -1 ?
+ this.slice( i ) :
+ this.slice( i, +i + 1 );
+ },
+
+ first: function() {
+ return this.eq( 0 );
+ },
+
+ last: function() {
+ return this.eq( -1 );
+ },
+
+ slice: function() {
+ return this.pushStack( slice.apply( this, arguments ),
+ "slice", slice.call(arguments).join(",") );
+ },
+
+ map: function( callback ) {
+ return this.pushStack( jQuery.map(this, function( elem, i ) {
+ return callback.call( elem, i, elem );
+ }));
+ },
+
+ end: function() {
+ return this.prevObject || this.constructor(null);
+ },
+
+ // For internal use only.
+ // Behaves like an Array's method, not like a jQuery method.
+ push: push,
+ sort: [].sort,
+ splice: [].splice
+};
+
+// Give the init function the jQuery prototype for later instantiation
+jQuery.fn.init.prototype = jQuery.fn;
+
+jQuery.extend = jQuery.fn.extend = function() {
+ var options, name, src, copy, copyIsArray, clone,
+ target = arguments[0] || {},
+ i = 1,
+ length = arguments.length,
+ deep = false;
+
+ // Handle a deep copy situation
+ if ( typeof target === "boolean" ) {
+ deep = target;
+ target = arguments[1] || {};
+ // skip the boolean and the target
+ i = 2;
+ }
+
+ // Handle case when target is a string or something (possible in deep copy)
+ if ( typeof target !== "object" && !jQuery.isFunction(target) ) {
+ target = {};
+ }
+
+ // extend jQuery itself if only one argument is passed
+ if ( length === i ) {
+ target = this;
+ --i;
+ }
+
+ for ( ; i < length; i++ ) {
+ // Only deal with non-null/undefined values
+ if ( (options = arguments[ i ]) != null ) {
+ // Extend the base object
+ for ( name in options ) {
+ src = target[ name ];
+ copy = options[ name ];
+
+ // Prevent never-ending loop
+ if ( target === copy ) {
+ continue;
+ }
+
+ // Recurse if we're merging plain objects or arrays
+ if ( deep && copy && ( jQuery.isPlainObject(copy) || (copyIsArray = jQuery.isArray(copy)) ) ) {
+ if ( copyIsArray ) {
+ copyIsArray = false;
+ clone = src && jQuery.isArray(src) ? src : [];
+
+ } else {
+ clone = src && jQuery.isPlainObject(src) ? src : {};
+ }
+
+ // Never move original objects, clone them
+ target[ name ] = jQuery.extend( deep, clone, copy );
+
+ // Don't bring in undefined values
+ } else if ( copy !== undefined ) {
+ target[ name ] = copy;
+ }
+ }
+ }
+ }
+
+ // Return the modified object
+ return target;
+};
+
+jQuery.extend({
+ noConflict: function( deep ) {
+ if ( window.$ === jQuery ) {
+ window.$ = _$;
+ }
+
+ if ( deep && window.jQuery === jQuery ) {
+ window.jQuery = _jQuery;
+ }
+
+ return jQuery;
+ },
+
+ // Is the DOM ready to be used? Set to true once it occurs.
+ isReady: false,
+
+ // A counter to track how many items to wait for before
+ // the ready event fires. See #6781
+ readyWait: 1,
+
+ // Hold (or release) the ready event
+ holdReady: function( hold ) {
+ if ( hold ) {
+ jQuery.readyWait++;
+ } else {
+ jQuery.ready( true );
+ }
+ },
+
+ // Handle when the DOM is ready
+ ready: function( wait ) {
+ // Either a released hold or an DOMready/load event and not yet ready
+ if ( (wait === true && !--jQuery.readyWait) || (wait !== true && !jQuery.isReady) ) {
+ // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443).
+ if ( !document.body ) {
+ return setTimeout( jQuery.ready, 1 );
+ }
+
+ // Remember that the DOM is ready
+ jQuery.isReady = true;
+
+ // If a normal DOM Ready event fired, decrement, and wait if need be
+ if ( wait !== true && --jQuery.readyWait > 0 ) {
+ return;
+ }
+
+ // If there are functions bound, to execute
+ readyList.resolveWith( document, [ jQuery ] );
+
+ // Trigger any bound ready events
+ if ( jQuery.fn.trigger ) {
+ jQuery( document ).trigger( "ready" ).unbind( "ready" );
+ }
+ }
+ },
+
+ bindReady: function() {
+ if ( readyList ) {
+ return;
+ }
+
+ readyList = jQuery._Deferred();
+
+ // Catch cases where $(document).ready() is called after the
+ // browser event has already occurred.
+ if ( document.readyState === "complete" ) {
+ // Handle it asynchronously to allow scripts the opportunity to delay ready
+ return setTimeout( jQuery.ready, 1 );
+ }
+
+ // Mozilla, Opera and webkit nightlies currently support this event
+ if ( document.addEventListener ) {
+ // Use the handy event callback
+ document.addEventListener( "DOMContentLoaded", DOMContentLoaded, false );
+
+ // A fallback to window.onload, that will always work
+ window.addEventListener( "load", jQuery.ready, false );
+
+ // If IE event model is used
+ } else if ( document.attachEvent ) {
+ // ensure firing before onload,
+ // maybe late but safe also for iframes
+ document.attachEvent( "onreadystatechange", DOMContentLoaded );
+
+ // A fallback to window.onload, that will always work
+ window.attachEvent( "onload", jQuery.ready );
+
+ // If IE and not a frame
+ // continually check to see if the document is ready
+ var toplevel = false;
+
+ try {
+ toplevel = window.frameElement == null;
+ } catch(e) {}
+
+ if ( document.documentElement.doScroll && toplevel ) {
+ doScrollCheck();
+ }
+ }
+ },
+
+ // See test/unit/core.js for details concerning isFunction.
+ // Since version 1.3, DOM methods and functions like alert
+ // aren't supported. They return false on IE (#2968).
+ isFunction: function( obj ) {
+ return jQuery.type(obj) === "function";
+ },
+
+ isArray: Array.isArray || function( obj ) {
+ return jQuery.type(obj) === "array";
+ },
+
+ // A crude way of determining if an object is a window
+ isWindow: function( obj ) {
+ return obj && typeof obj === "object" && "setInterval" in obj;
+ },
+
+ isNaN: function( obj ) {
+ return obj == null || !rdigit.test( obj ) || isNaN( obj );
+ },
+
+ type: function( obj ) {
+ return obj == null ?
+ String( obj ) :
+ class2type[ toString.call(obj) ] || "object";
+ },
+
+ isPlainObject: function( obj ) {
+ // Must be an Object.
+ // Because of IE, we also have to check the presence of the constructor property.
+ // Make sure that DOM nodes and window objects don't pass through, as well
+ if ( !obj || jQuery.type(obj) !== "object" || obj.nodeType || jQuery.isWindow( obj ) ) {
+ return false;
+ }
+
+ try {
+ // Not own constructor property must be Object
+ if ( obj.constructor &&
+ !hasOwn.call(obj, "constructor") &&
+ !hasOwn.call(obj.constructor.prototype, "isPrototypeOf") ) {
+ return false;
+ }
+ } catch ( e ) {
+ // IE8,9 Will throw exceptions on certain host objects #9897
+ return false;
+ }
+
+ // Own properties are enumerated firstly, so to speed up,
+ // if last one is own, then all properties are own.
+
+ var key;
+ for ( key in obj ) {}
+
+ return key === undefined || hasOwn.call( obj, key );
+ },
+
+ isEmptyObject: function( obj ) {
+ for ( var name in obj ) {
+ return false;
+ }
+ return true;
+ },
+
+ error: function( msg ) {
+ throw msg;
+ },
+
+ parseJSON: function( data ) {
+ if ( typeof data !== "string" || !data ) {
+ return null;
+ }
+
+ // Make sure leading/trailing whitespace is removed (IE can't handle it)
+ data = jQuery.trim( data );
+
+ // Attempt to parse using the native JSON parser first
+ if ( window.JSON && window.JSON.parse ) {
+ return window.JSON.parse( data );
+ }
+
+ // Make sure the incoming data is actual JSON
+ // Logic borrowed from http://json.org/json2.js
+ if ( rvalidchars.test( data.replace( rvalidescape, "@" )
+ .replace( rvalidtokens, "]" )
+ .replace( rvalidbraces, "")) ) {
+
+ return (new Function( "return " + data ))();
+
+ }
+ jQuery.error( "Invalid JSON: " + data );
+ },
+
+ // Cross-browser xml parsing
+ parseXML: function( data ) {
+ var xml, tmp;
+ try {
+ if ( window.DOMParser ) { // Standard
+ tmp = new DOMParser();
+ xml = tmp.parseFromString( data , "text/xml" );
+ } else { // IE
+ xml = new ActiveXObject( "Microsoft.XMLDOM" );
+ xml.async = "false";
+ xml.loadXML( data );
+ }
+ } catch( e ) {
+ xml = undefined;
+ }
+ if ( !xml || !xml.documentElement || xml.getElementsByTagName( "parsererror" ).length ) {
+ jQuery.error( "Invalid XML: " + data );
+ }
+ return xml;
+ },
+
+ noop: function() {},
+
+ // Evaluates a script in a global context
+ // Workarounds based on findings by Jim Driscoll
+ // http://weblogs.java.net/blog/driscoll/archive/2009/09/08/eval-javascript-global-context
+ globalEval: function( data ) {
+ if ( data && rnotwhite.test( data ) ) {
+ // We use execScript on Internet Explorer
+ // We use an anonymous function so that context is window
+ // rather than jQuery in Firefox
+ ( window.execScript || function( data ) {
+ window[ "eval" ].call( window, data );
+ } )( data );
+ }
+ },
+
+ // Convert dashed to camelCase; used by the css and data modules
+ // Microsoft forgot to hump their vendor prefix (#9572)
+ camelCase: function( string ) {
+ return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase );
+ },
+
+ nodeName: function( elem, name ) {
+ return elem.nodeName && elem.nodeName.toUpperCase() === name.toUpperCase();
+ },
+
+ // args is for internal usage only
+ each: function( object, callback, args ) {
+ var name, i = 0,
+ length = object.length,
+ isObj = length === undefined || jQuery.isFunction( object );
+
+ if ( args ) {
+ if ( isObj ) {
+ for ( name in object ) {
+ if ( callback.apply( object[ name ], args ) === false ) {
+ break;
+ }
+ }
+ } else {
+ for ( ; i < length; ) {
+ if ( callback.apply( object[ i++ ], args ) === false ) {
+ break;
+ }
+ }
+ }
+
+ // A special, fast, case for the most common use of each
+ } else {
+ if ( isObj ) {
+ for ( name in object ) {
+ if ( callback.call( object[ name ], name, object[ name ] ) === false ) {
+ break;
+ }
+ }
+ } else {
+ for ( ; i < length; ) {
+ if ( callback.call( object[ i ], i, object[ i++ ] ) === false ) {
+ break;
+ }
+ }
+ }
+ }
+
+ return object;
+ },
+
+ // Use native String.trim function wherever possible
+ trim: trim ?
+ function( text ) {
+ return text == null ?
+ "" :
+ trim.call( text );
+ } :
+
+ // Otherwise use our own trimming functionality
+ function( text ) {
+ return text == null ?
+ "" :
+ text.toString().replace( trimLeft, "" ).replace( trimRight, "" );
+ },
+
+ // results is for internal usage only
+ makeArray: function( array, results ) {
+ var ret = results || [];
+
+ if ( array != null ) {
+ // The window, strings (and functions) also have 'length'
+ // The extra typeof function check is to prevent crashes
+ // in Safari 2 (See: #3039)
+ // Tweaked logic slightly to handle Blackberry 4.7 RegExp issues #6930
+ var type = jQuery.type( array );
+
+ if ( array.length == null || type === "string" || type === "function" || type === "regexp" || jQuery.isWindow( array ) ) {
+ push.call( ret, array );
+ } else {
+ jQuery.merge( ret, array );
+ }
+ }
+
+ return ret;
+ },
+
+ inArray: function( elem, array ) {
+ if ( !array ) {
+ return -1;
+ }
+
+ if ( indexOf ) {
+ return indexOf.call( array, elem );
+ }
+
+ for ( var i = 0, length = array.length; i < length; i++ ) {
+ if ( array[ i ] === elem ) {
+ return i;
+ }
+ }
+
+ return -1;
+ },
+
+ merge: function( first, second ) {
+ var i = first.length,
+ j = 0;
+
+ if ( typeof second.length === "number" ) {
+ for ( var l = second.length; j < l; j++ ) {
+ first[ i++ ] = second[ j ];
+ }
+
+ } else {
+ while ( second[j] !== undefined ) {
+ first[ i++ ] = second[ j++ ];
+ }
+ }
+
+ first.length = i;
+
+ return first;
+ },
+
+ grep: function( elems, callback, inv ) {
+ var ret = [], retVal;
+ inv = !!inv;
+
+ // Go through the array, only saving the items
+ // that pass the validator function
+ for ( var i = 0, length = elems.length; i < length; i++ ) {
+ retVal = !!callback( elems[ i ], i );
+ if ( inv !== retVal ) {
+ ret.push( elems[ i ] );
+ }
+ }
+
+ return ret;
+ },
+
+ // arg is for internal usage only
+ map: function( elems, callback, arg ) {
+ var value, key, ret = [],
+ i = 0,
+ length = elems.length,
+ // jquery objects are treated as arrays
+ isArray = elems instanceof jQuery || length !== undefined && typeof length === "number" && ( ( length > 0 && elems[ 0 ] && elems[ length -1 ] ) || length === 0 || jQuery.isArray( elems ) ) ;
+
+ // Go through the array, translating each of the items to their
+ if ( isArray ) {
+ for ( ; i < length; i++ ) {
+ value = callback( elems[ i ], i, arg );
+
+ if ( value != null ) {
+ ret[ ret.length ] = value;
+ }
+ }
+
+ // Go through every key on the object,
+ } else {
+ for ( key in elems ) {
+ value = callback( elems[ key ], key, arg );
+
+ if ( value != null ) {
+ ret[ ret.length ] = value;
+ }
+ }
+ }
+
+ // Flatten any nested arrays
+ return ret.concat.apply( [], ret );
+ },
+
+ // A global GUID counter for objects
+ guid: 1,
+
+ // Bind a function to a context, optionally partially applying any
+ // arguments.
+ proxy: function( fn, context ) {
+ if ( typeof context === "string" ) {
+ var tmp = fn[ context ];
+ context = fn;
+ fn = tmp;
+ }
+
+ // Quick check to determine if target is callable, in the spec
+ // this throws a TypeError, but we will just return undefined.
+ if ( !jQuery.isFunction( fn ) ) {
+ return undefined;
+ }
+
+ // Simulated bind
+ var args = slice.call( arguments, 2 ),
+ proxy = function() {
+ return fn.apply( context, args.concat( slice.call( arguments ) ) );
+ };
+
+ // Set the guid of unique handler to the same of original handler, so it can be removed
+ proxy.guid = fn.guid = fn.guid || proxy.guid || jQuery.guid++;
+
+ return proxy;
+ },
+
+ // Mutifunctional method to get and set values to a collection
+ // The value/s can optionally be executed if it's a function
+ access: function( elems, key, value, exec, fn, pass ) {
+ var length = elems.length;
+
+ // Setting many attributes
+ if ( typeof key === "object" ) {
+ for ( var k in key ) {
+ jQuery.access( elems, k, key[k], exec, fn, value );
+ }
+ return elems;
+ }
+
+ // Setting one attribute
+ if ( value !== undefined ) {
+ // Optionally, function values get executed if exec is true
+ exec = !pass && exec && jQuery.isFunction(value);
+
+ for ( var i = 0; i < length; i++ ) {
+ fn( elems[i], key, exec ? value.call( elems[i], i, fn( elems[i], key ) ) : value, pass );
+ }
+
+ return elems;
+ }
+
+ // Getting an attribute
+ return length ? fn( elems[0], key ) : undefined;
+ },
+
+ now: function() {
+ return (new Date()).getTime();
+ },
+
+ // Use of jQuery.browser is frowned upon.
+ // More details: http://docs.jquery.com/Utilities/jQuery.browser
+ uaMatch: function( ua ) {
+ ua = ua.toLowerCase();
+
+ var match = rwebkit.exec( ua ) ||
+ ropera.exec( ua ) ||
+ rmsie.exec( ua ) ||
+ ua.indexOf("compatible") < 0 && rmozilla.exec( ua ) ||
+ [];
+
+ return { browser: match[1] || "", version: match[2] || "0" };
+ },
+
+ sub: function() {
+ function jQuerySub( selector, context ) {
+ return new jQuerySub.fn.init( selector, context );
+ }
+ jQuery.extend( true, jQuerySub, this );
+ jQuerySub.superclass = this;
+ jQuerySub.fn = jQuerySub.prototype = this();
+ jQuerySub.fn.constructor = jQuerySub;
+ jQuerySub.sub = this.sub;
+ jQuerySub.fn.init = function init( selector, context ) {
+ if ( context && context instanceof jQuery && !(context instanceof jQuerySub) ) {
+ context = jQuerySub( context );
+ }
+
+ return jQuery.fn.init.call( this, selector, context, rootjQuerySub );
+ };
+ jQuerySub.fn.init.prototype = jQuerySub.fn;
+ var rootjQuerySub = jQuerySub(document);
+ return jQuerySub;
+ },
+
+ browser: {}
+});
+
+// Populate the class2type map
+jQuery.each("Boolean Number String Function Array Date RegExp Object".split(" "), function(i, name) {
+ class2type[ "[object " + name + "]" ] = name.toLowerCase();
+});
+
+browserMatch = jQuery.uaMatch( userAgent );
+if ( browserMatch.browser ) {
+ jQuery.browser[ browserMatch.browser ] = true;
+ jQuery.browser.version = browserMatch.version;
+}
+
+// Deprecated, use jQuery.browser.webkit instead
+if ( jQuery.browser.webkit ) {
+ jQuery.browser.safari = true;
+}
+
+// IE doesn't match non-breaking spaces with \s
+if ( rnotwhite.test( "\xA0" ) ) {
+ trimLeft = /^[\s\xA0]+/;
+ trimRight = /[\s\xA0]+$/;
+}
+
+// All jQuery objects should point back to these
+rootjQuery = jQuery(document);
+
+// Cleanup functions for the document ready method
+if ( document.addEventListener ) {
+ DOMContentLoaded = function() {
+ document.removeEventListener( "DOMContentLoaded", DOMContentLoaded, false );
+ jQuery.ready();
+ };
+
+} else if ( document.attachEvent ) {
+ DOMContentLoaded = function() {
+ // Make sure body exists, at least, in case IE gets a little overzealous (ticket #5443).
+ if ( document.readyState === "complete" ) {
+ document.detachEvent( "onreadystatechange", DOMContentLoaded );
+ jQuery.ready();
+ }
+ };
+}
+
+// The DOM ready check for Internet Explorer
+function doScrollCheck() {
+ if ( jQuery.isReady ) {
+ return;
+ }
+
+ try {
+ // If IE is used, use the trick by Diego Perini
+ // http://javascript.nwbox.com/IEContentLoaded/
+ document.documentElement.doScroll("left");
+ } catch(e) {
+ setTimeout( doScrollCheck, 1 );
+ return;
+ }
+
+ // and execute any waiting functions
+ jQuery.ready();
+}
+
+return jQuery;
+
+})();
+
+
+var // Promise methods
+ promiseMethods = "done fail isResolved isRejected promise then always pipe".split( " " ),
+ // Static reference to slice
+ sliceDeferred = [].slice;
+
+jQuery.extend({
+ // Create a simple deferred (one callbacks list)
+ _Deferred: function() {
+ var // callbacks list
+ callbacks = [],
+ // stored [ context , args ]
+ fired,
+ // to avoid firing when already doing so
+ firing,
+ // flag to know if the deferred has been cancelled
+ cancelled,
+ // the deferred itself
+ deferred = {
+
+ // done( f1, f2, ...)
+ done: function() {
+ if ( !cancelled ) {
+ var args = arguments,
+ i,
+ length,
+ elem,
+ type,
+ _fired;
+ if ( fired ) {
+ _fired = fired;
+ fired = 0;
+ }
+ for ( i = 0, length = args.length; i < length; i++ ) {
+ elem = args[ i ];
+ type = jQuery.type( elem );
+ if ( type === "array" ) {
+ deferred.done.apply( deferred, elem );
+ } else if ( type === "function" ) {
+ callbacks.push( elem );
+ }
+ }
+ if ( _fired ) {
+ deferred.resolveWith( _fired[ 0 ], _fired[ 1 ] );
+ }
+ }
+ return this;
+ },
+
+ // resolve with given context and args
+ resolveWith: function( context, args ) {
+ if ( !cancelled && !fired && !firing ) {
+ // make sure args are available (#8421)
+ args = args || [];
+ firing = 1;
+ try {
+ while( callbacks[ 0 ] ) {
+ callbacks.shift().apply( context, args );
+ }
+ }
+ finally {
+ fired = [ context, args ];
+ firing = 0;
+ }
+ }
+ return this;
+ },
+
+ // resolve with this as context and given arguments
+ resolve: function() {
+ deferred.resolveWith( this, arguments );
+ return this;
+ },
+
+ // Has this deferred been resolved?
+ isResolved: function() {
+ return !!( firing || fired );
+ },
+
+ // Cancel
+ cancel: function() {
+ cancelled = 1;
+ callbacks = [];
+ return this;
+ }
+ };
+
+ return deferred;
+ },
+
+ // Full fledged deferred (two callbacks list)
+ Deferred: function( func ) {
+ var deferred = jQuery._Deferred(),
+ failDeferred = jQuery._Deferred(),
+ promise;
+ // Add errorDeferred methods, then and promise
+ jQuery.extend( deferred, {
+ then: function( doneCallbacks, failCallbacks ) {
+ deferred.done( doneCallbacks ).fail( failCallbacks );
+ return this;
+ },
+ always: function() {
+ return deferred.done.apply( deferred, arguments ).fail.apply( this, arguments );
+ },
+ fail: failDeferred.done,
+ rejectWith: failDeferred.resolveWith,
+ reject: failDeferred.resolve,
+ isRejected: failDeferred.isResolved,
+ pipe: function( fnDone, fnFail ) {
+ return jQuery.Deferred(function( newDefer ) {
+ jQuery.each( {
+ done: [ fnDone, "resolve" ],
+ fail: [ fnFail, "reject" ]
+ }, function( handler, data ) {
+ var fn = data[ 0 ],
+ action = data[ 1 ],
+ returned;
+ if ( jQuery.isFunction( fn ) ) {
+ deferred[ handler ](function() {
+ returned = fn.apply( this, arguments );
+ if ( returned && jQuery.isFunction( returned.promise ) ) {
+ returned.promise().then( newDefer.resolve, newDefer.reject );
+ } else {
+ newDefer[ action + "With" ]( this === deferred ? newDefer : this, [ returned ] );
+ }
+ });
+ } else {
+ deferred[ handler ]( newDefer[ action ] );
+ }
+ });
+ }).promise();
+ },
+ // Get a promise for this deferred
+ // If obj is provided, the promise aspect is added to the object
+ promise: function( obj ) {
+ if ( obj == null ) {
+ if ( promise ) {
+ return promise;
+ }
+ promise = obj = {};
+ }
+ var i = promiseMethods.length;
+ while( i-- ) {
+ obj[ promiseMethods[i] ] = deferred[ promiseMethods[i] ];
+ }
+ return obj;
+ }
+ });
+ // Make sure only one callback list will be used
+ deferred.done( failDeferred.cancel ).fail( deferred.cancel );
+ // Unexpose cancel
+ delete deferred.cancel;
+ // Call given func if any
+ if ( func ) {
+ func.call( deferred, deferred );
+ }
+ return deferred;
+ },
+
+ // Deferred helper
+ when: function( firstParam ) {
+ var args = arguments,
+ i = 0,
+ length = args.length,
+ count = length,
+ deferred = length <= 1 && firstParam && jQuery.isFunction( firstParam.promise ) ?
+ firstParam :
+ jQuery.Deferred();
+ function resolveFunc( i ) {
+ return function( value ) {
+ args[ i ] = arguments.length > 1 ? sliceDeferred.call( arguments, 0 ) : value;
+ if ( !( --count ) ) {
+ // Strange bug in FF4:
+ // Values changed onto the arguments object sometimes end up as undefined values
+ // outside the $.when method. Cloning the object into a fresh array solves the issue
+ deferred.resolveWith( deferred, sliceDeferred.call( args, 0 ) );
+ }
+ };
+ }
+ if ( length > 1 ) {
+ for( ; i < length; i++ ) {
+ if ( args[ i ] && jQuery.isFunction( args[ i ].promise ) ) {
+ args[ i ].promise().then( resolveFunc(i), deferred.reject );
+ } else {
+ --count;
+ }
+ }
+ if ( !count ) {
+ deferred.resolveWith( deferred, args );
+ }
+ } else if ( deferred !== firstParam ) {
+ deferred.resolveWith( deferred, length ? [ firstParam ] : [] );
+ }
+ return deferred.promise();
+ }
+});
+
+
+
+jQuery.support = (function() {
+
+ var div = document.createElement( "div" ),
+ documentElement = document.documentElement,
+ all,
+ a,
+ select,
+ opt,
+ input,
+ marginDiv,
+ support,
+ fragment,
+ body,
+ testElementParent,
+ testElement,
+ testElementStyle,
+ tds,
+ events,
+ eventName,
+ i,
+ isSupported;
+
+ // Preliminary tests
+ div.setAttribute("className", "t");
+ div.innerHTML = " <link/><table></table><a href='/a' style='top:1px;float:left;opacity:.55;'>a</a><input type='checkbox'/>";
+
+
+ all = div.getElementsByTagName( "*" );
+ a = div.getElementsByTagName( "a" )[ 0 ];
+
+ // Can't get basic test support
+ if ( !all || !all.length || !a ) {
+ return {};
+ }
+
+ // First batch of supports tests
+ select = document.createElement( "select" );
+ opt = select.appendChild( document.createElement("option") );
+ input = div.getElementsByTagName( "input" )[ 0 ];
+
+ support = {
+ // IE strips leading whitespace when .innerHTML is used
+ leadingWhitespace: ( div.firstChild.nodeType === 3 ),
+
+ // Make sure that tbody elements aren't automatically inserted
+ // IE will insert them into empty tables
+ tbody: !div.getElementsByTagName( "tbody" ).length,
+
+ // Make sure that link elements get serialized correctly by innerHTML
+ // This requires a wrapper element in IE
+ htmlSerialize: !!div.getElementsByTagName( "link" ).length,
+
+ // Get the style information from getAttribute
+ // (IE uses .cssText instead)
+ style: /top/.test( a.getAttribute("style") ),
+
+ // Make sure that URLs aren't manipulated
+ // (IE normalizes it by default)
+ hrefNormalized: ( a.getAttribute( "href" ) === "/a" ),
+
+ // Make sure that element opacity exists
+ // (IE uses filter instead)
+ // Use a regex to work around a WebKit issue. See #5145
+ opacity: /^0.55$/.test( a.style.opacity ),
+
+ // Verify style float existence
+ // (IE uses styleFloat instead of cssFloat)
+ cssFloat: !!a.style.cssFloat,
+
+ // Make sure that if no value is specified for a checkbox
+ // that it defaults to "on".
+ // (WebKit defaults to "" instead)
+ checkOn: ( input.value === "on" ),
+
+ // Make sure that a selected-by-default option has a working selected property.
+ // (WebKit defaults to false instead of true, IE too, if it's in an optgroup)
+ optSelected: opt.selected,
+
+ // Test setAttribute on camelCase class. If it works, we need attrFixes when doing get/setAttribute (ie6/7)
+ getSetAttribute: div.className !== "t",
+
+ // Will be defined later
+ submitBubbles: true,
+ changeBubbles: true,
+ focusinBubbles: false,
+ deleteExpando: true,
+ noCloneEvent: true,
+ inlineBlockNeedsLayout: false,
+ shrinkWrapBlocks: false,
+ reliableMarginRight: true
+ };
+
+ // Make sure checked status is properly cloned
+ input.checked = true;
+ support.noCloneChecked = input.cloneNode( true ).checked;
+
+ // Make sure that the options inside disabled selects aren't marked as disabled
+ // (WebKit marks them as disabled)
+ select.disabled = true;
+ support.optDisabled = !opt.disabled;
+
+ // Test to see if it's possible to delete an expando from an element
+ // Fails in Internet Explorer
+ try {
+ delete div.test;
+ } catch( e ) {
+ support.deleteExpando = false;
+ }
+
+ if ( !div.addEventListener && div.attachEvent && div.fireEvent ) {
+ div.attachEvent( "onclick", function() {
+ // Cloning a node shouldn't copy over any
+ // bound event handlers (IE does this)
+ support.noCloneEvent = false;
+ });
+ div.cloneNode( true ).fireEvent( "onclick" );
+ }
+
+ // Check if a radio maintains it's value
+ // after being appended to the DOM
+ input = document.createElement("input");
+ input.value = "t";
+ input.setAttribute("type", "radio");
+ support.radioValue = input.value === "t";
+
+ input.setAttribute("checked", "checked");
+ div.appendChild( input );
+ fragment = document.createDocumentFragment();
+ fragment.appendChild( div.firstChild );
+
+ // WebKit doesn't clone checked state correctly in fragments
+ support.checkClone = fragment.cloneNode( true ).cloneNode( true ).lastChild.checked;
+
+ div.innerHTML = "";
+
+ // Figure out if the W3C box model works as expected
+ div.style.width = div.style.paddingLeft = "1px";
+
+ body = document.getElementsByTagName( "body" )[ 0 ];
+ // We use our own, invisible, body unless the body is already present
+ // in which case we use a div (#9239)
+ testElement = document.createElement( body ? "div" : "body" );
+ testElementStyle = {
+ visibility: "hidden",
+ width: 0,
+ height: 0,
+ border: 0,
+ margin: 0,
+ background: "none"
+ };
+ if ( body ) {
+ jQuery.extend( testElementStyle, {
+ position: "absolute",
+ left: "-1000px",
+ top: "-1000px"
+ });
+ }
+ for ( i in testElementStyle ) {
+ testElement.style[ i ] = testElementStyle[ i ];
+ }
+ testElement.appendChild( div );
+ testElementParent = body || documentElement;
+ testElementParent.insertBefore( testElement, testElementParent.firstChild );
+
+ // Check if a disconnected checkbox will retain its checked
+ // value of true after appended to the DOM (IE6/7)
+ support.appendChecked = input.checked;
+
+ support.boxModel = div.offsetWidth === 2;
+
+ if ( "zoom" in div.style ) {
+ // Check if natively block-level elements act like inline-block
+ // elements when setting their display to 'inline' and giving
+ // them layout
+ // (IE < 8 does this)
+ div.style.display = "inline";
+ div.style.zoom = 1;
+ support.inlineBlockNeedsLayout = ( div.offsetWidth === 2 );
+
+ // Check if elements with layout shrink-wrap their children
+ // (IE 6 does this)
+ div.style.display = "";
+ div.innerHTML = "<div style='width:4px;'></div>";
+ support.shrinkWrapBlocks = ( div.offsetWidth !== 2 );
+ }
+
+ div.innerHTML = "<table><tr><td style='padding:0;border:0;display:none'></td><td>t</td></tr></table>";
+ tds = div.getElementsByTagName( "td" );
+
+ // Check if table cells still have offsetWidth/Height when they are set
+ // to display:none and there are still other visible table cells in a
+ // table row; if so, offsetWidth/Height are not reliable for use when
+ // determining if an element has been hidden directly using
+ // display:none (it is still safe to use offsets if a parent element is
+ // hidden; don safety goggles and see bug #4512 for more information).
+ // (only IE 8 fails this test)
+ isSupported = ( tds[ 0 ].offsetHeight === 0 );
+
+ tds[ 0 ].style.display = "";
+ tds[ 1 ].style.display = "none";
+
+ // Check if empty table cells still have offsetWidth/Height
+ // (IE < 8 fail this test)
+ support.reliableHiddenOffsets = isSupported && ( tds[ 0 ].offsetHeight === 0 );
+ div.innerHTML = "";
+
+ // Check if div with explicit width and no margin-right incorrectly
+ // gets computed margin-right based on width of container. For more
+ // info see bug #3333
+ // Fails in WebKit before Feb 2011 nightlies
+ // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right
+ if ( document.defaultView && document.defaultView.getComputedStyle ) {
+ marginDiv = document.createElement( "div" );
+ marginDiv.style.width = "0";
+ marginDiv.style.marginRight = "0";
+ div.appendChild( marginDiv );
+ support.reliableMarginRight =
+ ( parseInt( ( document.defaultView.getComputedStyle( marginDiv, null ) || { marginRight: 0 } ).marginRight, 10 ) || 0 ) === 0;
+ }
+
+ // Remove the body element we added
+ testElement.innerHTML = "";
+ testElementParent.removeChild( testElement );
+
+ // Technique from Juriy Zaytsev
+ // http://thinkweb2.com/projects/prototype/detecting-event-support-without-browser-sniffing/
+ // We only care about the case where non-standard event systems
+ // are used, namely in IE. Short-circuiting here helps us to
+ // avoid an eval call (in setAttribute) which can cause CSP
+ // to go haywire. See: https://developer.mozilla.org/en/Security/CSP
+ if ( div.attachEvent ) {
+ for( i in {
+ submit: 1,
+ change: 1,
+ focusin: 1
+ } ) {
+ eventName = "on" + i;
+ isSupported = ( eventName in div );
+ if ( !isSupported ) {
+ div.setAttribute( eventName, "return;" );
+ isSupported = ( typeof div[ eventName ] === "function" );
+ }
+ support[ i + "Bubbles" ] = isSupported;
+ }
+ }
+
+ // Null connected elements to avoid leaks in IE
+ testElement = fragment = select = opt = body = marginDiv = div = input = null;
+
+ return support;
+})();
+
+// Keep track of boxModel
+jQuery.boxModel = jQuery.support.boxModel;
+
+
+
+
+var rbrace = /^(?:\{.*\}|\[.*\])$/,
+ rmultiDash = /([A-Z])/g;
+
+jQuery.extend({
+ cache: {},
+
+ // Please use with caution
+ uuid: 0,
+
+ // Unique for each copy of jQuery on the page
+ // Non-digits removed to match rinlinejQuery
+ expando: "jQuery" + ( jQuery.fn.jquery + Math.random() ).replace( /\D/g, "" ),
+
+ // The following elements throw uncatchable exceptions if you
+ // attempt to add expando properties to them.
+ noData: {
+ "embed": true,
+ // Ban all objects except for Flash (which handle expandos)
+ "object": "clsid:D27CDB6E-AE6D-11cf-96B8-444553540000",
+ "applet": true
+ },
+
+ hasData: function( elem ) {
+ elem = elem.nodeType ? jQuery.cache[ elem[jQuery.expando] ] : elem[ jQuery.expando ];
+
+ return !!elem && !isEmptyDataObject( elem );
+ },
+
+ data: function( elem, name, data, pvt /* Internal Use Only */ ) {
+ if ( !jQuery.acceptData( elem ) ) {
+ return;
+ }
+
+ var thisCache, ret,
+ internalKey = jQuery.expando,
+ getByName = typeof name === "string",
+
+ // We have to handle DOM nodes and JS objects differently because IE6-7
+ // can't GC object references properly across the DOM-JS boundary
+ isNode = elem.nodeType,
+
+ // Only DOM nodes need the global jQuery cache; JS object data is
+ // attached directly to the object so GC can occur automatically
+ cache = isNode ? jQuery.cache : elem,
+
+ // Only defining an ID for JS objects if its cache already exists allows
+ // the code to shortcut on the same path as a DOM node with no cache
+ id = isNode ? elem[ jQuery.expando ] : elem[ jQuery.expando ] && jQuery.expando;
+
+ // Avoid doing any more work than we need to when trying to get data on an
+ // object that has no data at all
+ if ( (!id || (pvt && id && (cache[ id ] && !cache[ id ][ internalKey ]))) && getByName && data === undefined ) {
+ return;
+ }
+
+ if ( !id ) {
+ // Only DOM nodes need a new unique ID for each element since their data
+ // ends up in the global cache
+ if ( isNode ) {
+ elem[ jQuery.expando ] = id = ++jQuery.uuid;
+ } else {
+ id = jQuery.expando;
+ }
+ }
+
+ if ( !cache[ id ] ) {
+ cache[ id ] = {};
+
+ // TODO: This is a hack for 1.5 ONLY. Avoids exposing jQuery
+ // metadata on plain JS objects when the object is serialized using
+ // JSON.stringify
+ if ( !isNode ) {
+ cache[ id ].toJSON = jQuery.noop;
+ }
+ }
+
+ // An object can be passed to jQuery.data instead of a key/value pair; this gets
+ // shallow copied over onto the existing cache
+ if ( typeof name === "object" || typeof name === "function" ) {
+ if ( pvt ) {
+ cache[ id ][ internalKey ] = jQuery.extend(cache[ id ][ internalKey ], name);
+ } else {
+ cache[ id ] = jQuery.extend(cache[ id ], name);
+ }
+ }
+
+ thisCache = cache[ id ];
+
+ // Internal jQuery data is stored in a separate object inside the object's data
+ // cache in order to avoid key collisions between internal data and user-defined
+ // data
+ if ( pvt ) {
+ if ( !thisCache[ internalKey ] ) {
+ thisCache[ internalKey ] = {};
+ }
+
+ thisCache = thisCache[ internalKey ];
+ }
+
+ if ( data !== undefined ) {
+ thisCache[ jQuery.camelCase( name ) ] = data;
+ }
+
+ // TODO: This is a hack for 1.5 ONLY. It will be removed in 1.6. Users should
+ // not attempt to inspect the internal events object using jQuery.data, as this
+ // internal data object is undocumented and subject to change.
+ if ( name === "events" && !thisCache[name] ) {
+ return thisCache[ internalKey ] && thisCache[ internalKey ].events;
+ }
+
+ // Check for both converted-to-camel and non-converted data property names
+ // If a data property was specified
+ if ( getByName ) {
+
+ // First Try to find as-is property data
+ ret = thisCache[ name ];
+
+ // Test for null|undefined property data
+ if ( ret == null ) {
+
+ // Try to find the camelCased property
+ ret = thisCache[ jQuery.camelCase( name ) ];
+ }
+ } else {
+ ret = thisCache;
+ }
+
+ return ret;
+ },
+
+ removeData: function( elem, name, pvt /* Internal Use Only */ ) {
+ if ( !jQuery.acceptData( elem ) ) {
+ return;
+ }
+
+ var thisCache,
+
+ // Reference to internal data cache key
+ internalKey = jQuery.expando,
+
+ isNode = elem.nodeType,
+
+ // See jQuery.data for more information
+ cache = isNode ? jQuery.cache : elem,
+
+ // See jQuery.data for more information
+ id = isNode ? elem[ jQuery.expando ] : jQuery.expando;
+
+ // If there is already no cache entry for this object, there is no
+ // purpose in continuing
+ if ( !cache[ id ] ) {
+ return;
+ }
+
+ if ( name ) {
+
+ thisCache = pvt ? cache[ id ][ internalKey ] : cache[ id ];
+
+ if ( thisCache ) {
+
+ // Support interoperable removal of hyphenated or camelcased keys
+ if ( !thisCache[ name ] ) {
+ name = jQuery.camelCase( name );
+ }
+
+ delete thisCache[ name ];
+
+ // If there is no data left in the cache, we want to continue
+ // and let the cache object itself get destroyed
+ if ( !isEmptyDataObject(thisCache) ) {
+ return;
+ }
+ }
+ }
+
+ // See jQuery.data for more information
+ if ( pvt ) {
+ delete cache[ id ][ internalKey ];
+
+ // Don't destroy the parent cache unless the internal data object
+ // had been the only thing left in it
+ if ( !isEmptyDataObject(cache[ id ]) ) {
+ return;
+ }
+ }
+
+ var internalCache = cache[ id ][ internalKey ];
+
+ // Browsers that fail expando deletion also refuse to delete expandos on
+ // the window, but it will allow it on all other JS objects; other browsers
+ // don't care
+ // Ensure that `cache` is not a window object #10080
+ if ( jQuery.support.deleteExpando || !cache.setInterval ) {
+ delete cache[ id ];
+ } else {
+ cache[ id ] = null;
+ }
+
+ // We destroyed the entire user cache at once because it's faster than
+ // iterating through each key, but we need to continue to persist internal
+ // data if it existed
+ if ( internalCache ) {
+ cache[ id ] = {};
+ // TODO: This is a hack for 1.5 ONLY. Avoids exposing jQuery
+ // metadata on plain JS objects when the object is serialized using
+ // JSON.stringify
+ if ( !isNode ) {
+ cache[ id ].toJSON = jQuery.noop;
+ }
+
+ cache[ id ][ internalKey ] = internalCache;
+
+ // Otherwise, we need to eliminate the expando on the node to avoid
+ // false lookups in the cache for entries that no longer exist
+ } else if ( isNode ) {
+ // IE does not allow us to delete expando properties from nodes,
+ // nor does it have a removeAttribute function on Document nodes;
+ // we must handle all of these cases
+ if ( jQuery.support.deleteExpando ) {
+ delete elem[ jQuery.expando ];
+ } else if ( elem.removeAttribute ) {
+ elem.removeAttribute( jQuery.expando );
+ } else {
+ elem[ jQuery.expando ] = null;
+ }
+ }
+ },
+
+ // For internal use only.
+ _data: function( elem, name, data ) {
+ return jQuery.data( elem, name, data, true );
+ },
+
+ // A method for determining if a DOM node can handle the data expando
+ acceptData: function( elem ) {
+ if ( elem.nodeName ) {
+ var match = jQuery.noData[ elem.nodeName.toLowerCase() ];
+
+ if ( match ) {
+ return !(match === true || elem.getAttribute("classid") !== match);
+ }
+ }
+
+ return true;
+ }
+});
+
+jQuery.fn.extend({
+ data: function( key, value ) {
+ var data = null;
+
+ if ( typeof key === "undefined" ) {
+ if ( this.length ) {
+ data = jQuery.data( this[0] );
+
+ if ( this[0].nodeType === 1 ) {
+ var attr = this[0].attributes, name;
+ for ( var i = 0, l = attr.length; i < l; i++ ) {
+ name = attr[i].name;
+
+ if ( name.indexOf( "data-" ) === 0 ) {
+ name = jQuery.camelCase( name.substring(5) );
+
+ dataAttr( this[0], name, data[ name ] );
+ }
+ }
+ }
+ }
+
+ return data;
+
+ } else if ( typeof key === "object" ) {
+ return this.each(function() {
+ jQuery.data( this, key );
+ });
+ }
+
+ var parts = key.split(".");
+ parts[1] = parts[1] ? "." + parts[1] : "";
+
+ if ( value === undefined ) {
+ data = this.triggerHandler("getData" + parts[1] + "!", [parts[0]]);
+
+ // Try to fetch any internally stored data first
+ if ( data === undefined && this.length ) {
+ data = jQuery.data( this[0], key );
+ data = dataAttr( this[0], key, data );
+ }
+
+ return data === undefined && parts[1] ?
+ this.data( parts[0] ) :
+ data;
+
+ } else {
+ return this.each(function() {
+ var $this = jQuery( this ),
+ args = [ parts[0], value ];
+
+ $this.triggerHandler( "setData" + parts[1] + "!", args );
+ jQuery.data( this, key, value );
+ $this.triggerHandler( "changeData" + parts[1] + "!", args );
+ });
+ }
+ },
+
+ removeData: function( key ) {
+ return this.each(function() {
+ jQuery.removeData( this, key );
+ });
+ }
+});
+
+function dataAttr( elem, key, data ) {
+ // If nothing was found internally, try to fetch any
+ // data from the HTML5 data-* attribute
+ if ( data === undefined && elem.nodeType === 1 ) {
+
+ var name = "data-" + key.replace( rmultiDash, "-$1" ).toLowerCase();
+
+ data = elem.getAttribute( name );
+
+ if ( typeof data === "string" ) {
+ try {
+ data = data === "true" ? true :
+ data === "false" ? false :
+ data === "null" ? null :
+ !jQuery.isNaN( data ) ? parseFloat( data ) :
+ rbrace.test( data ) ? jQuery.parseJSON( data ) :
+ data;
+ } catch( e ) {}
+
+ // Make sure we set the data so it isn't changed later
+ jQuery.data( elem, key, data );
+
+ } else {
+ data = undefined;
+ }
+ }
+
+ return data;
+}
+
+// TODO: This is a hack for 1.5 ONLY to allow objects with a single toJSON
+// property to be considered empty objects; this property always exists in
+// order to make sure JSON.stringify does not expose internal metadata
+function isEmptyDataObject( obj ) {
+ for ( var name in obj ) {
+ if ( name !== "toJSON" ) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+
+
+
+function handleQueueMarkDefer( elem, type, src ) {
+ var deferDataKey = type + "defer",
+ queueDataKey = type + "queue",
+ markDataKey = type + "mark",
+ defer = jQuery.data( elem, deferDataKey, undefined, true );
+ if ( defer &&
+ ( src === "queue" || !jQuery.data( elem, queueDataKey, undefined, true ) ) &&
+ ( src === "mark" || !jQuery.data( elem, markDataKey, undefined, true ) ) ) {
+ // Give room for hard-coded callbacks to fire first
+ // and eventually mark/queue something else on the element
+ setTimeout( function() {
+ if ( !jQuery.data( elem, queueDataKey, undefined, true ) &&
+ !jQuery.data( elem, markDataKey, undefined, true ) ) {
+ jQuery.removeData( elem, deferDataKey, true );
+ defer.resolve();
+ }
+ }, 0 );
+ }
+}
+
+jQuery.extend({
+
+ _mark: function( elem, type ) {
+ if ( elem ) {
+ type = (type || "fx") + "mark";
+ jQuery.data( elem, type, (jQuery.data(elem,type,undefined,true) || 0) + 1, true );
+ }
+ },
+
+ _unmark: function( force, elem, type ) {
+ if ( force !== true ) {
+ type = elem;
+ elem = force;
+ force = false;
+ }
+ if ( elem ) {
+ type = type || "fx";
+ var key = type + "mark",
+ count = force ? 0 : ( (jQuery.data( elem, key, undefined, true) || 1 ) - 1 );
+ if ( count ) {
+ jQuery.data( elem, key, count, true );
+ } else {
+ jQuery.removeData( elem, key, true );
+ handleQueueMarkDefer( elem, type, "mark" );
+ }
+ }
+ },
+
+ queue: function( elem, type, data ) {
+ if ( elem ) {
+ type = (type || "fx") + "queue";
+ var q = jQuery.data( elem, type, undefined, true );
+ // Speed up dequeue by getting out quickly if this is just a lookup
+ if ( data ) {
+ if ( !q || jQuery.isArray(data) ) {
+ q = jQuery.data( elem, type, jQuery.makeArray(data), true );
+ } else {
+ q.push( data );
+ }
+ }
+ return q || [];
+ }
+ },
+
+ dequeue: function( elem, type ) {
+ type = type || "fx";
+
+ var queue = jQuery.queue( elem, type ),
+ fn = queue.shift(),
+ defer;
+
+ // If the fx queue is dequeued, always remove the progress sentinel
+ if ( fn === "inprogress" ) {
+ fn = queue.shift();
+ }
+
+ if ( fn ) {
+ // Add a progress sentinel to prevent the fx queue from being
+ // automatically dequeued
+ if ( type === "fx" ) {
+ queue.unshift("inprogress");
+ }
+
+ fn.call(elem, function() {
+ jQuery.dequeue(elem, type);
+ });
+ }
+
+ if ( !queue.length ) {
+ jQuery.removeData( elem, type + "queue", true );
+ handleQueueMarkDefer( elem, type, "queue" );
+ }
+ }
+});
+
+jQuery.fn.extend({
+ queue: function( type, data ) {
+ if ( typeof type !== "string" ) {
+ data = type;
+ type = "fx";
+ }
+
+ if ( data === undefined ) {
+ return jQuery.queue( this[0], type );
+ }
+ return this.each(function() {
+ var queue = jQuery.queue( this, type, data );
+
+ if ( type === "fx" && queue[0] !== "inprogress" ) {
+ jQuery.dequeue( this, type );
+ }
+ });
+ },
+ dequeue: function( type ) {
+ return this.each(function() {
+ jQuery.dequeue( this, type );
+ });
+ },
+ // Based off of the plugin by Clint Helfers, with permission.
+ // http://blindsignals.com/index.php/2009/07/jquery-delay/
+ delay: function( time, type ) {
+ time = jQuery.fx ? jQuery.fx.speeds[time] || time : time;
+ type = type || "fx";
+
+ return this.queue( type, function() {
+ var elem = this;
+ setTimeout(function() {
+ jQuery.dequeue( elem, type );
+ }, time );
+ });
+ },
+ clearQueue: function( type ) {
+ return this.queue( type || "fx", [] );
+ },
+ // Get a promise resolved when queues of a certain type
+ // are emptied (fx is the type by default)
+ promise: function( type, object ) {
+ if ( typeof type !== "string" ) {
+ object = type;
+ type = undefined;
+ }
+ type = type || "fx";
+ var defer = jQuery.Deferred(),
+ elements = this,
+ i = elements.length,
+ count = 1,
+ deferDataKey = type + "defer",
+ queueDataKey = type + "queue",
+ markDataKey = type + "mark",
+ tmp;
+ function resolve() {
+ if ( !( --count ) ) {
+ defer.resolveWith( elements, [ elements ] );
+ }
+ }
+ while( i-- ) {
+ if (( tmp = jQuery.data( elements[ i ], deferDataKey, undefined, true ) ||
+ ( jQuery.data( elements[ i ], queueDataKey, undefined, true ) ||
+ jQuery.data( elements[ i ], markDataKey, undefined, true ) ) &&
+ jQuery.data( elements[ i ], deferDataKey, jQuery._Deferred(), true ) )) {
+ count++;
+ tmp.done( resolve );
+ }
+ }
+ resolve();
+ return defer.promise();
+ }
+});
+
+
+
+
+var rclass = /[\n\t\r]/g,
+ rspace = /\s+/,
+ rreturn = /\r/g,
+ rtype = /^(?:button|input)$/i,
+ rfocusable = /^(?:button|input|object|select|textarea)$/i,
+ rclickable = /^a(?:rea)?$/i,
+ rboolean = /^(?:autofocus|autoplay|async|checked|controls|defer|disabled|hidden|loop|multiple|open|readonly|required|scoped|selected)$/i,
+ nodeHook, boolHook;
+
+jQuery.fn.extend({
+ attr: function( name, value ) {
+ return jQuery.access( this, name, value, true, jQuery.attr );
+ },
+
+ removeAttr: function( name ) {
+ return this.each(function() {
+ jQuery.removeAttr( this, name );
+ });
+ },
+
+ prop: function( name, value ) {
+ return jQuery.access( this, name, value, true, jQuery.prop );
+ },
+
+ removeProp: function( name ) {
+ name = jQuery.propFix[ name ] || name;
+ return this.each(function() {
+ // try/catch handles cases where IE balks (such as removing a property on window)
+ try {
+ this[ name ] = undefined;
+ delete this[ name ];
+ } catch( e ) {}
+ });
+ },
+
+ addClass: function( value ) {
+ var classNames, i, l, elem,
+ setClass, c, cl;
+
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function( j ) {
+ jQuery( this ).addClass( value.call(this, j, this.className) );
+ });
+ }
+
+ if ( value && typeof value === "string" ) {
+ classNames = value.split( rspace );
+
+ for ( i = 0, l = this.length; i < l; i++ ) {
+ elem = this[ i ];
+
+ if ( elem.nodeType === 1 ) {
+ if ( !elem.className && classNames.length === 1 ) {
+ elem.className = value;
+
+ } else {
+ setClass = " " + elem.className + " ";
+
+ for ( c = 0, cl = classNames.length; c < cl; c++ ) {
+ if ( !~setClass.indexOf( " " + classNames[ c ] + " " ) ) {
+ setClass += classNames[ c ] + " ";
+ }
+ }
+ elem.className = jQuery.trim( setClass );
+ }
+ }
+ }
+ }
+
+ return this;
+ },
+
+ removeClass: function( value ) {
+ var classNames, i, l, elem, className, c, cl;
+
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function( j ) {
+ jQuery( this ).removeClass( value.call(this, j, this.className) );
+ });
+ }
+
+ if ( (value && typeof value === "string") || value === undefined ) {
+ classNames = (value || "").split( rspace );
+
+ for ( i = 0, l = this.length; i < l; i++ ) {
+ elem = this[ i ];
+
+ if ( elem.nodeType === 1 && elem.className ) {
+ if ( value ) {
+ className = (" " + elem.className + " ").replace( rclass, " " );
+ for ( c = 0, cl = classNames.length; c < cl; c++ ) {
+ className = className.replace(" " + classNames[ c ] + " ", " ");
+ }
+ elem.className = jQuery.trim( className );
+
+ } else {
+ elem.className = "";
+ }
+ }
+ }
+ }
+
+ return this;
+ },
+
+ toggleClass: function( value, stateVal ) {
+ var type = typeof value,
+ isBool = typeof stateVal === "boolean";
+
+ if ( jQuery.isFunction( value ) ) {
+ return this.each(function( i ) {
+ jQuery( this ).toggleClass( value.call(this, i, this.className, stateVal), stateVal );
+ });
+ }
+
+ return this.each(function() {
+ if ( type === "string" ) {
+ // toggle individual class names
+ var className,
+ i = 0,
+ self = jQuery( this ),
+ state = stateVal,
+ classNames = value.split( rspace );
+
+ while ( (className = classNames[ i++ ]) ) {
+ // check each className given, space seperated list
+ state = isBool ? state : !self.hasClass( className );
+ self[ state ? "addClass" : "removeClass" ]( className );
+ }
+
+ } else if ( type === "undefined" || type === "boolean" ) {
+ if ( this.className ) {
+ // store className if set
+ jQuery._data( this, "__className__", this.className );
+ }
+
+ // toggle whole className
+ this.className = this.className || value === false ? "" : jQuery._data( this, "__className__" ) || "";
+ }
+ });
+ },
+
+ hasClass: function( selector ) {
+ var className = " " + selector + " ";
+ for ( var i = 0, l = this.length; i < l; i++ ) {
+ if ( this[i].nodeType === 1 && (" " + this[i].className + " ").replace(rclass, " ").indexOf( className ) > -1 ) {
+ return true;
+ }
+ }
+
+ return false;
+ },
+
+ val: function( value ) {
+ var hooks, ret,
+ elem = this[0];
+
+ if ( !arguments.length ) {
+ if ( elem ) {
+ hooks = jQuery.valHooks[ elem.nodeName.toLowerCase() ] || jQuery.valHooks[ elem.type ];
+
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, "value" )) !== undefined ) {
+ return ret;
+ }
+
+ ret = elem.value;
+
+ return typeof ret === "string" ?
+ // handle most common string cases
+ ret.replace(rreturn, "") :
+ // handle cases where value is null/undef or number
+ ret == null ? "" : ret;
+ }
+
+ return undefined;
+ }
+
+ var isFunction = jQuery.isFunction( value );
+
+ return this.each(function( i ) {
+ var self = jQuery(this), val;
+
+ if ( this.nodeType !== 1 ) {
+ return;
+ }
+
+ if ( isFunction ) {
+ val = value.call( this, i, self.val() );
+ } else {
+ val = value;
+ }
+
+ // Treat null/undefined as ""; convert numbers to string
+ if ( val == null ) {
+ val = "";
+ } else if ( typeof val === "number" ) {
+ val += "";
+ } else if ( jQuery.isArray( val ) ) {
+ val = jQuery.map(val, function ( value ) {
+ return value == null ? "" : value + "";
+ });
+ }
+
+ hooks = jQuery.valHooks[ this.nodeName.toLowerCase() ] || jQuery.valHooks[ this.type ];
+
+ // If set returns undefined, fall back to normal setting
+ if ( !hooks || !("set" in hooks) || hooks.set( this, val, "value" ) === undefined ) {
+ this.value = val;
+ }
+ });
+ }
+});
+
+jQuery.extend({
+ valHooks: {
+ option: {
+ get: function( elem ) {
+ // attributes.value is undefined in Blackberry 4.7 but
+ // uses .value. See #6932
+ var val = elem.attributes.value;
+ return !val || val.specified ? elem.value : elem.text;
+ }
+ },
+ select: {
+ get: function( elem ) {
+ var value,
+ index = elem.selectedIndex,
+ values = [],
+ options = elem.options,
+ one = elem.type === "select-one";
+
+ // Nothing was selected
+ if ( index < 0 ) {
+ return null;
+ }
+
+ // Loop through all the selected options
+ for ( var i = one ? index : 0, max = one ? index + 1 : options.length; i < max; i++ ) {
+ var option = options[ i ];
+
+ // Don't return options that are disabled or in a disabled optgroup
+ if ( option.selected && (jQuery.support.optDisabled ? !option.disabled : option.getAttribute("disabled") === null) &&
+ (!option.parentNode.disabled || !jQuery.nodeName( option.parentNode, "optgroup" )) ) {
+
+ // Get the specific value for the option
+ value = jQuery( option ).val();
+
+ // We don't need an array for one selects
+ if ( one ) {
+ return value;
+ }
+
+ // Multi-Selects return an array
+ values.push( value );
+ }
+ }
+
+ // Fixes Bug #2551 -- select.val() broken in IE after form.reset()
+ if ( one && !values.length && options.length ) {
+ return jQuery( options[ index ] ).val();
+ }
+
+ return values;
+ },
+
+ set: function( elem, value ) {
+ var values = jQuery.makeArray( value );
+
+ jQuery(elem).find("option").each(function() {
+ this.selected = jQuery.inArray( jQuery(this).val(), values ) >= 0;
+ });
+
+ if ( !values.length ) {
+ elem.selectedIndex = -1;
+ }
+ return values;
+ }
+ }
+ },
+
+ attrFn: {
+ val: true,
+ css: true,
+ html: true,
+ text: true,
+ data: true,
+ width: true,
+ height: true,
+ offset: true
+ },
+
+ attrFix: {
+ // Always normalize to ensure hook usage
+ tabindex: "tabIndex"
+ },
+
+ attr: function( elem, name, value, pass ) {
+ var nType = elem.nodeType;
+
+ // don't get/set attributes on text, comment and attribute nodes
+ if ( !elem || nType === 3 || nType === 8 || nType === 2 ) {
+ return undefined;
+ }
+
+ if ( pass && name in jQuery.attrFn ) {
+ return jQuery( elem )[ name ]( value );
+ }
+
+ // Fallback to prop when attributes are not supported
+ if ( !("getAttribute" in elem) ) {
+ return jQuery.prop( elem, name, value );
+ }
+
+ var ret, hooks,
+ notxml = nType !== 1 || !jQuery.isXMLDoc( elem );
+
+ // Normalize the name if needed
+ if ( notxml ) {
+ name = jQuery.attrFix[ name ] || name;
+
+ hooks = jQuery.attrHooks[ name ];
+
+ if ( !hooks ) {
+ // Use boolHook for boolean attributes
+ if ( rboolean.test( name ) ) {
+ hooks = boolHook;
+
+ // Use nodeHook if available( IE6/7 )
+ } else if ( nodeHook ) {
+ hooks = nodeHook;
+ }
+ }
+ }
+
+ if ( value !== undefined ) {
+
+ if ( value === null ) {
+ jQuery.removeAttr( elem, name );
+ return undefined;
+
+ } else if ( hooks && "set" in hooks && notxml && (ret = hooks.set( elem, value, name )) !== undefined ) {
+ return ret;
+
+ } else {
+ elem.setAttribute( name, "" + value );
+ return value;
+ }
+
+ } else if ( hooks && "get" in hooks && notxml && (ret = hooks.get( elem, name )) !== null ) {
+ return ret;
+
+ } else {
+
+ ret = elem.getAttribute( name );
+
+ // Non-existent attributes return null, we normalize to undefined
+ return ret === null ?
+ undefined :
+ ret;
+ }
+ },
+
+ removeAttr: function( elem, name ) {
+ var propName;
+ if ( elem.nodeType === 1 ) {
+ name = jQuery.attrFix[ name ] || name;
+
+ jQuery.attr( elem, name, "" );
+ elem.removeAttribute( name );
+
+ // Set corresponding property to false for boolean attributes
+ if ( rboolean.test( name ) && (propName = jQuery.propFix[ name ] || name) in elem ) {
+ elem[ propName ] = false;
+ }
+ }
+ },
+
+ attrHooks: {
+ type: {
+ set: function( elem, value ) {
+ // We can't allow the type property to be changed (since it causes problems in IE)
+ if ( rtype.test( elem.nodeName ) && elem.parentNode ) {
+ jQuery.error( "type property can't be changed" );
+ } else if ( !jQuery.support.radioValue && value === "radio" && jQuery.nodeName(elem, "input") ) {
+ // Setting the type on a radio button after the value resets the value in IE6-9
+ // Reset value to it's default in case type is set after value
+ // This is for element creation
+ var val = elem.value;
+ elem.setAttribute( "type", value );
+ if ( val ) {
+ elem.value = val;
+ }
+ return value;
+ }
+ }
+ },
+ // Use the value property for back compat
+ // Use the nodeHook for button elements in IE6/7 (#1954)
+ value: {
+ get: function( elem, name ) {
+ if ( nodeHook && jQuery.nodeName( elem, "button" ) ) {
+ return nodeHook.get( elem, name );
+ }
+ return name in elem ?
+ elem.value :
+ null;
+ },
+ set: function( elem, value, name ) {
+ if ( nodeHook && jQuery.nodeName( elem, "button" ) ) {
+ return nodeHook.set( elem, value, name );
+ }
+ // Does not return so that setAttribute is also used
+ elem.value = value;
+ }
+ }
+ },
+
+ propFix: {
+ tabindex: "tabIndex",
+ readonly: "readOnly",
+ "for": "htmlFor",
+ "class": "className",
+ maxlength: "maxLength",
+ cellspacing: "cellSpacing",
+ cellpadding: "cellPadding",
+ rowspan: "rowSpan",
+ colspan: "colSpan",
+ usemap: "useMap",
+ frameborder: "frameBorder",
+ contenteditable: "contentEditable"
+ },
+
+ prop: function( elem, name, value ) {
+ var nType = elem.nodeType;
+
+ // don't get/set properties on text, comment and attribute nodes
+ if ( !elem || nType === 3 || nType === 8 || nType === 2 ) {
+ return undefined;
+ }
+
+ var ret, hooks,
+ notxml = nType !== 1 || !jQuery.isXMLDoc( elem );
+
+ if ( notxml ) {
+ // Fix name and attach hooks
+ name = jQuery.propFix[ name ] || name;
+ hooks = jQuery.propHooks[ name ];
+ }
+
+ if ( value !== undefined ) {
+ if ( hooks && "set" in hooks && (ret = hooks.set( elem, value, name )) !== undefined ) {
+ return ret;
+
+ } else {
+ return (elem[ name ] = value);
+ }
+
+ } else {
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, name )) !== null ) {
+ return ret;
+
+ } else {
+ return elem[ name ];
+ }
+ }
+ },
+
+ propHooks: {
+ tabIndex: {
+ get: function( elem ) {
+ // elem.tabIndex doesn't always return the correct value when it hasn't been explicitly set
+ // http://fluidproject.org/blog/2008/01/09/getting-setting-and-removing-tabindex-values-with-javascript/
+ var attributeNode = elem.getAttributeNode("tabindex");
+
+ return attributeNode && attributeNode.specified ?
+ parseInt( attributeNode.value, 10 ) :
+ rfocusable.test( elem.nodeName ) || rclickable.test( elem.nodeName ) && elem.href ?
+ 0 :
+ undefined;
+ }
+ }
+ }
+});
+
+// Add the tabindex propHook to attrHooks for back-compat
+jQuery.attrHooks.tabIndex = jQuery.propHooks.tabIndex;
+
+// Hook for boolean attributes
+boolHook = {
+ get: function( elem, name ) {
+ // Align boolean attributes with corresponding properties
+ // Fall back to attribute presence where some booleans are not supported
+ var attrNode;
+ return jQuery.prop( elem, name ) === true || ( attrNode = elem.getAttributeNode( name ) ) && attrNode.nodeValue !== false ?
+ name.toLowerCase() :
+ undefined;
+ },
+ set: function( elem, value, name ) {
+ var propName;
+ if ( value === false ) {
+ // Remove boolean attributes when set to false
+ jQuery.removeAttr( elem, name );
+ } else {
+ // value is true since we know at this point it's type boolean and not false
+ // Set boolean attributes to the same name and set the DOM property
+ propName = jQuery.propFix[ name ] || name;
+ if ( propName in elem ) {
+ // Only set the IDL specifically if it already exists on the element
+ elem[ propName ] = true;
+ }
+
+ elem.setAttribute( name, name.toLowerCase() );
+ }
+ return name;
+ }
+};
+
+// IE6/7 do not support getting/setting some attributes with get/setAttribute
+if ( !jQuery.support.getSetAttribute ) {
+
+ // Use this for any attribute in IE6/7
+ // This fixes almost every IE6/7 issue
+ nodeHook = jQuery.valHooks.button = {
+ get: function( elem, name ) {
+ var ret;
+ ret = elem.getAttributeNode( name );
+ // Return undefined if nodeValue is empty string
+ return ret && ret.nodeValue !== "" ?
+ ret.nodeValue :
+ undefined;
+ },
+ set: function( elem, value, name ) {
+ // Set the existing or create a new attribute node
+ var ret = elem.getAttributeNode( name );
+ if ( !ret ) {
+ ret = document.createAttribute( name );
+ elem.setAttributeNode( ret );
+ }
+ return (ret.nodeValue = value + "");
+ }
+ };
+
+ // Set width and height to auto instead of 0 on empty string( Bug #8150 )
+ // This is for removals
+ jQuery.each([ "width", "height" ], function( i, name ) {
+ jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], {
+ set: function( elem, value ) {
+ if ( value === "" ) {
+ elem.setAttribute( name, "auto" );
+ return value;
+ }
+ }
+ });
+ });
+}
+
+
+// Some attributes require a special call on IE
+if ( !jQuery.support.hrefNormalized ) {
+ jQuery.each([ "href", "src", "width", "height" ], function( i, name ) {
+ jQuery.attrHooks[ name ] = jQuery.extend( jQuery.attrHooks[ name ], {
+ get: function( elem ) {
+ var ret = elem.getAttribute( name, 2 );
+ return ret === null ? undefined : ret;
+ }
+ });
+ });
+}
+
+if ( !jQuery.support.style ) {
+ jQuery.attrHooks.style = {
+ get: function( elem ) {
+ // Return undefined in the case of empty string
+ // Normalize to lowercase since IE uppercases css property names
+ return elem.style.cssText.toLowerCase() || undefined;
+ },
+ set: function( elem, value ) {
+ return (elem.style.cssText = "" + value);
+ }
+ };
+}
+
+// Safari mis-reports the default selected property of an option
+// Accessing the parent's selectedIndex property fixes it
+if ( !jQuery.support.optSelected ) {
+ jQuery.propHooks.selected = jQuery.extend( jQuery.propHooks.selected, {
+ get: function( elem ) {
+ var parent = elem.parentNode;
+
+ if ( parent ) {
+ parent.selectedIndex;
+
+ // Make sure that it also works with optgroups, see #5701
+ if ( parent.parentNode ) {
+ parent.parentNode.selectedIndex;
+ }
+ }
+ return null;
+ }
+ });
+}
+
+// Radios and checkboxes getter/setter
+if ( !jQuery.support.checkOn ) {
+ jQuery.each([ "radio", "checkbox" ], function() {
+ jQuery.valHooks[ this ] = {
+ get: function( elem ) {
+ // Handle the case where in Webkit "" is returned instead of "on" if a value isn't specified
+ return elem.getAttribute("value") === null ? "on" : elem.value;
+ }
+ };
+ });
+}
+jQuery.each([ "radio", "checkbox" ], function() {
+ jQuery.valHooks[ this ] = jQuery.extend( jQuery.valHooks[ this ], {
+ set: function( elem, value ) {
+ if ( jQuery.isArray( value ) ) {
+ return (elem.checked = jQuery.inArray( jQuery(elem).val(), value ) >= 0);
+ }
+ }
+ });
+});
+
+
+
+
+var rnamespaces = /\.(.*)$/,
+ rformElems = /^(?:textarea|input|select)$/i,
+ rperiod = /\./g,
+ rspaces = / /g,
+ rescape = /[^\w\s.|`]/g,
+ fcleanup = function( nm ) {
+ return nm.replace(rescape, "\\$&");
+ };
+
+/*
+ * A number of helper functions used for managing events.
+ * Many of the ideas behind this code originated from
+ * Dean Edwards' addEvent library.
+ */
+jQuery.event = {
+
+ // Bind an event to an element
+ // Original by Dean Edwards
+ add: function( elem, types, handler, data ) {
+ if ( elem.nodeType === 3 || elem.nodeType === 8 ) {
+ return;
+ }
+
+ if ( handler === false ) {
+ handler = returnFalse;
+ } else if ( !handler ) {
+ // Fixes bug #7229. Fix recommended by jdalton
+ return;
+ }
+
+ var handleObjIn, handleObj;
+
+ if ( handler.handler ) {
+ handleObjIn = handler;
+ handler = handleObjIn.handler;
+ }
+
+ // Make sure that the function being executed has a unique ID
+ if ( !handler.guid ) {
+ handler.guid = jQuery.guid++;
+ }
+
+ // Init the element's event structure
+ var elemData = jQuery._data( elem );
+
+ // If no elemData is found then we must be trying to bind to one of the
+ // banned noData elements
+ if ( !elemData ) {
+ return;
+ }
+
+ var events = elemData.events,
+ eventHandle = elemData.handle;
+
+ if ( !events ) {
+ elemData.events = events = {};
+ }
+
+ if ( !eventHandle ) {
+ elemData.handle = eventHandle = function( e ) {
+ // Discard the second event of a jQuery.event.trigger() and
+ // when an event is called after a page has unloaded
+ return typeof jQuery !== "undefined" && (!e || jQuery.event.triggered !== e.type) ?
+ jQuery.event.handle.apply( eventHandle.elem, arguments ) :
+ undefined;
+ };
+ }
+
+ // Add elem as a property of the handle function
+ // This is to prevent a memory leak with non-native events in IE.
+ eventHandle.elem = elem;
+
+ // Handle multiple events separated by a space
+ // jQuery(...).bind("mouseover mouseout", fn);
+ types = types.split(" ");
+
+ var type, i = 0, namespaces;
+
+ while ( (type = types[ i++ ]) ) {
+ handleObj = handleObjIn ?
+ jQuery.extend({}, handleObjIn) :
+ { handler: handler, data: data };
+
+ // Namespaced event handlers
+ if ( type.indexOf(".") > -1 ) {
+ namespaces = type.split(".");
+ type = namespaces.shift();
+ handleObj.namespace = namespaces.slice(0).sort().join(".");
+
+ } else {
+ namespaces = [];
+ handleObj.namespace = "";
+ }
+
+ handleObj.type = type;
+ if ( !handleObj.guid ) {
+ handleObj.guid = handler.guid;
+ }
+
+ // Get the current list of functions bound to this event
+ var handlers = events[ type ],
+ special = jQuery.event.special[ type ] || {};
+
+ // Init the event handler queue
+ if ( !handlers ) {
+ handlers = events[ type ] = [];
+
+ // Check for a special event handler
+ // Only use addEventListener/attachEvent if the special
+ // events handler returns false
+ if ( !special.setup || special.setup.call( elem, data, namespaces, eventHandle ) === false ) {
+ // Bind the global event handler to the element
+ if ( elem.addEventListener ) {
+ elem.addEventListener( type, eventHandle, false );
+
+ } else if ( elem.attachEvent ) {
+ elem.attachEvent( "on" + type, eventHandle );
+ }
+ }
+ }
+
+ if ( special.add ) {
+ special.add.call( elem, handleObj );
+
+ if ( !handleObj.handler.guid ) {
+ handleObj.handler.guid = handler.guid;
+ }
+ }
+
+ // Add the function to the element's handler list
+ handlers.push( handleObj );
+
+ // Keep track of which events have been used, for event optimization
+ jQuery.event.global[ type ] = true;
+ }
+
+ // Nullify elem to prevent memory leaks in IE
+ elem = null;
+ },
+
+ global: {},
+
+ // Detach an event or set of events from an element
+ remove: function( elem, types, handler, pos ) {
+ // don't do events on text and comment nodes
+ if ( elem.nodeType === 3 || elem.nodeType === 8 ) {
+ return;
+ }
+
+ if ( handler === false ) {
+ handler = returnFalse;
+ }
+
+ var ret, type, fn, j, i = 0, all, namespaces, namespace, special, eventType, handleObj, origType,
+ elemData = jQuery.hasData( elem ) && jQuery._data( elem ),
+ events = elemData && elemData.events;
+
+ if ( !elemData || !events ) {
+ return;
+ }
+
+ // types is actually an event object here
+ if ( types && types.type ) {
+ handler = types.handler;
+ types = types.type;
+ }
+
+ // Unbind all events for the element
+ if ( !types || typeof types === "string" && types.charAt(0) === "." ) {
+ types = types || "";
+
+ for ( type in events ) {
+ jQuery.event.remove( elem, type + types );
+ }
+
+ return;
+ }
+
+ // Handle multiple events separated by a space
+ // jQuery(...).unbind("mouseover mouseout", fn);
+ types = types.split(" ");
+
+ while ( (type = types[ i++ ]) ) {
+ origType = type;
+ handleObj = null;
+ all = type.indexOf(".") < 0;
+ namespaces = [];
+
+ if ( !all ) {
+ // Namespaced event handlers
+ namespaces = type.split(".");
+ type = namespaces.shift();
+
+ namespace = new RegExp("(^|\\.)" +
+ jQuery.map( namespaces.slice(0).sort(), fcleanup ).join("\\.(?:.*\\.)?") + "(\\.|$)");
+ }
+
+ eventType = events[ type ];
+
+ if ( !eventType ) {
+ continue;
+ }
+
+ if ( !handler ) {
+ for ( j = 0; j < eventType.length; j++ ) {
+ handleObj = eventType[ j ];
+
+ if ( all || namespace.test( handleObj.namespace ) ) {
+ jQuery.event.remove( elem, origType, handleObj.handler, j );
+ eventType.splice( j--, 1 );
+ }
+ }
+
+ continue;
+ }
+
+ special = jQuery.event.special[ type ] || {};
+
+ for ( j = pos || 0; j < eventType.length; j++ ) {
+ handleObj = eventType[ j ];
+
+ if ( handler.guid === handleObj.guid ) {
+ // remove the given handler for the given type
+ if ( all || namespace.test( handleObj.namespace ) ) {
+ if ( pos == null ) {
+ eventType.splice( j--, 1 );
+ }
+
+ if ( special.remove ) {
+ special.remove.call( elem, handleObj );
+ }
+ }
+
+ if ( pos != null ) {
+ break;
+ }
+ }
+ }
+
+ // remove generic event handler if no more handlers exist
+ if ( eventType.length === 0 || pos != null && eventType.length === 1 ) {
+ if ( !special.teardown || special.teardown.call( elem, namespaces ) === false ) {
+ jQuery.removeEvent( elem, type, elemData.handle );
+ }
+
+ ret = null;
+ delete events[ type ];
+ }
+ }
+
+ // Remove the expando if it's no longer used
+ if ( jQuery.isEmptyObject( events ) ) {
+ var handle = elemData.handle;
+ if ( handle ) {
+ handle.elem = null;
+ }
+
+ delete elemData.events;
+ delete elemData.handle;
+
+ if ( jQuery.isEmptyObject( elemData ) ) {
+ jQuery.removeData( elem, undefined, true );
+ }
+ }
+ },
+
+ // Events that are safe to short-circuit if no handlers are attached.
+ // Native DOM events should not be added, they may have inline handlers.
+ customEvent: {
+ "getData": true,
+ "setData": true,
+ "changeData": true
+ },
+
+ trigger: function( event, data, elem, onlyHandlers ) {
+ // Event object or event type
+ var type = event.type || event,
+ namespaces = [],
+ exclusive;
+
+ if ( type.indexOf("!") >= 0 ) {
+ // Exclusive events trigger only for the exact event (no namespaces)
+ type = type.slice(0, -1);
+ exclusive = true;
+ }
+
+ if ( type.indexOf(".") >= 0 ) {
+ // Namespaced trigger; create a regexp to match event type in handle()
+ namespaces = type.split(".");
+ type = namespaces.shift();
+ namespaces.sort();
+ }
+
+ if ( (!elem || jQuery.event.customEvent[ type ]) && !jQuery.event.global[ type ] ) {
+ // No jQuery handlers for this event type, and it can't have inline handlers
+ return;
+ }
+
+ // Caller can pass in an Event, Object, or just an event type string
+ event = typeof event === "object" ?
+ // jQuery.Event object
+ event[ jQuery.expando ] ? event :
+ // Object literal
+ new jQuery.Event( type, event ) :
+ // Just the event type (string)
+ new jQuery.Event( type );
+
+ event.type = type;
+ event.exclusive = exclusive;
+ event.namespace = namespaces.join(".");
+ event.namespace_re = new RegExp("(^|\\.)" + namespaces.join("\\.(?:.*\\.)?") + "(\\.|$)");
+
+ // triggerHandler() and global events don't bubble or run the default action
+ if ( onlyHandlers || !elem ) {
+ event.preventDefault();
+ event.stopPropagation();
+ }
+
+ // Handle a global trigger
+ if ( !elem ) {
+ // TODO: Stop taunting the data cache; remove global events and always attach to document
+ jQuery.each( jQuery.cache, function() {
+ // internalKey variable is just used to make it easier to find
+ // and potentially change this stuff later; currently it just
+ // points to jQuery.expando
+ var internalKey = jQuery.expando,
+ internalCache = this[ internalKey ];
+ if ( internalCache && internalCache.events && internalCache.events[ type ] ) {
+ jQuery.event.trigger( event, data, internalCache.handle.elem );
+ }
+ });
+ return;
+ }
+
+ // Don't do events on text and comment nodes
+ if ( elem.nodeType === 3 || elem.nodeType === 8 ) {
+ return;
+ }
+
+ // Clean up the event in case it is being reused
+ event.result = undefined;
+ event.target = elem;
+
+ // Clone any incoming data and prepend the event, creating the handler arg list
+ data = data != null ? jQuery.makeArray( data ) : [];
+ data.unshift( event );
+
+ var cur = elem,
+ // IE doesn't like method names with a colon (#3533, #8272)
+ ontype = type.indexOf(":") < 0 ? "on" + type : "";
+
+ // Fire event on the current element, then bubble up the DOM tree
+ do {
+ var handle = jQuery._data( cur, "handle" );
+
+ event.currentTarget = cur;
+ if ( handle ) {
+ handle.apply( cur, data );
+ }
+
+ // Trigger an inline bound script
+ if ( ontype && jQuery.acceptData( cur ) && cur[ ontype ] && cur[ ontype ].apply( cur, data ) === false ) {
+ event.result = false;
+ event.preventDefault();
+ }
+
+ // Bubble up to document, then to window
+ cur = cur.parentNode || cur.ownerDocument || cur === event.target.ownerDocument && window;
+ } while ( cur && !event.isPropagationStopped() );
+
+ // If nobody prevented the default action, do it now
+ if ( !event.isDefaultPrevented() ) {
+ var old,
+ special = jQuery.event.special[ type ] || {};
+
+ if ( (!special._default || special._default.call( elem.ownerDocument, event ) === false) &&
+ !(type === "click" && jQuery.nodeName( elem, "a" )) && jQuery.acceptData( elem ) ) {
+
+ // Call a native DOM method on the target with the same name as the event.
+ // Can't use an .isFunction() check here because IE6/7 fails that test.
+ // IE<9 dies on focus to hidden element (#1486), may want to revisit a try/catch.
+ try {
+ if ( ontype && elem[ type ] ) {
+ // Don't re-trigger an onFOO event when we call its FOO() method
+ old = elem[ ontype ];
+
+ if ( old ) {
+ elem[ ontype ] = null;
+ }
+
+ jQuery.event.triggered = type;
+ elem[ type ]();
+ }
+ } catch ( ieError ) {}
+
+ if ( old ) {
+ elem[ ontype ] = old;
+ }
+
+ jQuery.event.triggered = undefined;
+ }
+ }
+
+ return event.result;
+ },
+
+ handle: function( event ) {
+ event = jQuery.event.fix( event || window.event );
+ // Snapshot the handlers list since a called handler may add/remove events.
+ var handlers = ((jQuery._data( this, "events" ) || {})[ event.type ] || []).slice(0),
+ run_all = !event.exclusive && !event.namespace,
+ args = Array.prototype.slice.call( arguments, 0 );
+
+ // Use the fix-ed Event rather than the (read-only) native event
+ args[0] = event;
+ event.currentTarget = this;
+
+ for ( var j = 0, l = handlers.length; j < l; j++ ) {
+ var handleObj = handlers[ j ];
+
+ // Triggered event must 1) be non-exclusive and have no namespace, or
+ // 2) have namespace(s) a subset or equal to those in the bound event.
+ if ( run_all || event.namespace_re.test( handleObj.namespace ) ) {
+ // Pass in a reference to the handler function itself
+ // So that we can later remove it
+ event.handler = handleObj.handler;
+ event.data = handleObj.data;
+ event.handleObj = handleObj;
+
+ var ret = handleObj.handler.apply( this, args );
+
+ if ( ret !== undefined ) {
+ event.result = ret;
+ if ( ret === false ) {
+ event.preventDefault();
+ event.stopPropagation();
+ }
+ }
+
+ if ( event.isImmediatePropagationStopped() ) {
+ break;
+ }
+ }
+ }
+ return event.result;
+ },
+
+ props: "altKey attrChange attrName bubbles button cancelable charCode clientX clientY ctrlKey currentTarget data detail eventPhase fromElement handler keyCode layerX layerY metaKey newValue offsetX offsetY pageX pageY prevValue relatedNode relatedTarget screenX screenY shiftKey srcElement target toElement view wheelDelta which".split(" "),
+
+ fix: function( event ) {
+ if ( event[ jQuery.expando ] ) {
+ return event;
+ }
+
+ // store a copy of the original event object
+ // and "clone" to set read-only properties
+ var originalEvent = event;
+ event = jQuery.Event( originalEvent );
+
+ for ( var i = this.props.length, prop; i; ) {
+ prop = this.props[ --i ];
+ event[ prop ] = originalEvent[ prop ];
+ }
+
+ // Fix target property, if necessary
+ if ( !event.target ) {
+ // Fixes #1925 where srcElement might not be defined either
+ event.target = event.srcElement || document;
+ }
+
+ // check if target is a textnode (safari)
+ if ( event.target.nodeType === 3 ) {
+ event.target = event.target.parentNode;
+ }
+
+ // Add relatedTarget, if necessary
+ if ( !event.relatedTarget && event.fromElement ) {
+ event.relatedTarget = event.fromElement === event.target ? event.toElement : event.fromElement;
+ }
+
+ // Calculate pageX/Y if missing and clientX/Y available
+ if ( event.pageX == null && event.clientX != null ) {
+ var eventDocument = event.target.ownerDocument || document,
+ doc = eventDocument.documentElement,
+ body = eventDocument.body;
+
+ event.pageX = event.clientX + (doc && doc.scrollLeft || body && body.scrollLeft || 0) - (doc && doc.clientLeft || body && body.clientLeft || 0);
+ event.pageY = event.clientY + (doc && doc.scrollTop || body && body.scrollTop || 0) - (doc && doc.clientTop || body && body.clientTop || 0);
+ }
+
+ // Add which for key events
+ if ( event.which == null && (event.charCode != null || event.keyCode != null) ) {
+ event.which = event.charCode != null ? event.charCode : event.keyCode;
+ }
+
+ // Add metaKey to non-Mac browsers (use ctrl for PC's and Meta for Macs)
+ if ( !event.metaKey && event.ctrlKey ) {
+ event.metaKey = event.ctrlKey;
+ }
+
+ // Add which for click: 1 === left; 2 === middle; 3 === right
+ // Note: button is not normalized, so don't use it
+ if ( !event.which && event.button !== undefined ) {
+ event.which = (event.button & 1 ? 1 : ( event.button & 2 ? 3 : ( event.button & 4 ? 2 : 0 ) ));
+ }
+
+ return event;
+ },
+
+ // Deprecated, use jQuery.guid instead
+ guid: 1E8,
+
+ // Deprecated, use jQuery.proxy instead
+ proxy: jQuery.proxy,
+
+ special: {
+ ready: {
+ // Make sure the ready event is setup
+ setup: jQuery.bindReady,
+ teardown: jQuery.noop
+ },
+
+ live: {
+ add: function( handleObj ) {
+ jQuery.event.add( this,
+ liveConvert( handleObj.origType, handleObj.selector ),
+ jQuery.extend({}, handleObj, {handler: liveHandler, guid: handleObj.handler.guid}) );
+ },
+
+ remove: function( handleObj ) {
+ jQuery.event.remove( this, liveConvert( handleObj.origType, handleObj.selector ), handleObj );
+ }
+ },
+
+ beforeunload: {
+ setup: function( data, namespaces, eventHandle ) {
+ // We only want to do this special case on windows
+ if ( jQuery.isWindow( this ) ) {
+ this.onbeforeunload = eventHandle;
+ }
+ },
+
+ teardown: function( namespaces, eventHandle ) {
+ if ( this.onbeforeunload === eventHandle ) {
+ this.onbeforeunload = null;
+ }
+ }
+ }
+ }
+};
+
+jQuery.removeEvent = document.removeEventListener ?
+ function( elem, type, handle ) {
+ if ( elem.removeEventListener ) {
+ elem.removeEventListener( type, handle, false );
+ }
+ } :
+ function( elem, type, handle ) {
+ if ( elem.detachEvent ) {
+ elem.detachEvent( "on" + type, handle );
+ }
+ };
+
+jQuery.Event = function( src, props ) {
+ // Allow instantiation without the 'new' keyword
+ if ( !this.preventDefault ) {
+ return new jQuery.Event( src, props );
+ }
+
+ // Event object
+ if ( src && src.type ) {
+ this.originalEvent = src;
+ this.type = src.type;
+
+ // Events bubbling up the document may have been marked as prevented
+ // by a handler lower down the tree; reflect the correct value.
+ this.isDefaultPrevented = (src.defaultPrevented || src.returnValue === false ||
+ src.getPreventDefault && src.getPreventDefault()) ? returnTrue : returnFalse;
+
+ // Event type
+ } else {
+ this.type = src;
+ }
+
+ // Put explicitly provided properties onto the event object
+ if ( props ) {
+ jQuery.extend( this, props );
+ }
+
+ // timeStamp is buggy for some events on Firefox(#3843)
+ // So we won't rely on the native value
+ this.timeStamp = jQuery.now();
+
+ // Mark it as fixed
+ this[ jQuery.expando ] = true;
+};
+
+function returnFalse() {
+ return false;
+}
+function returnTrue() {
+ return true;
+}
+
+// jQuery.Event is based on DOM3 Events as specified by the ECMAScript Language Binding
+// http://www.w3.org/TR/2003/WD-DOM-Level-3-Events-20030331/ecma-script-binding.html
+jQuery.Event.prototype = {
+ preventDefault: function() {
+ this.isDefaultPrevented = returnTrue;
+
+ var e = this.originalEvent;
+ if ( !e ) {
+ return;
+ }
+
+ // if preventDefault exists run it on the original event
+ if ( e.preventDefault ) {
+ e.preventDefault();
+
+ // otherwise set the returnValue property of the original event to false (IE)
+ } else {
+ e.returnValue = false;
+ }
+ },
+ stopPropagation: function() {
+ this.isPropagationStopped = returnTrue;
+
+ var e = this.originalEvent;
+ if ( !e ) {
+ return;
+ }
+ // if stopPropagation exists run it on the original event
+ if ( e.stopPropagation ) {
+ e.stopPropagation();
+ }
+ // otherwise set the cancelBubble property of the original event to true (IE)
+ e.cancelBubble = true;
+ },
+ stopImmediatePropagation: function() {
+ this.isImmediatePropagationStopped = returnTrue;
+ this.stopPropagation();
+ },
+ isDefaultPrevented: returnFalse,
+ isPropagationStopped: returnFalse,
+ isImmediatePropagationStopped: returnFalse
+};
+
+// Checks if an event happened on an element within another element
+// Used in jQuery.event.special.mouseenter and mouseleave handlers
+var withinElement = function( event ) {
+
+ // Check if mouse(over|out) are still within the same parent element
+ var related = event.relatedTarget,
+ inside = false,
+ eventType = event.type;
+
+ event.type = event.data;
+
+ if ( related !== this ) {
+
+ if ( related ) {
+ inside = jQuery.contains( this, related );
+ }
+
+ if ( !inside ) {
+
+ jQuery.event.handle.apply( this, arguments );
+
+ event.type = eventType;
+ }
+ }
+},
+
+// In case of event delegation, we only need to rename the event.type,
+// liveHandler will take care of the rest.
+delegate = function( event ) {
+ event.type = event.data;
+ jQuery.event.handle.apply( this, arguments );
+};
+
+// Create mouseenter and mouseleave events
+jQuery.each({
+ mouseenter: "mouseover",
+ mouseleave: "mouseout"
+}, function( orig, fix ) {
+ jQuery.event.special[ orig ] = {
+ setup: function( data ) {
+ jQuery.event.add( this, fix, data && data.selector ? delegate : withinElement, orig );
+ },
+ teardown: function( data ) {
+ jQuery.event.remove( this, fix, data && data.selector ? delegate : withinElement );
+ }
+ };
+});
+
+// submit delegation
+if ( !jQuery.support.submitBubbles ) {
+
+ jQuery.event.special.submit = {
+ setup: function( data, namespaces ) {
+ if ( !jQuery.nodeName( this, "form" ) ) {
+ jQuery.event.add(this, "click.specialSubmit", function( e ) {
+ // Avoid triggering error on non-existent type attribute in IE VML (#7071)
+ var elem = e.target,
+ type = jQuery.nodeName( elem, "input" ) || jQuery.nodeName( elem, "button" ) ? elem.type : "";
+
+ if ( (type === "submit" || type === "image") && jQuery( elem ).closest("form").length ) {
+ trigger( "submit", this, arguments );
+ }
+ });
+
+ jQuery.event.add(this, "keypress.specialSubmit", function( e ) {
+ var elem = e.target,
+ type = jQuery.nodeName( elem, "input" ) || jQuery.nodeName( elem, "button" ) ? elem.type : "";
+
+ if ( (type === "text" || type === "password") && jQuery( elem ).closest("form").length && e.keyCode === 13 ) {
+ trigger( "submit", this, arguments );
+ }
+ });
+
+ } else {
+ return false;
+ }
+ },
+
+ teardown: function( namespaces ) {
+ jQuery.event.remove( this, ".specialSubmit" );
+ }
+ };
+
+}
+
+// change delegation, happens here so we have bind.
+if ( !jQuery.support.changeBubbles ) {
+
+ var changeFilters,
+
+ getVal = function( elem ) {
+ var type = jQuery.nodeName( elem, "input" ) ? elem.type : "",
+ val = elem.value;
+
+ if ( type === "radio" || type === "checkbox" ) {
+ val = elem.checked;
+
+ } else if ( type === "select-multiple" ) {
+ val = elem.selectedIndex > -1 ?
+ jQuery.map( elem.options, function( elem ) {
+ return elem.selected;
+ }).join("-") :
+ "";
+
+ } else if ( jQuery.nodeName( elem, "select" ) ) {
+ val = elem.selectedIndex;
+ }
+
+ return val;
+ },
+
+ testChange = function testChange( e ) {
+ var elem = e.target, data, val;
+
+ if ( !rformElems.test( elem.nodeName ) || elem.readOnly ) {
+ return;
+ }
+
+ data = jQuery._data( elem, "_change_data" );
+ val = getVal(elem);
+
+ // the current data will be also retrieved by beforeactivate
+ if ( e.type !== "focusout" || elem.type !== "radio" ) {
+ jQuery._data( elem, "_change_data", val );
+ }
+
+ if ( data === undefined || val === data ) {
+ return;
+ }
+
+ if ( data != null || val ) {
+ e.type = "change";
+ e.liveFired = undefined;
+ jQuery.event.trigger( e, arguments[1], elem );
+ }
+ };
+
+ jQuery.event.special.change = {
+ filters: {
+ focusout: testChange,
+
+ beforedeactivate: testChange,
+
+ click: function( e ) {
+ var elem = e.target, type = jQuery.nodeName( elem, "input" ) ? elem.type : "";
+
+ if ( type === "radio" || type === "checkbox" || jQuery.nodeName( elem, "select" ) ) {
+ testChange.call( this, e );
+ }
+ },
+
+ // Change has to be called before submit
+ // Keydown will be called before keypress, which is used in submit-event delegation
+ keydown: function( e ) {
+ var elem = e.target, type = jQuery.nodeName( elem, "input" ) ? elem.type : "";
+
+ if ( (e.keyCode === 13 && !jQuery.nodeName( elem, "textarea" ) ) ||
+ (e.keyCode === 32 && (type === "checkbox" || type === "radio")) ||
+ type === "select-multiple" ) {
+ testChange.call( this, e );
+ }
+ },
+
+ // Beforeactivate happens also before the previous element is blurred
+ // with this event you can't trigger a change event, but you can store
+ // information
+ beforeactivate: function( e ) {
+ var elem = e.target;
+ jQuery._data( elem, "_change_data", getVal(elem) );
+ }
+ },
+
+ setup: function( data, namespaces ) {
+ if ( this.type === "file" ) {
+ return false;
+ }
+
+ for ( var type in changeFilters ) {
+ jQuery.event.add( this, type + ".specialChange", changeFilters[type] );
+ }
+
+ return rformElems.test( this.nodeName );
+ },
+
+ teardown: function( namespaces ) {
+ jQuery.event.remove( this, ".specialChange" );
+
+ return rformElems.test( this.nodeName );
+ }
+ };
+
+ changeFilters = jQuery.event.special.change.filters;
+
+ // Handle when the input is .focus()'d
+ changeFilters.focus = changeFilters.beforeactivate;
+}
+
+function trigger( type, elem, args ) {
+ // Piggyback on a donor event to simulate a different one.
+ // Fake originalEvent to avoid donor's stopPropagation, but if the
+ // simulated event prevents default then we do the same on the donor.
+ // Don't pass args or remember liveFired; they apply to the donor event.
+ var event = jQuery.extend( {}, args[ 0 ] );
+ event.type = type;
+ event.originalEvent = {};
+ event.liveFired = undefined;
+ jQuery.event.handle.call( elem, event );
+ if ( event.isDefaultPrevented() ) {
+ args[ 0 ].preventDefault();
+ }
+}
+
+// Create "bubbling" focus and blur events
+if ( !jQuery.support.focusinBubbles ) {
+ jQuery.each({ focus: "focusin", blur: "focusout" }, function( orig, fix ) {
+
+ // Attach a single capturing handler while someone wants focusin/focusout
+ var attaches = 0;
+
+ jQuery.event.special[ fix ] = {
+ setup: function() {
+ if ( attaches++ === 0 ) {
+ document.addEventListener( orig, handler, true );
+ }
+ },
+ teardown: function() {
+ if ( --attaches === 0 ) {
+ document.removeEventListener( orig, handler, true );
+ }
+ }
+ };
+
+ function handler( donor ) {
+ // Donor event is always a native one; fix it and switch its type.
+ // Let focusin/out handler cancel the donor focus/blur event.
+ var e = jQuery.event.fix( donor );
+ e.type = fix;
+ e.originalEvent = {};
+ jQuery.event.trigger( e, null, e.target );
+ if ( e.isDefaultPrevented() ) {
+ donor.preventDefault();
+ }
+ }
+ });
+}
+
+jQuery.each(["bind", "one"], function( i, name ) {
+ jQuery.fn[ name ] = function( type, data, fn ) {
+ var handler;
+
+ // Handle object literals
+ if ( typeof type === "object" ) {
+ for ( var key in type ) {
+ this[ name ](key, data, type[key], fn);
+ }
+ return this;
+ }
+
+ if ( arguments.length === 2 || data === false ) {
+ fn = data;
+ data = undefined;
+ }
+
+ if ( name === "one" ) {
+ handler = function( event ) {
+ jQuery( this ).unbind( event, handler );
+ return fn.apply( this, arguments );
+ };
+ handler.guid = fn.guid || jQuery.guid++;
+ } else {
+ handler = fn;
+ }
+
+ if ( type === "unload" && name !== "one" ) {
+ this.one( type, data, fn );
+
+ } else {
+ for ( var i = 0, l = this.length; i < l; i++ ) {
+ jQuery.event.add( this[i], type, handler, data );
+ }
+ }
+
+ return this;
+ };
+});
+
+jQuery.fn.extend({
+ unbind: function( type, fn ) {
+ // Handle object literals
+ if ( typeof type === "object" && !type.preventDefault ) {
+ for ( var key in type ) {
+ this.unbind(key, type[key]);
+ }
+
+ } else {
+ for ( var i = 0, l = this.length; i < l; i++ ) {
+ jQuery.event.remove( this[i], type, fn );
+ }
+ }
+
+ return this;
+ },
+
+ delegate: function( selector, types, data, fn ) {
+ return this.live( types, data, fn, selector );
+ },
+
+ undelegate: function( selector, types, fn ) {
+ if ( arguments.length === 0 ) {
+ return this.unbind( "live" );
+
+ } else {
+ return this.die( types, null, fn, selector );
+ }
+ },
+
+ trigger: function( type, data ) {
+ return this.each(function() {
+ jQuery.event.trigger( type, data, this );
+ });
+ },
+
+ triggerHandler: function( type, data ) {
+ if ( this[0] ) {
+ return jQuery.event.trigger( type, data, this[0], true );
+ }
+ },
+
+ toggle: function( fn ) {
+ // Save reference to arguments for access in closure
+ var args = arguments,
+ guid = fn.guid || jQuery.guid++,
+ i = 0,
+ toggler = function( event ) {
+ // Figure out which function to execute
+ var lastToggle = ( jQuery.data( this, "lastToggle" + fn.guid ) || 0 ) % i;
+ jQuery.data( this, "lastToggle" + fn.guid, lastToggle + 1 );
+
+ // Make sure that clicks stop
+ event.preventDefault();
+
+ // and execute the function
+ return args[ lastToggle ].apply( this, arguments ) || false;
+ };
+
+ // link all the functions, so any of them can unbind this click handler
+ toggler.guid = guid;
+ while ( i < args.length ) {
+ args[ i++ ].guid = guid;
+ }
+
+ return this.click( toggler );
+ },
+
+ hover: function( fnOver, fnOut ) {
+ return this.mouseenter( fnOver ).mouseleave( fnOut || fnOver );
+ }
+});
+
+var liveMap = {
+ focus: "focusin",
+ blur: "focusout",
+ mouseenter: "mouseover",
+ mouseleave: "mouseout"
+};
+
+jQuery.each(["live", "die"], function( i, name ) {
+ jQuery.fn[ name ] = function( types, data, fn, origSelector /* Internal Use Only */ ) {
+ var type, i = 0, match, namespaces, preType,
+ selector = origSelector || this.selector,
+ context = origSelector ? this : jQuery( this.context );
+
+ if ( typeof types === "object" && !types.preventDefault ) {
+ for ( var key in types ) {
+ context[ name ]( key, data, types[key], selector );
+ }
+
+ return this;
+ }
+
+ if ( name === "die" && !types &&
+ origSelector && origSelector.charAt(0) === "." ) {
+
+ context.unbind( origSelector );
+
+ return this;
+ }
+
+ if ( data === false || jQuery.isFunction( data ) ) {
+ fn = data || returnFalse;
+ data = undefined;
+ }
+
+ types = (types || "").split(" ");
+
+ while ( (type = types[ i++ ]) != null ) {
+ match = rnamespaces.exec( type );
+ namespaces = "";
+
+ if ( match ) {
+ namespaces = match[0];
+ type = type.replace( rnamespaces, "" );
+ }
+
+ if ( type === "hover" ) {
+ types.push( "mouseenter" + namespaces, "mouseleave" + namespaces );
+ continue;
+ }
+
+ preType = type;
+
+ if ( liveMap[ type ] ) {
+ types.push( liveMap[ type ] + namespaces );
+ type = type + namespaces;
+
+ } else {
+ type = (liveMap[ type ] || type) + namespaces;
+ }
+
+ if ( name === "live" ) {
+ // bind live handler
+ for ( var j = 0, l = context.length; j < l; j++ ) {
+ jQuery.event.add( context[j], "live." + liveConvert( type, selector ),
+ { data: data, selector: selector, handler: fn, origType: type, origHandler: fn, preType: preType } );
+ }
+
+ } else {
+ // unbind live handler
+ context.unbind( "live." + liveConvert( type, selector ), fn );
+ }
+ }
+
+ return this;
+ };
+});
+
+function liveHandler( event ) {
+ var stop, maxLevel, related, match, handleObj, elem, j, i, l, data, close, namespace, ret,
+ elems = [],
+ selectors = [],
+ events = jQuery._data( this, "events" );
+
+ // Make sure we avoid non-left-click bubbling in Firefox (#3861) and disabled elements in IE (#6911)
+ if ( event.liveFired === this || !events || !events.live || event.target.disabled || event.button && event.type === "click" ) {
+ return;
+ }
+
+ if ( event.namespace ) {
+ namespace = new RegExp("(^|\\.)" + event.namespace.split(".").join("\\.(?:.*\\.)?") + "(\\.|$)");
+ }
+
+ event.liveFired = this;
+
+ var live = events.live.slice(0);
+
+ for ( j = 0; j < live.length; j++ ) {
+ handleObj = live[j];
+
+ if ( handleObj.origType.replace( rnamespaces, "" ) === event.type ) {
+ selectors.push( handleObj.selector );
+
+ } else {
+ live.splice( j--, 1 );
+ }
+ }
+
+ match = jQuery( event.target ).closest( selectors, event.currentTarget );
+
+ for ( i = 0, l = match.length; i < l; i++ ) {
+ close = match[i];
+
+ for ( j = 0; j < live.length; j++ ) {
+ handleObj = live[j];
+
+ if ( close.selector === handleObj.selector && (!namespace || namespace.test( handleObj.namespace )) && !close.elem.disabled ) {
+ elem = close.elem;
+ related = null;
+
+ // Those two events require additional checking
+ if ( handleObj.preType === "mouseenter" || handleObj.preType === "mouseleave" ) {
+ event.type = handleObj.preType;
+ related = jQuery( event.relatedTarget ).closest( handleObj.selector )[0];
+
+ // Make sure not to accidentally match a child element with the same selector
+ if ( related && jQuery.contains( elem, related ) ) {
+ related = elem;
+ }
+ }
+
+ if ( !related || related !== elem ) {
+ elems.push({ elem: elem, handleObj: handleObj, level: close.level });
+ }
+ }
+ }
+ }
+
+ for ( i = 0, l = elems.length; i < l; i++ ) {
+ match = elems[i];
+
+ if ( maxLevel && match.level > maxLevel ) {
+ break;
+ }
+
+ event.currentTarget = match.elem;
+ event.data = match.handleObj.data;
+ event.handleObj = match.handleObj;
+
+ ret = match.handleObj.origHandler.apply( match.elem, arguments );
+
+ if ( ret === false || event.isPropagationStopped() ) {
+ maxLevel = match.level;
+
+ if ( ret === false ) {
+ stop = false;
+ }
+ if ( event.isImmediatePropagationStopped() ) {
+ break;
+ }
+ }
+ }
+
+ return stop;
+}
+
+function liveConvert( type, selector ) {
+ return (type && type !== "*" ? type + "." : "") + selector.replace(rperiod, "`").replace(rspaces, "&");
+}
+
+jQuery.each( ("blur focus focusin focusout load resize scroll unload click dblclick " +
+ "mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave " +
+ "change select submit keydown keypress keyup error").split(" "), function( i, name ) {
+
+ // Handle event binding
+ jQuery.fn[ name ] = function( data, fn ) {
+ if ( fn == null ) {
+ fn = data;
+ data = null;
+ }
+
+ return arguments.length > 0 ?
+ this.bind( name, data, fn ) :
+ this.trigger( name );
+ };
+
+ if ( jQuery.attrFn ) {
+ jQuery.attrFn[ name ] = true;
+ }
+});
+
+
+
+/*!
+ * Sizzle CSS Selector Engine
+ * Copyright 2011, The Dojo Foundation
+ * Released under the MIT, BSD, and GPL Licenses.
+ * More information: http://sizzlejs.com/
+ */
+(function(){
+
+var chunker = /((?:\((?:\([^()]+\)|[^()]+)+\)|\[(?:\[[^\[\]]*\]|['"][^'"]*['"]|[^\[\]'"]+)+\]|\\.|[^ >+~,(\[\\]+)+|[>+~])(\s*,\s*)?((?:.|\r|\n)*)/g,
+ done = 0,
+ toString = Object.prototype.toString,
+ hasDuplicate = false,
+ baseHasDuplicate = true,
+ rBackslash = /\\/g,
+ rNonWord = /\W/;
+
+// Here we check if the JavaScript engine is using some sort of
+// optimization where it does not always call our comparision
+// function. If that is the case, discard the hasDuplicate value.
+// Thus far that includes Google Chrome.
+[0, 0].sort(function() {
+ baseHasDuplicate = false;
+ return 0;
+});
+
+var Sizzle = function( selector, context, results, seed ) {
+ results = results || [];
+ context = context || document;
+
+ var origContext = context;
+
+ if ( context.nodeType !== 1 && context.nodeType !== 9 ) {
+ return [];
+ }
+
+ if ( !selector || typeof selector !== "string" ) {
+ return results;
+ }
+
+ var m, set, checkSet, extra, ret, cur, pop, i,
+ prune = true,
+ contextXML = Sizzle.isXML( context ),
+ parts = [],
+ soFar = selector;
+
+ // Reset the position of the chunker regexp (start from head)
+ do {
+ chunker.exec( "" );
+ m = chunker.exec( soFar );
+
+ if ( m ) {
+ soFar = m[3];
+
+ parts.push( m[1] );
+
+ if ( m[2] ) {
+ extra = m[3];
+ break;
+ }
+ }
+ } while ( m );
+
+ if ( parts.length > 1 && origPOS.exec( selector ) ) {
+
+ if ( parts.length === 2 && Expr.relative[ parts[0] ] ) {
+ set = posProcess( parts[0] + parts[1], context );
+
+ } else {
+ set = Expr.relative[ parts[0] ] ?
+ [ context ] :
+ Sizzle( parts.shift(), context );
+
+ while ( parts.length ) {
+ selector = parts.shift();
+
+ if ( Expr.relative[ selector ] ) {
+ selector += parts.shift();
+ }
+
+ set = posProcess( selector, set );
+ }
+ }
+
+ } else {
+ // Take a shortcut and set the context if the root selector is an ID
+ // (but not if it'll be faster if the inner selector is an ID)
+ if ( !seed && parts.length > 1 && context.nodeType === 9 && !contextXML &&
+ Expr.match.ID.test(parts[0]) && !Expr.match.ID.test(parts[parts.length - 1]) ) {
+
+ ret = Sizzle.find( parts.shift(), context, contextXML );
+ context = ret.expr ?
+ Sizzle.filter( ret.expr, ret.set )[0] :
+ ret.set[0];
+ }
+
+ if ( context ) {
+ ret = seed ?
+ { expr: parts.pop(), set: makeArray(seed) } :
+ Sizzle.find( parts.pop(), parts.length === 1 && (parts[0] === "~" || parts[0] === "+") && context.parentNode ? context.parentNode : context, contextXML );
+
+ set = ret.expr ?
+ Sizzle.filter( ret.expr, ret.set ) :
+ ret.set;
+
+ if ( parts.length > 0 ) {
+ checkSet = makeArray( set );
+
+ } else {
+ prune = false;
+ }
+
+ while ( parts.length ) {
+ cur = parts.pop();
+ pop = cur;
+
+ if ( !Expr.relative[ cur ] ) {
+ cur = "";
+ } else {
+ pop = parts.pop();
+ }
+
+ if ( pop == null ) {
+ pop = context;
+ }
+
+ Expr.relative[ cur ]( checkSet, pop, contextXML );
+ }
+
+ } else {
+ checkSet = parts = [];
+ }
+ }
+
+ if ( !checkSet ) {
+ checkSet = set;
+ }
+
+ if ( !checkSet ) {
+ Sizzle.error( cur || selector );
+ }
+
+ if ( toString.call(checkSet) === "[object Array]" ) {
+ if ( !prune ) {
+ results.push.apply( results, checkSet );
+
+ } else if ( context && context.nodeType === 1 ) {
+ for ( i = 0; checkSet[i] != null; i++ ) {
+ if ( checkSet[i] && (checkSet[i] === true || checkSet[i].nodeType === 1 && Sizzle.contains(context, checkSet[i])) ) {
+ results.push( set[i] );
+ }
+ }
+
+ } else {
+ for ( i = 0; checkSet[i] != null; i++ ) {
+ if ( checkSet[i] && checkSet[i].nodeType === 1 ) {
+ results.push( set[i] );
+ }
+ }
+ }
+
+ } else {
+ makeArray( checkSet, results );
+ }
+
+ if ( extra ) {
+ Sizzle( extra, origContext, results, seed );
+ Sizzle.uniqueSort( results );
+ }
+
+ return results;
+};
+
+Sizzle.uniqueSort = function( results ) {
+ if ( sortOrder ) {
+ hasDuplicate = baseHasDuplicate;
+ results.sort( sortOrder );
+
+ if ( hasDuplicate ) {
+ for ( var i = 1; i < results.length; i++ ) {
+ if ( results[i] === results[ i - 1 ] ) {
+ results.splice( i--, 1 );
+ }
+ }
+ }
+ }
+
+ return results;
+};
+
+Sizzle.matches = function( expr, set ) {
+ return Sizzle( expr, null, null, set );
+};
+
+Sizzle.matchesSelector = function( node, expr ) {
+ return Sizzle( expr, null, null, [node] ).length > 0;
+};
+
+Sizzle.find = function( expr, context, isXML ) {
+ var set;
+
+ if ( !expr ) {
+ return [];
+ }
+
+ for ( var i = 0, l = Expr.order.length; i < l; i++ ) {
+ var match,
+ type = Expr.order[i];
+
+ if ( (match = Expr.leftMatch[ type ].exec( expr )) ) {
+ var left = match[1];
+ match.splice( 1, 1 );
+
+ if ( left.substr( left.length - 1 ) !== "\\" ) {
+ match[1] = (match[1] || "").replace( rBackslash, "" );
+ set = Expr.find[ type ]( match, context, isXML );
+
+ if ( set != null ) {
+ expr = expr.replace( Expr.match[ type ], "" );
+ break;
+ }
+ }
+ }
+ }
+
+ if ( !set ) {
+ set = typeof context.getElementsByTagName !== "undefined" ?
+ context.getElementsByTagName( "*" ) :
+ [];
+ }
+
+ return { set: set, expr: expr };
+};
+
+Sizzle.filter = function( expr, set, inplace, not ) {
+ var match, anyFound,
+ old = expr,
+ result = [],
+ curLoop = set,
+ isXMLFilter = set && set[0] && Sizzle.isXML( set[0] );
+
+ while ( expr && set.length ) {
+ for ( var type in Expr.filter ) {
+ if ( (match = Expr.leftMatch[ type ].exec( expr )) != null && match[2] ) {
+ var found, item,
+ filter = Expr.filter[ type ],
+ left = match[1];
+
+ anyFound = false;
+
+ match.splice(1,1);
+
+ if ( left.substr( left.length - 1 ) === "\\" ) {
+ continue;
+ }
+
+ if ( curLoop === result ) {
+ result = [];
+ }
+
+ if ( Expr.preFilter[ type ] ) {
+ match = Expr.preFilter[ type ]( match, curLoop, inplace, result, not, isXMLFilter );
+
+ if ( !match ) {
+ anyFound = found = true;
+
+ } else if ( match === true ) {
+ continue;
+ }
+ }
+
+ if ( match ) {
+ for ( var i = 0; (item = curLoop[i]) != null; i++ ) {
+ if ( item ) {
+ found = filter( item, match, i, curLoop );
+ var pass = not ^ !!found;
+
+ if ( inplace && found != null ) {
+ if ( pass ) {
+ anyFound = true;
+
+ } else {
+ curLoop[i] = false;
+ }
+
+ } else if ( pass ) {
+ result.push( item );
+ anyFound = true;
+ }
+ }
+ }
+ }
+
+ if ( found !== undefined ) {
+ if ( !inplace ) {
+ curLoop = result;
+ }
+
+ expr = expr.replace( Expr.match[ type ], "" );
+
+ if ( !anyFound ) {
+ return [];
+ }
+
+ break;
+ }
+ }
+ }
+
+ // Improper expression
+ if ( expr === old ) {
+ if ( anyFound == null ) {
+ Sizzle.error( expr );
+
+ } else {
+ break;
+ }
+ }
+
+ old = expr;
+ }
+
+ return curLoop;
+};
+
+Sizzle.error = function( msg ) {
+ throw "Syntax error, unrecognized expression: " + msg;
+};
+
+var Expr = Sizzle.selectors = {
+ order: [ "ID", "NAME", "TAG" ],
+
+ match: {
+ ID: /#((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,
+ CLASS: /\.((?:[\w\u00c0-\uFFFF\-]|\\.)+)/,
+ NAME: /\[name=['"]*((?:[\w\u00c0-\uFFFF\-]|\\.)+)['"]*\]/,
+ ATTR: /\[\s*((?:[\w\u00c0-\uFFFF\-]|\\.)+)\s*(?:(\S?=)\s*(?:(['"])(.*?)\3|(#?(?:[\w\u00c0-\uFFFF\-]|\\.)*)|)|)\s*\]/,
+ TAG: /^((?:[\w\u00c0-\uFFFF\*\-]|\\.)+)/,
+ CHILD: /:(only|nth|last|first)-child(?:\(\s*(even|odd|(?:[+\-]?\d+|(?:[+\-]?\d*)?n\s*(?:[+\-]\s*\d+)?))\s*\))?/,
+ POS: /:(nth|eq|gt|lt|first|last|even|odd)(?:\((\d*)\))?(?=[^\-]|$)/,
+ PSEUDO: /:((?:[\w\u00c0-\uFFFF\-]|\\.)+)(?:\((['"]?)((?:\([^\)]+\)|[^\(\)]*)+)\2\))?/
+ },
+
+ leftMatch: {},
+
+ attrMap: {
+ "class": "className",
+ "for": "htmlFor"
+ },
+
+ attrHandle: {
+ href: function( elem ) {
+ return elem.getAttribute( "href" );
+ },
+ type: function( elem ) {
+ return elem.getAttribute( "type" );
+ }
+ },
+
+ relative: {
+ "+": function(checkSet, part){
+ var isPartStr = typeof part === "string",
+ isTag = isPartStr && !rNonWord.test( part ),
+ isPartStrNotTag = isPartStr && !isTag;
+
+ if ( isTag ) {
+ part = part.toLowerCase();
+ }
+
+ for ( var i = 0, l = checkSet.length, elem; i < l; i++ ) {
+ if ( (elem = checkSet[i]) ) {
+ while ( (elem = elem.previousSibling) && elem.nodeType !== 1 ) {}
+
+ checkSet[i] = isPartStrNotTag || elem && elem.nodeName.toLowerCase() === part ?
+ elem || false :
+ elem === part;
+ }
+ }
+
+ if ( isPartStrNotTag ) {
+ Sizzle.filter( part, checkSet, true );
+ }
+ },
+
+ ">": function( checkSet, part ) {
+ var elem,
+ isPartStr = typeof part === "string",
+ i = 0,
+ l = checkSet.length;
+
+ if ( isPartStr && !rNonWord.test( part ) ) {
+ part = part.toLowerCase();
+
+ for ( ; i < l; i++ ) {
+ elem = checkSet[i];
+
+ if ( elem ) {
+ var parent = elem.parentNode;
+ checkSet[i] = parent.nodeName.toLowerCase() === part ? parent : false;
+ }
+ }
+
+ } else {
+ for ( ; i < l; i++ ) {
+ elem = checkSet[i];
+
+ if ( elem ) {
+ checkSet[i] = isPartStr ?
+ elem.parentNode :
+ elem.parentNode === part;
+ }
+ }
+
+ if ( isPartStr ) {
+ Sizzle.filter( part, checkSet, true );
+ }
+ }
+ },
+
+ "": function(checkSet, part, isXML){
+ var nodeCheck,
+ doneName = done++,
+ checkFn = dirCheck;
+
+ if ( typeof part === "string" && !rNonWord.test( part ) ) {
+ part = part.toLowerCase();
+ nodeCheck = part;
+ checkFn = dirNodeCheck;
+ }
+
+ checkFn( "parentNode", part, doneName, checkSet, nodeCheck, isXML );
+ },
+
+ "~": function( checkSet, part, isXML ) {
+ var nodeCheck,
+ doneName = done++,
+ checkFn = dirCheck;
+
+ if ( typeof part === "string" && !rNonWord.test( part ) ) {
+ part = part.toLowerCase();
+ nodeCheck = part;
+ checkFn = dirNodeCheck;
+ }
+
+ checkFn( "previousSibling", part, doneName, checkSet, nodeCheck, isXML );
+ }
+ },
+
+ find: {
+ ID: function( match, context, isXML ) {
+ if ( typeof context.getElementById !== "undefined" && !isXML ) {
+ var m = context.getElementById(match[1]);
+ // Check parentNode to catch when Blackberry 4.6 returns
+ // nodes that are no longer in the document #6963
+ return m && m.parentNode ? [m] : [];
+ }
+ },
+
+ NAME: function( match, context ) {
+ if ( typeof context.getElementsByName !== "undefined" ) {
+ var ret = [],
+ results = context.getElementsByName( match[1] );
+
+ for ( var i = 0, l = results.length; i < l; i++ ) {
+ if ( results[i].getAttribute("name") === match[1] ) {
+ ret.push( results[i] );
+ }
+ }
+
+ return ret.length === 0 ? null : ret;
+ }
+ },
+
+ TAG: function( match, context ) {
+ if ( typeof context.getElementsByTagName !== "undefined" ) {
+ return context.getElementsByTagName( match[1] );
+ }
+ }
+ },
+ preFilter: {
+ CLASS: function( match, curLoop, inplace, result, not, isXML ) {
+ match = " " + match[1].replace( rBackslash, "" ) + " ";
+
+ if ( isXML ) {
+ return match;
+ }
+
+ for ( var i = 0, elem; (elem = curLoop[i]) != null; i++ ) {
+ if ( elem ) {
+ if ( not ^ (elem.className && (" " + elem.className + " ").replace(/[\t\n\r]/g, " ").indexOf(match) >= 0) ) {
+ if ( !inplace ) {
+ result.push( elem );
+ }
+
+ } else if ( inplace ) {
+ curLoop[i] = false;
+ }
+ }
+ }
+
+ return false;
+ },
+
+ ID: function( match ) {
+ return match[1].replace( rBackslash, "" );
+ },
+
+ TAG: function( match, curLoop ) {
+ return match[1].replace( rBackslash, "" ).toLowerCase();
+ },
+
+ CHILD: function( match ) {
+ if ( match[1] === "nth" ) {
+ if ( !match[2] ) {
+ Sizzle.error( match[0] );
+ }
+
+ match[2] = match[2].replace(/^\+|\s*/g, '');
+
+ // parse equations like 'even', 'odd', '5', '2n', '3n+2', '4n-1', '-n+6'
+ var test = /(-?)(\d*)(?:n([+\-]?\d*))?/.exec(
+ match[2] === "even" && "2n" || match[2] === "odd" && "2n+1" ||
+ !/\D/.test( match[2] ) && "0n+" + match[2] || match[2]);
+
+ // calculate the numbers (first)n+(last) including if they are negative
+ match[2] = (test[1] + (test[2] || 1)) - 0;
+ match[3] = test[3] - 0;
+ }
+ else if ( match[2] ) {
+ Sizzle.error( match[0] );
+ }
+
+ // TODO: Move to normal caching system
+ match[0] = done++;
+
+ return match;
+ },
+
+ ATTR: function( match, curLoop, inplace, result, not, isXML ) {
+ var name = match[1] = match[1].replace( rBackslash, "" );
+
+ if ( !isXML && Expr.attrMap[name] ) {
+ match[1] = Expr.attrMap[name];
+ }
+
+ // Handle if an un-quoted value was used
+ match[4] = ( match[4] || match[5] || "" ).replace( rBackslash, "" );
+
+ if ( match[2] === "~=" ) {
+ match[4] = " " + match[4] + " ";
+ }
+
+ return match;
+ },
+
+ PSEUDO: function( match, curLoop, inplace, result, not ) {
+ if ( match[1] === "not" ) {
+ // If we're dealing with a complex expression, or a simple one
+ if ( ( chunker.exec(match[3]) || "" ).length > 1 || /^\w/.test(match[3]) ) {
+ match[3] = Sizzle(match[3], null, null, curLoop);
+
+ } else {
+ var ret = Sizzle.filter(match[3], curLoop, inplace, true ^ not);
+
+ if ( !inplace ) {
+ result.push.apply( result, ret );
+ }
+
+ return false;
+ }
+
+ } else if ( Expr.match.POS.test( match[0] ) || Expr.match.CHILD.test( match[0] ) ) {
+ return true;
+ }
+
+ return match;
+ },
+
+ POS: function( match ) {
+ match.unshift( true );
+
+ return match;
+ }
+ },
+
+ filters: {
+ enabled: function( elem ) {
+ return elem.disabled === false && elem.type !== "hidden";
+ },
+
+ disabled: function( elem ) {
+ return elem.disabled === true;
+ },
+
+ checked: function( elem ) {
+ return elem.checked === true;
+ },
+
+ selected: function( elem ) {
+ // Accessing this property makes selected-by-default
+ // options in Safari work properly
+ if ( elem.parentNode ) {
+ elem.parentNode.selectedIndex;
+ }
+
+ return elem.selected === true;
+ },
+
+ parent: function( elem ) {
+ return !!elem.firstChild;
+ },
+
+ empty: function( elem ) {
+ return !elem.firstChild;
+ },
+
+ has: function( elem, i, match ) {
+ return !!Sizzle( match[3], elem ).length;
+ },
+
+ header: function( elem ) {
+ return (/h\d/i).test( elem.nodeName );
+ },
+
+ text: function( elem ) {
+ var attr = elem.getAttribute( "type" ), type = elem.type;
+ // IE6 and 7 will map elem.type to 'text' for new HTML5 types (search, etc)
+ // use getAttribute instead to test this case
+ return elem.nodeName.toLowerCase() === "input" && "text" === type && ( attr === type || attr === null );
+ },
+
+ radio: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "radio" === elem.type;
+ },
+
+ checkbox: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "checkbox" === elem.type;
+ },
+
+ file: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "file" === elem.type;
+ },
+
+ password: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "password" === elem.type;
+ },
+
+ submit: function( elem ) {
+ var name = elem.nodeName.toLowerCase();
+ return (name === "input" || name === "button") && "submit" === elem.type;
+ },
+
+ image: function( elem ) {
+ return elem.nodeName.toLowerCase() === "input" && "image" === elem.type;
+ },
+
+ reset: function( elem ) {
+ var name = elem.nodeName.toLowerCase();
+ return (name === "input" || name === "button") && "reset" === elem.type;
+ },
+
+ button: function( elem ) {
+ var name = elem.nodeName.toLowerCase();
+ return name === "input" && "button" === elem.type || name === "button";
+ },
+
+ input: function( elem ) {
+ return (/input|select|textarea|button/i).test( elem.nodeName );
+ },
+
+ focus: function( elem ) {
+ return elem === elem.ownerDocument.activeElement;
+ }
+ },
+ setFilters: {
+ first: function( elem, i ) {
+ return i === 0;
+ },
+
+ last: function( elem, i, match, array ) {
+ return i === array.length - 1;
+ },
+
+ even: function( elem, i ) {
+ return i % 2 === 0;
+ },
+
+ odd: function( elem, i ) {
+ return i % 2 === 1;
+ },
+
+ lt: function( elem, i, match ) {
+ return i < match[3] - 0;
+ },
+
+ gt: function( elem, i, match ) {
+ return i > match[3] - 0;
+ },
+
+ nth: function( elem, i, match ) {
+ return match[3] - 0 === i;
+ },
+
+ eq: function( elem, i, match ) {
+ return match[3] - 0 === i;
+ }
+ },
+ filter: {
+ PSEUDO: function( elem, match, i, array ) {
+ var name = match[1],
+ filter = Expr.filters[ name ];
+
+ if ( filter ) {
+ return filter( elem, i, match, array );
+
+ } else if ( name === "contains" ) {
+ return (elem.textContent || elem.innerText || Sizzle.getText([ elem ]) || "").indexOf(match[3]) >= 0;
+
+ } else if ( name === "not" ) {
+ var not = match[3];
+
+ for ( var j = 0, l = not.length; j < l; j++ ) {
+ if ( not[j] === elem ) {
+ return false;
+ }
+ }
+
+ return true;
+
+ } else {
+ Sizzle.error( name );
+ }
+ },
+
+ CHILD: function( elem, match ) {
+ var type = match[1],
+ node = elem;
+
+ switch ( type ) {
+ case "only":
+ case "first":
+ while ( (node = node.previousSibling) ) {
+ if ( node.nodeType === 1 ) {
+ return false;
+ }
+ }
+
+ if ( type === "first" ) {
+ return true;
+ }
+
+ node = elem;
+
+ case "last":
+ while ( (node = node.nextSibling) ) {
+ if ( node.nodeType === 1 ) {
+ return false;
+ }
+ }
+
+ return true;
+
+ case "nth":
+ var first = match[2],
+ last = match[3];
+
+ if ( first === 1 && last === 0 ) {
+ return true;
+ }
+
+ var doneName = match[0],
+ parent = elem.parentNode;
+
+ if ( parent && (parent.sizcache !== doneName || !elem.nodeIndex) ) {
+ var count = 0;
+
+ for ( node = parent.firstChild; node; node = node.nextSibling ) {
+ if ( node.nodeType === 1 ) {
+ node.nodeIndex = ++count;
+ }
+ }
+
+ parent.sizcache = doneName;
+ }
+
+ var diff = elem.nodeIndex - last;
+
+ if ( first === 0 ) {
+ return diff === 0;
+
+ } else {
+ return ( diff % first === 0 && diff / first >= 0 );
+ }
+ }
+ },
+
+ ID: function( elem, match ) {
+ return elem.nodeType === 1 && elem.getAttribute("id") === match;
+ },
+
+ TAG: function( elem, match ) {
+ return (match === "*" && elem.nodeType === 1) || elem.nodeName.toLowerCase() === match;
+ },
+
+ CLASS: function( elem, match ) {
+ return (" " + (elem.className || elem.getAttribute("class")) + " ")
+ .indexOf( match ) > -1;
+ },
+
+ ATTR: function( elem, match ) {
+ var name = match[1],
+ result = Expr.attrHandle[ name ] ?
+ Expr.attrHandle[ name ]( elem ) :
+ elem[ name ] != null ?
+ elem[ name ] :
+ elem.getAttribute( name ),
+ value = result + "",
+ type = match[2],
+ check = match[4];
+
+ return result == null ?
+ type === "!=" :
+ type === "=" ?
+ value === check :
+ type === "*=" ?
+ value.indexOf(check) >= 0 :
+ type === "~=" ?
+ (" " + value + " ").indexOf(check) >= 0 :
+ !check ?
+ value && result !== false :
+ type === "!=" ?
+ value !== check :
+ type === "^=" ?
+ value.indexOf(check) === 0 :
+ type === "$=" ?
+ value.substr(value.length - check.length) === check :
+ type === "|=" ?
+ value === check || value.substr(0, check.length + 1) === check + "-" :
+ false;
+ },
+
+ POS: function( elem, match, i, array ) {
+ var name = match[2],
+ filter = Expr.setFilters[ name ];
+
+ if ( filter ) {
+ return filter( elem, i, match, array );
+ }
+ }
+ }
+};
+
+var origPOS = Expr.match.POS,
+ fescape = function(all, num){
+ return "\\" + (num - 0 + 1);
+ };
+
+for ( var type in Expr.match ) {
+ Expr.match[ type ] = new RegExp( Expr.match[ type ].source + (/(?![^\[]*\])(?![^\(]*\))/.source) );
+ Expr.leftMatch[ type ] = new RegExp( /(^(?:.|\r|\n)*?)/.source + Expr.match[ type ].source.replace(/\\(\d+)/g, fescape) );
+}
+
+var makeArray = function( array, results ) {
+ array = Array.prototype.slice.call( array, 0 );
+
+ if ( results ) {
+ results.push.apply( results, array );
+ return results;
+ }
+
+ return array;
+};
+
+// Perform a simple check to determine if the browser is capable of
+// converting a NodeList to an array using builtin methods.
+// Also verifies that the returned array holds DOM nodes
+// (which is not the case in the Blackberry browser)
+try {
+ Array.prototype.slice.call( document.documentElement.childNodes, 0 )[0].nodeType;
+
+// Provide a fallback method if it does not work
+} catch( e ) {
+ makeArray = function( array, results ) {
+ var i = 0,
+ ret = results || [];
+
+ if ( toString.call(array) === "[object Array]" ) {
+ Array.prototype.push.apply( ret, array );
+
+ } else {
+ if ( typeof array.length === "number" ) {
+ for ( var l = array.length; i < l; i++ ) {
+ ret.push( array[i] );
+ }
+
+ } else {
+ for ( ; array[i]; i++ ) {
+ ret.push( array[i] );
+ }
+ }
+ }
+
+ return ret;
+ };
+}
+
+var sortOrder, siblingCheck;
+
+if ( document.documentElement.compareDocumentPosition ) {
+ sortOrder = function( a, b ) {
+ if ( a === b ) {
+ hasDuplicate = true;
+ return 0;
+ }
+
+ if ( !a.compareDocumentPosition || !b.compareDocumentPosition ) {
+ return a.compareDocumentPosition ? -1 : 1;
+ }
+
+ return a.compareDocumentPosition(b) & 4 ? -1 : 1;
+ };
+
+} else {
+ sortOrder = function( a, b ) {
+ // The nodes are identical, we can exit early
+ if ( a === b ) {
+ hasDuplicate = true;
+ return 0;
+
+ // Fallback to using sourceIndex (in IE) if it's available on both nodes
+ } else if ( a.sourceIndex && b.sourceIndex ) {
+ return a.sourceIndex - b.sourceIndex;
+ }
+
+ var al, bl,
+ ap = [],
+ bp = [],
+ aup = a.parentNode,
+ bup = b.parentNode,
+ cur = aup;
+
+ // If the nodes are siblings (or identical) we can do a quick check
+ if ( aup === bup ) {
+ return siblingCheck( a, b );
+
+ // If no parents were found then the nodes are disconnected
+ } else if ( !aup ) {
+ return -1;
+
+ } else if ( !bup ) {
+ return 1;
+ }
+
+ // Otherwise they're somewhere else in the tree so we need
+ // to build up a full list of the parentNodes for comparison
+ while ( cur ) {
+ ap.unshift( cur );
+ cur = cur.parentNode;
+ }
+
+ cur = bup;
+
+ while ( cur ) {
+ bp.unshift( cur );
+ cur = cur.parentNode;
+ }
+
+ al = ap.length;
+ bl = bp.length;
+
+ // Start walking down the tree looking for a discrepancy
+ for ( var i = 0; i < al && i < bl; i++ ) {
+ if ( ap[i] !== bp[i] ) {
+ return siblingCheck( ap[i], bp[i] );
+ }
+ }
+
+ // We ended someplace up the tree so do a sibling check
+ return i === al ?
+ siblingCheck( a, bp[i], -1 ) :
+ siblingCheck( ap[i], b, 1 );
+ };
+
+ siblingCheck = function( a, b, ret ) {
+ if ( a === b ) {
+ return ret;
+ }
+
+ var cur = a.nextSibling;
+
+ while ( cur ) {
+ if ( cur === b ) {
+ return -1;
+ }
+
+ cur = cur.nextSibling;
+ }
+
+ return 1;
+ };
+}
+
+// Utility function for retreiving the text value of an array of DOM nodes
+Sizzle.getText = function( elems ) {
+ var ret = "", elem;
+
+ for ( var i = 0; elems[i]; i++ ) {
+ elem = elems[i];
+
+ // Get the text from text nodes and CDATA nodes
+ if ( elem.nodeType === 3 || elem.nodeType === 4 ) {
+ ret += elem.nodeValue;
+
+ // Traverse everything else, except comment nodes
+ } else if ( elem.nodeType !== 8 ) {
+ ret += Sizzle.getText( elem.childNodes );
+ }
+ }
+
+ return ret;
+};
+
+// Check to see if the browser returns elements by name when
+// querying by getElementById (and provide a workaround)
+(function(){
+ // We're going to inject a fake input element with a specified name
+ var form = document.createElement("div"),
+ id = "script" + (new Date()).getTime(),
+ root = document.documentElement;
+
+ form.innerHTML = "<a name='" + id + "'/>";
+
+ // Inject it into the root element, check its status, and remove it quickly
+ root.insertBefore( form, root.firstChild );
+
+ // The workaround has to do additional checks after a getElementById
+ // Which slows things down for other browsers (hence the branching)
+ if ( document.getElementById( id ) ) {
+ Expr.find.ID = function( match, context, isXML ) {
+ if ( typeof context.getElementById !== "undefined" && !isXML ) {
+ var m = context.getElementById(match[1]);
+
+ return m ?
+ m.id === match[1] || typeof m.getAttributeNode !== "undefined" && m.getAttributeNode("id").nodeValue === match[1] ?
+ [m] :
+ undefined :
+ [];
+ }
+ };
+
+ Expr.filter.ID = function( elem, match ) {
+ var node = typeof elem.getAttributeNode !== "undefined" && elem.getAttributeNode("id");
+
+ return elem.nodeType === 1 && node && node.nodeValue === match;
+ };
+ }
+
+ root.removeChild( form );
+
+ // release memory in IE
+ root = form = null;
+})();
+
+(function(){
+ // Check to see if the browser returns only elements
+ // when doing getElementsByTagName("*")
+
+ // Create a fake element
+ var div = document.createElement("div");
+ div.appendChild( document.createComment("") );
+
+ // Make sure no comments are found
+ if ( div.getElementsByTagName("*").length > 0 ) {
+ Expr.find.TAG = function( match, context ) {
+ var results = context.getElementsByTagName( match[1] );
+
+ // Filter out possible comments
+ if ( match[1] === "*" ) {
+ var tmp = [];
+
+ for ( var i = 0; results[i]; i++ ) {
+ if ( results[i].nodeType === 1 ) {
+ tmp.push( results[i] );
+ }
+ }
+
+ results = tmp;
+ }
+
+ return results;
+ };
+ }
+
+ // Check to see if an attribute returns normalized href attributes
+ div.innerHTML = "<a href='#'></a>";
+
+ if ( div.firstChild && typeof div.firstChild.getAttribute !== "undefined" &&
+ div.firstChild.getAttribute("href") !== "#" ) {
+
+ Expr.attrHandle.href = function( elem ) {
+ return elem.getAttribute( "href", 2 );
+ };
+ }
+
+ // release memory in IE
+ div = null;
+})();
+
+if ( document.querySelectorAll ) {
+ (function(){
+ var oldSizzle = Sizzle,
+ div = document.createElement("div"),
+ id = "__sizzle__";
+
+ div.innerHTML = "<p class='TEST'></p>";
+
+ // Safari can't handle uppercase or unicode characters when
+ // in quirks mode.
+ if ( div.querySelectorAll && div.querySelectorAll(".TEST").length === 0 ) {
+ return;
+ }
+
+ Sizzle = function( query, context, extra, seed ) {
+ context = context || document;
+
+ // Only use querySelectorAll on non-XML documents
+ // (ID selectors don't work in non-HTML documents)
+ if ( !seed && !Sizzle.isXML(context) ) {
+ // See if we find a selector to speed up
+ var match = /^(\w+$)|^\.([\w\-]+$)|^#([\w\-]+$)/.exec( query );
+
+ if ( match && (context.nodeType === 1 || context.nodeType === 9) ) {
+ // Speed-up: Sizzle("TAG")
+ if ( match[1] ) {
+ return makeArray( context.getElementsByTagName( query ), extra );
+
+ // Speed-up: Sizzle(".CLASS")
+ } else if ( match[2] && Expr.find.CLASS && context.getElementsByClassName ) {
+ return makeArray( context.getElementsByClassName( match[2] ), extra );
+ }
+ }
+
+ if ( context.nodeType === 9 ) {
+ // Speed-up: Sizzle("body")
+ // The body element only exists once, optimize finding it
+ if ( query === "body" && context.body ) {
+ return makeArray( [ context.body ], extra );
+
+ // Speed-up: Sizzle("#ID")
+ } else if ( match && match[3] ) {
+ var elem = context.getElementById( match[3] );
+
+ // Check parentNode to catch when Blackberry 4.6 returns
+ // nodes that are no longer in the document #6963
+ if ( elem && elem.parentNode ) {
+ // Handle the case where IE and Opera return items
+ // by name instead of ID
+ if ( elem.id === match[3] ) {
+ return makeArray( [ elem ], extra );
+ }
+
+ } else {
+ return makeArray( [], extra );
+ }
+ }
+
+ try {
+ return makeArray( context.querySelectorAll(query), extra );
+ } catch(qsaError) {}
+
+ // qSA works strangely on Element-rooted queries
+ // We can work around this by specifying an extra ID on the root
+ // and working up from there (Thanks to Andrew Dupont for the technique)
+ // IE 8 doesn't work on object elements
+ } else if ( context.nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) {
+ var oldContext = context,
+ old = context.getAttribute( "id" ),
+ nid = old || id,
+ hasParent = context.parentNode,
+ relativeHierarchySelector = /^\s*[+~]/.test( query );
+
+ if ( !old ) {
+ context.setAttribute( "id", nid );
+ } else {
+ nid = nid.replace( /'/g, "\\$&" );
+ }
+ if ( relativeHierarchySelector && hasParent ) {
+ context = context.parentNode;
+ }
+
+ try {
+ if ( !relativeHierarchySelector || hasParent ) {
+ return makeArray( context.querySelectorAll( "[id='" + nid + "'] " + query ), extra );
+ }
+
+ } catch(pseudoError) {
+ } finally {
+ if ( !old ) {
+ oldContext.removeAttribute( "id" );
+ }
+ }
+ }
+ }
+
+ return oldSizzle(query, context, extra, seed);
+ };
+
+ for ( var prop in oldSizzle ) {
+ Sizzle[ prop ] = oldSizzle[ prop ];
+ }
+
+ // release memory in IE
+ div = null;
+ })();
+}
+
// Feature-detect a native matchesSelector implementation and, when one is
// usable, route Sizzle.matchesSelector through it for speed.
(function(){
	var html = document.documentElement,
		matches = html.matchesSelector || html.mozMatchesSelector || html.webkitMatchesSelector || html.msMatchesSelector;

	if ( matches ) {
		// Check to see if it's possible to do matchesSelector
		// on a disconnected node (IE 9 fails this)
		var disconnectedMatch = !matches.call( document.createElement( "div" ), "div" ),
			pseudoWorks = false;

		try {
			// This should fail with an exception
			// Gecko does not error, returns false instead
			matches.call( document.documentElement, "[test!='']:sizzle" );

		} catch( pseudoError ) {
			pseudoWorks = true;
		}

		// node: element to test; expr: selector string.
		// Returns true when `node` matches `expr`.
		Sizzle.matchesSelector = function( node, expr ) {
			// Make sure that attribute selectors are quoted
			expr = expr.replace(/\=\s*([^'"\]]*)\s*\]/g, "='$1']");

			if ( !Sizzle.isXML( node ) ) {
				try {
					// Only trust the native call when pseudos are known to throw,
					// or when the expression contains no pseudo / != selectors
					if ( pseudoWorks || !Expr.match.PSEUDO.test( expr ) && !/!=/.test( expr ) ) {
						var ret = matches.call( node, expr );

						// IE 9's matchesSelector returns false on disconnected nodes
						if ( ret || !disconnectedMatch ||
							// As well, disconnected nodes are said to be in a document
							// fragment in IE 9, so check for that
							node.document && node.document.nodeType !== 11 ) {
							return ret;
						}
					}
				} catch(e) {}
			}

			// Fall back to a full Sizzle run when the native call is unusable
			return Sizzle(expr, null, null, [node]).length > 0;
		};
	}
})();
+
// Feature-test getElementsByClassName and register a CLASS finder only on
// browsers where the native lookup behaves correctly.
(function(){
	var div = document.createElement("div");

	div.innerHTML = "<div class='test e'></div><div class='test'></div>";

	// Opera can't find a second classname (in 9.6)
	// Also, make sure that getElementsByClassName actually exists
	if ( !div.getElementsByClassName || div.getElementsByClassName("e").length === 0 ) {
		return;
	}

	// Safari caches class attributes, doesn't catch changes (in 3.2)
	div.lastChild.className = "e";

	if ( div.getElementsByClassName("e").length === 1 ) {
		return;
	}

	// Native lookup is trustworthy: let CLASS participate in the find order
	Expr.order.splice(1, 0, "CLASS");
	Expr.find.CLASS = function( match, context, isXML ) {
		if ( typeof context.getElementsByClassName !== "undefined" && !isXML ) {
			return context.getElementsByClassName(match[1]);
		}
	};

	// release memory in IE
	div = null;
})();
+
// Walk each element of checkSet along the `dir` chain (e.g. "parentNode"),
// looking for a node whose tag name equals `cur` (already lower-cased).
// Each slot in checkSet is replaced in place with the matched node or false.
// Visited element nodes are stamped with sizcache/sizset so later walks that
// reach an already-stamped node can reuse that slot's result.
function dirNodeCheck( dir, cur, doneName, checkSet, nodeCheck, isXML ) {
	var idx = 0,
		total = checkSet.length;

	for ( ; idx < total; idx++ ) {
		var node = checkSet[idx];

		if ( !node ) {
			continue;
		}

		var found = false,
			walker = node[dir];

		while ( walker ) {
			// A node stamped in this pass means its chain was already resolved
			if ( walker.sizcache === doneName ) {
				found = checkSet[walker.sizset];
				break;
			}

			// Stamp HTML element nodes so repeated walks can short-circuit
			if ( walker.nodeType === 1 && !isXML ) {
				walker.sizcache = doneName;
				walker.sizset = idx;
			}

			if ( walker.nodeName.toLowerCase() === cur ) {
				found = walker;
				break;
			}

			walker = walker[dir];
		}

		checkSet[idx] = found;
	}
}
+
// Like dirNodeCheck, but `cur` may be either a concrete node (matched by
// identity) or a selector string (matched via Sizzle.filter). Each slot of
// checkSet is replaced with the match (node or true), or false.
function dirCheck( dir, cur, doneName, checkSet, nodeCheck, isXML ) {
	for ( var idx = 0, total = checkSet.length; idx < total; idx++ ) {
		var node = checkSet[idx];

		if ( !node ) {
			continue;
		}

		var found = false,
			walker = node[dir];

		while ( walker ) {
			// Reuse the result of an earlier walk that stamped this node
			if ( walker.sizcache === doneName ) {
				found = checkSet[walker.sizset];
				break;
			}

			if ( walker.nodeType === 1 ) {
				if ( !isXML ) {
					walker.sizcache = doneName;
					walker.sizset = idx;
				}

				if ( typeof cur !== "string" ) {
					// Identity comparison against a concrete node
					if ( walker === cur ) {
						found = true;
						break;
					}

				} else if ( Sizzle.filter( cur, [walker] ).length > 0 ) {
					// Selector comparison
					found = walker;
					break;
				}
			}

			walker = walker[dir];
		}

		checkSet[idx] = found;
	}
}
+
// Pick the fastest available containment check for this browser:
// native contains(), compareDocumentPosition(), or a no-op fallback.
if ( document.documentElement.contains ) {
	Sizzle.contains = function( a, b ) {
		// contains(b) can report true for a === b, so exclude identity first
		return a !== b && (a.contains ? a.contains(b) : true);
	};

} else if ( document.documentElement.compareDocumentPosition ) {
	Sizzle.contains = function( a, b ) {
		// Bit 16 (DOCUMENT_POSITION_CONTAINED_BY) means b is inside a
		return !!(a.compareDocumentPosition(b) & 16);
	};

} else {
	Sizzle.contains = function() {
		return false;
	};
}
+
// True when `elem` belongs to an XML (non-HTML) document.
Sizzle.isXML = function( elem ) {
	// documentElement is verified for cases where it doesn't yet exist
	// (such as loading iframes in IE - #4833)
	var root = elem ? ( elem.ownerDocument || elem ).documentElement : undefined;

	return root ? root.nodeName !== "HTML" : false;
};
+
// Runs a selector containing positional pseudos (:first, :eq, ...) by
// stripping the pseudos, matching the remainder under each root, and then
// applying the collected pseudos as a filter over the combined result set.
var posProcess = function( selector, context ) {
	var match,
		tmpSet = [],
		later = "",
		root = context.nodeType ? [context] : context;

	// Position selectors must be done after the filter
	// And so must :not(positional) so we move all PSEUDOs to the end
	while ( (match = Expr.match.PSEUDO.exec( selector )) ) {
		later += match[0];
		selector = selector.replace( Expr.match.PSEUDO, "" );
	}

	// A bare relative combinator needs a universal selector to be runnable
	selector = Expr.relative[selector] ? selector + "*" : selector;

	for ( var i = 0, l = root.length; i < l; i++ ) {
		Sizzle( selector, root[i], tmpSet );
	}

	return Sizzle.filter( later, tmpSet );
};
+
// EXPOSE
// Publish Sizzle's entry points on the jQuery namespace so the rest of the
// library (and user code) can reach them.
jQuery.find = Sizzle;
jQuery.expr = Sizzle.selectors;
jQuery.expr[":"] = jQuery.expr.filters;
jQuery.unique = Sizzle.uniqueSort;
jQuery.text = Sizzle.getText;
jQuery.isXMLDoc = Sizzle.isXML;
jQuery.contains = Sizzle.contains;
+
+
+})();
+
+
// Shared regexes and helpers for the traversal methods below
var runtil = /Until$/,
	rparentsprev = /^(?:parents|prevUntil|prevAll)/,
	// Note: This RegExp should be improved, or likely pulled from Sizzle
	rmultiselector = /,/,
	// "Simple" selectors (no :, #, [, ., or ,) may take the fast filter path
	isSimple = /^.[^:#\[\.,]*$/,
	slice = Array.prototype.slice,
	POS = jQuery.expr.match.POS,
	// methods guaranteed to produce a unique set when starting from a unique set
	guaranteedUnique = {
		children: true,
		contents: true,
		next: true,
		prev: true
	};
+
jQuery.fn.extend({
	// Find descendants of each element matching `selector`. A non-string
	// argument is treated as a candidate set filtered by containment.
	find: function( selector ) {
		var self = this,
			i, l;

		if ( typeof selector !== "string" ) {
			// Keep only candidates contained by some element of this set
			return jQuery( selector ).filter(function() {
				for ( i = 0, l = self.length; i < l; i++ ) {
					if ( jQuery.contains( self[ i ], this ) ) {
						return true;
					}
				}
			});
		}

		var ret = this.pushStack( "", "find", selector ),
			length, n, r;

		for ( i = 0, l = this.length; i < l; i++ ) {
			length = ret.length;
			jQuery.find( selector, this[i], ret );

			if ( i > 0 ) {
				// Make sure that the results are unique
				for ( n = length; n < ret.length; n++ ) {
					for ( r = 0; r < length; r++ ) {
						if ( ret[r] === ret[n] ) {
							ret.splice(n--, 1);
							break;
						}
					}
				}
			}
		}

		return ret;
	},

	// Reduce the set to elements containing at least one node of `target`.
	has: function( target ) {
		var targets = jQuery( target );
		return this.filter(function() {
			for ( var i = 0, l = targets.length; i < l; i++ ) {
				if ( jQuery.contains( this, targets[i] ) ) {
					return true;
				}
			}
		});
	},

	// Remove elements matching `selector` from the set.
	not: function( selector ) {
		return this.pushStack( winnow(this, selector, false), "not", selector);
	},

	// Keep only elements matching `selector`.
	filter: function( selector ) {
		return this.pushStack( winnow(this, selector, true), "filter", selector );
	},

	// True when at least one element of the set matches `selector`.
	is: function( selector ) {
		return !!selector && ( typeof selector === "string" ?
			jQuery.filter( selector, this ).length > 0 :
			this.filter( selector ).length > 0 );
	},

	// Walk up from each element to the closest ancestor-or-self matching
	// the selector(s); the climb stops at `context` when provided.
	closest: function( selectors, context ) {
		var ret = [], i, l, cur = this[0];

		// Array (internal use, e.g. event delegation): returns
		// { selector, elem, level } records for every match while climbing
		if ( jQuery.isArray( selectors ) ) {
			var match, selector,
				matches = {},
				level = 1;

			if ( cur && selectors.length ) {
				for ( i = 0, l = selectors.length; i < l; i++ ) {
					selector = selectors[i];

					if ( !matches[ selector ] ) {
						// Positional selectors must be pre-resolved into a set
						matches[ selector ] = POS.test( selector ) ?
							jQuery( selector, context || this.context ) :
							selector;
					}
				}

				while ( cur && cur.ownerDocument && cur !== context ) {
					for ( selector in matches ) {
						match = matches[ selector ];

						if ( match.jquery ? match.index( cur ) > -1 : jQuery( cur ).is( match ) ) {
							ret.push({ selector: selector, elem: cur, level: level });
						}
					}

					cur = cur.parentNode;
					level++;
				}
			}

			return ret;
		}

		// String
		var pos = POS.test( selectors ) || typeof selectors !== "string" ?
			jQuery( selectors, context || this.context ) :
			0;

		for ( i = 0, l = this.length; i < l; i++ ) {
			cur = this[i];

			while ( cur ) {
				if ( pos ? pos.index(cur) > -1 : jQuery.find.matchesSelector(cur, selectors) ) {
					ret.push( cur );
					break;

				} else {
					cur = cur.parentNode;
					if ( !cur || !cur.ownerDocument || cur === context || cur.nodeType === 11 ) {
						break;
					}
				}
			}
		}

		ret = ret.length > 1 ? jQuery.unique( ret ) : ret;

		return this.pushStack( ret, "closest", selectors );
	},

	// Determine the position of an element within
	// the matched set of elements
	index: function( elem ) {

		// No argument, return index in parent
		if ( !elem ) {
			return ( this[0] && this[0].parentNode ) ? this.prevAll().length : -1;
		}

		// index in selector
		if ( typeof elem === "string" ) {
			return jQuery.inArray( this[0], jQuery( elem ) );
		}

		// Locate the position of the desired element
		return jQuery.inArray(
			// If it receives a jQuery object, the first element is used
			elem.jquery ? elem[0] : elem, this );
	},

	// Merge the elements described by `selector` into the current set.
	add: function( selector, context ) {
		var set = typeof selector === "string" ?
			jQuery( selector, context ) :
			jQuery.makeArray( selector && selector.nodeType ? [ selector ] : selector ),
			all = jQuery.merge( this.get(), set );

		// Disconnected nodes cannot be sorted, so skip unique() for them
		return this.pushStack( isDisconnected( set[0] ) || isDisconnected( all[0] ) ?
			all :
			jQuery.unique( all ) );
	},

	// Add the previous set on the stack back into the current one.
	andSelf: function() {
		return this.add( this.prevObject );
	}
});
+
// A painfully simple check to see if an element is disconnected
// from a document (should be improved, where feasible).
function isDisconnected( node ) {
	if ( !node || !node.parentNode ) {
		return true;
	}
	return node.parentNode.nodeType === 11;
}
+
// Generate the relationship traversal methods (parent, next, siblings, ...)
// from per-element node-walking primitives.
jQuery.each({
	parent: function( elem ) {
		var parent = elem.parentNode;
		// Document fragments are not real parents
		return parent && parent.nodeType !== 11 ? parent : null;
	},
	parents: function( elem ) {
		return jQuery.dir( elem, "parentNode" );
	},
	parentsUntil: function( elem, i, until ) {
		return jQuery.dir( elem, "parentNode", until );
	},
	next: function( elem ) {
		return jQuery.nth( elem, 2, "nextSibling" );
	},
	prev: function( elem ) {
		return jQuery.nth( elem, 2, "previousSibling" );
	},
	nextAll: function( elem ) {
		return jQuery.dir( elem, "nextSibling" );
	},
	prevAll: function( elem ) {
		return jQuery.dir( elem, "previousSibling" );
	},
	nextUntil: function( elem, i, until ) {
		return jQuery.dir( elem, "nextSibling", until );
	},
	prevUntil: function( elem, i, until ) {
		return jQuery.dir( elem, "previousSibling", until );
	},
	siblings: function( elem ) {
		return jQuery.sibling( elem.parentNode.firstChild, elem );
	},
	children: function( elem ) {
		return jQuery.sibling( elem.firstChild );
	},
	contents: function( elem ) {
		return jQuery.nodeName( elem, "iframe" ) ?
			elem.contentDocument || elem.contentWindow.document :
			jQuery.makeArray( elem.childNodes );
	}
}, function( name, fn ) {
	jQuery.fn[ name ] = function( until, selector ) {
		var ret = jQuery.map( this, fn, until ),
			// The variable 'args' was introduced in
			// https://github.com/jquery/jquery/commit/52a0238
			// to work around a bug in Chrome 10 (Dev) and should be removed when the bug is fixed.
			// http://code.google.com/p/v8/issues/detail?id=1050
			args = slice.call(arguments);

		// Only the *Until methods take an `until` argument; for the others
		// the first argument is really the optional selector
		if ( !runtil.test( name ) ) {
			selector = until;
		}

		if ( selector && typeof selector === "string" ) {
			ret = jQuery.filter( selector, ret );
		}

		ret = this.length > 1 && !guaranteedUnique[ name ] ? jQuery.unique( ret ) : ret;

		// parents/prev* walk away from the document order; present results
		// in document order when built from multiple elements or selectors
		if ( (this.length > 1 || rmultiselector.test( selector )) && rparentsprev.test( name ) ) {
			ret = ret.reverse();
		}

		return this.pushStack( ret, name, args.join(",") );
	};
});
+
jQuery.extend({
	// Filter `elems` by a selector expression; `not` inverts the match.
	filter: function( expr, elems, not ) {
		if ( not ) {
			expr = ":not(" + expr + ")";
		}

		// Single-element sets can use the cheaper matchesSelector path
		return elems.length === 1 ?
			jQuery.find.matchesSelector(elems[0], expr) ? [ elems[0] ] : [] :
			jQuery.find.matches(expr, elems);
	},

	// Collect element nodes reached by repeatedly following `dir`
	// (e.g. "parentNode"), stopping at the document or at `until`.
	dir: function( elem, dir, until ) {
		var matched = [],
			cur = elem[ dir ];

		while ( cur && cur.nodeType !== 9 && (until === undefined || cur.nodeType !== 1 || !jQuery( cur ).is( until )) ) {
			if ( cur.nodeType === 1 ) {
				matched.push( cur );
			}
			cur = cur[dir];
		}
		return matched;
	},

	// Return the `result`-th element node found walking `dir` from `cur`.
	nth: function( cur, result, dir, elem ) {
		result = result || 1;
		var num = 0;

		for ( ; cur; cur = cur[dir] ) {
			if ( cur.nodeType === 1 && ++num === result ) {
				break;
			}
		}

		return cur;
	},

	// Collect element siblings starting at `n`, excluding `elem` itself.
	sibling: function( n, elem ) {
		var r = [];

		for ( ; n; n = n.nextSibling ) {
			if ( n.nodeType === 1 && n !== elem ) {
				r.push( n );
			}
		}

		return r;
	}
});
+
+// Implement the identical functionality for filter and not
+function winnow( elements, qualifier, keep ) {
+
+ // Can't pass null or undefined to indexOf in Firefox 4
+ // Set to 0 to skip string check
+ qualifier = qualifier || 0;
+
+ if ( jQuery.isFunction( qualifier ) ) {
+ return jQuery.grep(elements, function( elem, i ) {
+ var retVal = !!qualifier.call( elem, i, elem );
+ return retVal === keep;
+ });
+
+ } else if ( qualifier.nodeType ) {
+ return jQuery.grep(elements, function( elem, i ) {
+ return (elem === qualifier) === keep;
+ });
+
+ } else if ( typeof qualifier === "string" ) {
+ var filtered = jQuery.grep(elements, function( elem ) {
+ return elem.nodeType === 1;
+ });
+
+ if ( isSimple.test( qualifier ) ) {
+ return jQuery.filter(qualifier, filtered, !keep);
+ } else {
+ qualifier = jQuery.filter( qualifier, filtered );
+ }
+ }
+
+ return jQuery.grep(elements, function( elem, i ) {
+ return (jQuery.inArray( elem, qualifier ) >= 0) === keep;
+ });
+}
+
+
+
+
// Regexes and lookup tables used by the DOM manipulation methods below
var rinlinejQuery = / jQuery\d+="(?:\d+|null)"/g,
	rleadingWhitespace = /^\s+/,
	// Self-closing "XHTML-style" tags that must be expanded before innerHTML
	rxhtmlTag = /<(?!area|br|col|embed|hr|img|input|link|meta|param)(([\w:]+)[^>]*)\/>/ig,
	rtagName = /<([\w:]+)/,
	rtbody = /<tbody/i,
	rhtml = /<|&#?\w+;/,
	// Elements whose fragments cannot be safely cached or cloned
	rnocache = /<(?:script|object|embed|option|style)/i,
	// checked="checked" or checked
	rchecked = /checked\s*(?:[^=]|=\s*.checked.)/i,
	rscriptType = /\/(java|ecma)script/i,
	rcleanScript = /^\s*<!(?:\[CDATA\[|\-\-)/,
	// tag -> [wrapper depth, opening wrapper, closing wrapper] for parsing
	// elements that require a specific parent via innerHTML
	wrapMap = {
		option: [ 1, "<select multiple='multiple'>", "</select>" ],
		legend: [ 1, "<fieldset>", "</fieldset>" ],
		thead: [ 1, "<table>", "</table>" ],
		tr: [ 2, "<table><tbody>", "</tbody></table>" ],
		td: [ 3, "<table><tbody><tr>", "</tr></tbody></table>" ],
		col: [ 2, "<table><tbody></tbody><colgroup>", "</colgroup></table>" ],
		area: [ 1, "<map>", "</map>" ],
		_default: [ 0, "", "" ]
	};

wrapMap.optgroup = wrapMap.option;
wrapMap.tbody = wrapMap.tfoot = wrapMap.colgroup = wrapMap.caption = wrapMap.thead;
wrapMap.th = wrapMap.td;

// IE can't serialize <link> and <script> tags normally
if ( !jQuery.support.htmlSerialize ) {
	wrapMap._default = [ 1, "div<div>", "</div>" ];
}
+
jQuery.fn.extend({
	// Get the combined text of the set, or set the text content of each
	// element (function values are invoked per element).
	text: function( text ) {
		if ( jQuery.isFunction(text) ) {
			return this.each(function(i) {
				var self = jQuery( this );

				self.text( text.call(this, i, self.text()) );
			});
		}

		if ( typeof text !== "object" && text !== undefined ) {
			return this.empty().append( (this[0] && this[0].ownerDocument || document).createTextNode( text ) );
		}

		return jQuery.text( this );
	},

	// Wrap a single structure around all elements in the set.
	wrapAll: function( html ) {
		if ( jQuery.isFunction( html ) ) {
			return this.each(function(i) {
				jQuery(this).wrapAll( html.call(this, i) );
			});
		}

		if ( this[0] ) {
			// The elements to wrap the target around
			var wrap = jQuery( html, this[0].ownerDocument ).eq(0).clone(true);

			if ( this[0].parentNode ) {
				wrap.insertBefore( this[0] );
			}

			// Descend to the innermost element of the wrapper before appending
			wrap.map(function() {
				var elem = this;

				while ( elem.firstChild && elem.firstChild.nodeType === 1 ) {
					elem = elem.firstChild;
				}

				return elem;
			}).append( this );
		}

		return this;
	},

	// Wrap a structure around the contents of each element.
	wrapInner: function( html ) {
		if ( jQuery.isFunction( html ) ) {
			return this.each(function(i) {
				jQuery(this).wrapInner( html.call(this, i) );
			});
		}

		return this.each(function() {
			var self = jQuery( this ),
				contents = self.contents();

			if ( contents.length ) {
				contents.wrapAll( html );

			} else {
				self.append( html );
			}
		});
	},

	// Wrap a structure around each element individually.
	wrap: function( html ) {
		return this.each(function() {
			jQuery( this ).wrapAll( html );
		});
	},

	// Remove the parents of the set, keeping the elements in the DOM.
	unwrap: function() {
		return this.parent().each(function() {
			if ( !jQuery.nodeName( this, "body" ) ) {
				jQuery( this ).replaceWith( this.childNodes );
			}
		}).end();
	},

	// Insert content at the end of each element.
	append: function() {
		return this.domManip(arguments, true, function( elem ) {
			if ( this.nodeType === 1 ) {
				this.appendChild( elem );
			}
		});
	},

	// Insert content at the beginning of each element.
	prepend: function() {
		return this.domManip(arguments, true, function( elem ) {
			if ( this.nodeType === 1 ) {
				this.insertBefore( elem, this.firstChild );
			}
		});
	},

	// Insert content before each element of the set.
	before: function() {
		if ( this[0] && this[0].parentNode ) {
			return this.domManip(arguments, false, function( elem ) {
				this.parentNode.insertBefore( elem, this );
			});
		} else if ( arguments.length ) {
			// Disconnected: build a new set with the content first
			var set = jQuery(arguments[0]);
			set.push.apply( set, this.toArray() );
			return this.pushStack( set, "before", arguments );
		}
	},

	// Insert content after each element of the set.
	after: function() {
		if ( this[0] && this[0].parentNode ) {
			return this.domManip(arguments, false, function( elem ) {
				this.parentNode.insertBefore( elem, this.nextSibling );
			});
		} else if ( arguments.length ) {
			// Disconnected: append the content to a copy of the set
			var set = this.pushStack( this, "after", arguments );
			set.push.apply( set, jQuery(arguments[0]).toArray() );
			return set;
		}
	},

	// keepData is for internal use only--do not document
	remove: function( selector, keepData ) {
		for ( var i = 0, elem; (elem = this[i]) != null; i++ ) {
			if ( !selector || jQuery.filter( selector, [ elem ] ).length ) {
				if ( !keepData && elem.nodeType === 1 ) {
					// Purge data/events first to avoid memory leaks
					jQuery.cleanData( elem.getElementsByTagName("*") );
					jQuery.cleanData( [ elem ] );
				}

				if ( elem.parentNode ) {
					elem.parentNode.removeChild( elem );
				}
			}
		}

		return this;
	},

	// Remove all child nodes of each element in the set.
	empty: function() {
		for ( var i = 0, elem; (elem = this[i]) != null; i++ ) {
			// Remove element nodes and prevent memory leaks
			if ( elem.nodeType === 1 ) {
				jQuery.cleanData( elem.getElementsByTagName("*") );
			}

			// Remove any remaining nodes
			while ( elem.firstChild ) {
				elem.removeChild( elem.firstChild );
			}
		}

		return this;
	},

	// Deep-copy the set; the flags control whether jQuery data and events
	// are copied for the elements (and, separately, their descendants).
	clone: function( dataAndEvents, deepDataAndEvents ) {
		dataAndEvents = dataAndEvents == null ? false : dataAndEvents;
		deepDataAndEvents = deepDataAndEvents == null ? dataAndEvents : deepDataAndEvents;

		return this.map( function () {
			return jQuery.clone( this, dataAndEvents, deepDataAndEvents );
		});
	},

	// Get the innerHTML of the first element, or set it on every element.
	html: function( value ) {
		if ( value === undefined ) {
			return this[0] && this[0].nodeType === 1 ?
				this[0].innerHTML.replace(rinlinejQuery, "") :
				null;

		// See if we can take a shortcut and just use innerHTML
		} else if ( typeof value === "string" && !rnocache.test( value ) &&
			(jQuery.support.leadingWhitespace || !rleadingWhitespace.test( value )) &&
			!wrapMap[ (rtagName.exec( value ) || ["", ""])[1].toLowerCase() ] ) {

			value = value.replace(rxhtmlTag, "<$1></$2>");

			try {
				for ( var i = 0, l = this.length; i < l; i++ ) {
					// Remove element nodes and prevent memory leaks
					if ( this[i].nodeType === 1 ) {
						jQuery.cleanData( this[i].getElementsByTagName("*") );
						this[i].innerHTML = value;
					}
				}

			// If using innerHTML throws an exception, use the fallback method
			} catch(e) {
				this.empty().append( value );
			}

		} else if ( jQuery.isFunction( value ) ) {
			this.each(function(i){
				var self = jQuery( this );

				self.html( value.call(this, i, self.html()) );
			});

		} else {
			this.empty().append( value );
		}

		return this;
	},

	// Replace each element in the set with the given content.
	replaceWith: function( value ) {
		if ( this[0] && this[0].parentNode ) {
			// Make sure that the elements are removed from the DOM before they are inserted
			// this can help fix replacing a parent with child elements
			if ( jQuery.isFunction( value ) ) {
				return this.each(function(i) {
					var self = jQuery(this), old = self.html();
					self.replaceWith( value.call( this, i, old ) );
				});
			}

			if ( typeof value !== "string" ) {
				value = jQuery( value ).detach();
			}

			return this.each(function() {
				var next = this.nextSibling,
					parent = this.parentNode;

				jQuery( this ).remove();

				if ( next ) {
					jQuery(next).before( value );
				} else {
					jQuery(parent).append( value );
				}
			});
		} else {
			return this.length ?
				this.pushStack( jQuery(jQuery.isFunction(value) ? value() : value), "replaceWith", value ) :
				this;
		}
	},

	// Like remove(), but keeps data/events so nodes can be re-inserted.
	detach: function( selector ) {
		return this.remove( selector, true );
	},

	// Core engine behind append/prepend/before/after: builds a fragment
	// from `args` and hands each (possibly cloned) copy to `callback`.
	domManip: function( args, table, callback ) {
		var results, first, fragment, parent,
			value = args[0],
			scripts = [];

		// We can't cloneNode fragments that contain checked, in WebKit
		if ( !jQuery.support.checkClone && arguments.length === 3 && typeof value === "string" && rchecked.test( value ) ) {
			return this.each(function() {
				jQuery(this).domManip( args, table, callback, true );
			});
		}

		if ( jQuery.isFunction(value) ) {
			return this.each(function(i) {
				var self = jQuery(this);
				args[0] = value.call(this, i, table ? self.html() : undefined);
				self.domManip( args, table, callback );
			});
		}

		if ( this[0] ) {
			parent = value && value.parentNode;

			// If we're in a fragment, just use that instead of building a new one
			if ( jQuery.support.parentNode && parent && parent.nodeType === 11 && parent.childNodes.length === this.length ) {
				results = { fragment: parent };

			} else {
				results = jQuery.buildFragment( args, this, scripts );
			}

			fragment = results.fragment;

			if ( fragment.childNodes.length === 1 ) {
				first = fragment = fragment.firstChild;
			} else {
				first = fragment.firstChild;
			}

			if ( first ) {
				table = table && jQuery.nodeName( first, "tr" );

				for ( var i = 0, l = this.length, lastIndex = l - 1; i < l; i++ ) {
					callback.call(
						table ?
							root(this[i], first) :
							this[i],
						// Make sure that we do not leak memory by inadvertently discarding
						// the original fragment (which might have attached data) instead of
						// using it; in addition, use the original fragment object for the last
						// item instead of first because it can end up being emptied incorrectly
						// in certain situations (Bug #8070).
						// Fragments from the fragment cache must always be cloned and never used
						// in place.
						results.cacheable || (l > 1 && i < lastIndex) ?
							jQuery.clone( fragment, true, true ) :
							fragment
					);
				}
			}

			if ( scripts.length ) {
				jQuery.each( scripts, evalScript );
			}
		}

		return this;
	}
});
+
// For table insertions, return the table's tbody (creating one if needed)
// so rows land in the right place; otherwise return the element unchanged.
function root( elem, cur ) {
	if ( !jQuery.nodeName( elem, "table" ) ) {
		return elem;
	}
	return elem.getElementsByTagName("tbody")[0] ||
		elem.appendChild( elem.ownerDocument.createElement("tbody") );
}
+
// Copies jQuery data and bound events from `src` to `dest` after a clone.
function cloneCopyEvent( src, dest ) {

	// Only element destinations with jQuery data on the source need work
	if ( dest.nodeType !== 1 || !jQuery.hasData( src ) ) {
		return;
	}

	var internalKey = jQuery.expando,
		oldData = jQuery.data( src ),
		curData = jQuery.data( dest, oldData );

	// Switch to use the internal data object, if it exists, for the next
	// stage of data copying
	if ( (oldData = oldData[ internalKey ]) ) {
		var events = oldData.events;
		curData = curData[ internalKey ] = jQuery.extend({}, oldData);

		if ( events ) {
			// The clone needs its own handle/events; re-bind each handler
			delete curData.handle;
			curData.events = {};

			for ( var type in events ) {
				for ( var i = 0, l = events[ type ].length; i < l; i++ ) {
					jQuery.event.add( dest, type + ( events[ type ][ i ].namespace ? "." : "" ) + events[ type ][ i ].namespace, events[ type ][ i ], events[ type ][ i ].data );
				}
			}
		}
	}
}
+
// Works around IE6-8 bugs after cloneNode: re-merges attributes and
// restores checked/selected/defaultValue state on the cloned element.
function cloneFixAttributes( src, dest ) {
	var nodeName;

	// We do not need to do anything for non-Elements
	if ( dest.nodeType !== 1 ) {
		return;
	}

	// clearAttributes removes the attributes, which we don't want,
	// but also removes the attachEvent events, which we *do* want
	if ( dest.clearAttributes ) {
		dest.clearAttributes();
	}

	// mergeAttributes, in contrast, only merges back on the
	// original attributes, not the events
	if ( dest.mergeAttributes ) {
		dest.mergeAttributes( src );
	}

	nodeName = dest.nodeName.toLowerCase();

	// IE6-8 fail to clone children inside object elements that use
	// the proprietary classid attribute value (rather than the type
	// attribute) to identify the type of content to display
	if ( nodeName === "object" ) {
		dest.outerHTML = src.outerHTML;

	} else if ( nodeName === "input" && (src.type === "checkbox" || src.type === "radio") ) {
		// IE6-8 fails to persist the checked state of a cloned checkbox
		// or radio button. Worse, IE6-7 fail to give the cloned element
		// a checked appearance if the defaultChecked value isn't also set
		if ( src.checked ) {
			dest.defaultChecked = dest.checked = src.checked;
		}

		// IE6-7 get confused and end up setting the value of a cloned
		// checkbox/radio button to an empty string instead of "on"
		if ( dest.value !== src.value ) {
			dest.value = src.value;
		}

	// IE6-8 fails to return the selected option to the default selected
	// state when cloning options
	} else if ( nodeName === "option" ) {
		dest.selected = src.defaultSelected;

	// IE6-8 fails to set the defaultValue to the correct value when
	// cloning other types of input fields
	} else if ( nodeName === "input" || nodeName === "textarea" ) {
		dest.defaultValue = src.defaultValue;
	}

	// Event data gets referenced instead of copied if the expando
	// gets copied too
	dest.removeAttribute( jQuery.expando );
}
+
// Builds (or retrieves from cache) a document fragment for the HTML/nodes
// in `args`. `nodes` supplies the owner-document context; extracted script
// elements are pushed onto `scripts`. Returns { fragment, cacheable }.
jQuery.buildFragment = function( args, nodes, scripts ) {
	var fragment, cacheable, cacheresults, doc;

	// nodes may contain either an explicit document object,
	// a jQuery collection or context object.
	// If nodes[0] contains a valid object to assign to doc
	if ( nodes && nodes[0] ) {
		doc = nodes[0].ownerDocument || nodes[0];
	}

	// Ensure that an attr object doesn't incorrectly stand in as a document object
	// Chrome and Firefox seem to allow this to occur and will throw exception
	// Fixes #8950
	// Also guard against `doc` still being undefined (empty or missing
	// `nodes`), which would otherwise throw on the property read below
	if ( !doc || !doc.createDocumentFragment ) {
		doc = document;
	}

	// Only cache "small" (1/2 KB) HTML strings that are associated with the main document
	// Cloning options loses the selected state, so don't cache them
	// IE 6 doesn't like it when you put <object> or <embed> elements in a fragment
	// Also, WebKit does not clone 'checked' attributes on cloneNode, so don't cache
	if ( args.length === 1 && typeof args[0] === "string" && args[0].length < 512 && doc === document &&
		args[0].charAt(0) === "<" && !rnocache.test( args[0] ) && (jQuery.support.checkClone || !rchecked.test( args[0] )) ) {

		cacheable = true;

		// A cached value of 1 means "seen once, not yet worth materializing"
		cacheresults = jQuery.fragments[ args[0] ];
		if ( cacheresults && cacheresults !== 1 ) {
			fragment = cacheresults;
		}
	}

	if ( !fragment ) {
		fragment = doc.createDocumentFragment();
		jQuery.clean( args, doc, fragment, scripts );
	}

	if ( cacheable ) {
		// Promote to a real cached fragment only on the second sighting
		jQuery.fragments[ args[0] ] = cacheresults ? fragment : 1;
	}

	return { fragment: fragment, cacheable: cacheable };
};
+
// Cache of small HTML-string fragments (see jQuery.buildFragment)
jQuery.fragments = {};

// Generate the reversed insertion methods (appendTo, prependTo, ...) by
// delegating to their direction-reversed counterparts.
jQuery.each({
	appendTo: "append",
	prependTo: "prepend",
	insertBefore: "before",
	insertAfter: "after",
	replaceAll: "replaceWith"
}, function( name, original ) {
	jQuery.fn[ name ] = function( selector ) {
		var ret = [],
			insert = jQuery( selector ),
			parent = this.length === 1 && this[0].parentNode;

		// Fast path: a lone fragment child moving into a single target
		if ( parent && parent.nodeType === 11 && parent.childNodes.length === 1 && insert.length === 1 ) {
			insert[ original ]( this[0] );
			return this;

		} else {
			for ( var i = 0, l = insert.length; i < l; i++ ) {
				// Clone for every target after the first so each gets its own copy
				var elems = (i > 0 ? this.clone(true) : this).get();
				jQuery( insert[i] )[ original ]( elems );
				ret = ret.concat( elems );
			}

			return this.pushStack( ret, name, insert.selector );
		}
	};
});
+
// Return every descendant of `elem` via whichever bulk-lookup API the
// node supports, or an empty array when neither is available.
function getAll( elem ) {
	if ( "getElementsByTagName" in elem ) {
		return elem.getElementsByTagName( "*" );
	}
	if ( "querySelectorAll" in elem ) {
		return elem.querySelectorAll( "*" );
	}
	return [];
}
+
// Used in clean, fixes the defaultChecked property so cloned
// checkboxes/radios keep their checked state in IE 6/7.
function fixDefaultChecked( elem ) {
	var type = elem.type;
	if ( type === "checkbox" || type === "radio" ) {
		elem.defaultChecked = elem.checked;
	}
}
// Finds all inputs under (or at) `elem` and passes them to fixDefaultChecked.
function findInputs( elem ) {
	if ( jQuery.nodeName( elem, "input" ) ) {
		fixDefaultChecked( elem );
		return;
	}
	if ( "getElementsByTagName" in elem ) {
		jQuery.grep( elem.getElementsByTagName("input"), fixDefaultChecked );
	}
}
+
+jQuery.extend({
	// Deep-clone `elem`. `dataAndEvents` copies jQuery data/events for the
	// element itself; `deepDataAndEvents` does the same for descendants.
	clone: function( elem, dataAndEvents, deepDataAndEvents ) {
		var clone = elem.cloneNode(true),
				srcElements,
				destElements,
				i;

		if ( (!jQuery.support.noCloneEvent || !jQuery.support.noCloneChecked) &&
				(elem.nodeType === 1 || elem.nodeType === 11) && !jQuery.isXMLDoc(elem) ) {
			// IE copies events bound via attachEvent when using cloneNode.
			// Calling detachEvent on the clone will also remove the events
			// from the original. In order to get around this, we use some
			// proprietary methods to clear the events. Thanks to MooTools
			// guys for this hotness.

			cloneFixAttributes( elem, clone );

			// Using Sizzle here is crazy slow, so we use getElementsByTagName
			// instead
			srcElements = getAll( elem );
			destElements = getAll( clone );

			// Weird iteration because IE will replace the length property
			// with an element if you are cloning the body and one of the
			// elements on the page has a name or id of "length"
			for ( i = 0; srcElements[i]; ++i ) {
				// Ensure that the destination node is not null; Fixes #9587
				if ( destElements[i] ) {
					cloneFixAttributes( srcElements[i], destElements[i] );
				}
			}
		}

		// Copy the events from the original to the clone
		if ( dataAndEvents ) {
			cloneCopyEvent( elem, clone );

			if ( deepDataAndEvents ) {
				srcElements = getAll( elem );
				destElements = getAll( clone );

				for ( i = 0; srcElements[i]; ++i ) {
					cloneCopyEvent( srcElements[i], destElements[i] );
				}
			}
		}

		// Drop the temporary node lists so IE can reclaim them
		srcElements = destElements = null;

		// Return the cloned set
		return clone;
	},
+
	// Convert an array of HTML strings and/or nodes into DOM nodes.
	// When `fragment` is given, nodes are appended to it and any script
	// elements are diverted into `scripts` instead. Returns the node list.
	clean: function( elems, context, fragment, scripts ) {
		var checkScriptType;

		context = context || document;

		// !context.createElement fails in IE with an error but returns typeof 'object'
		if ( typeof context.createElement === "undefined" ) {
			context = context.ownerDocument || context[0] && context[0].ownerDocument || document;
		}

		var ret = [], j;

		for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) {
			if ( typeof elem === "number" ) {
				elem += "";
			}

			if ( !elem ) {
				continue;
			}

			// Convert html string into DOM nodes
			if ( typeof elem === "string" ) {
				if ( !rhtml.test( elem ) ) {
					elem = context.createTextNode( elem );
				} else {
					// Fix "XHTML"-style tags in all browsers
					elem = elem.replace(rxhtmlTag, "<$1></$2>");

					// Trim whitespace, otherwise indexOf won't work as expected
					var tag = (rtagName.exec( elem ) || ["", ""])[1].toLowerCase(),
						wrap = wrapMap[ tag ] || wrapMap._default,
						depth = wrap[0],
						div = context.createElement("div");

					// Go to html and back, then peel off extra wrappers
					div.innerHTML = wrap[1] + elem + wrap[2];

					// Move to the right depth
					while ( depth-- ) {
						div = div.lastChild;
					}

					// Remove IE's autoinserted <tbody> from table fragments
					if ( !jQuery.support.tbody ) {

						// String was a <table>, *may* have spurious <tbody>
						var hasBody = rtbody.test(elem),
							tbody = tag === "table" && !hasBody ?
								div.firstChild && div.firstChild.childNodes :

								// String was a bare <thead> or <tfoot>
								wrap[1] === "<table>" && !hasBody ?
									div.childNodes :
									[];

						for ( j = tbody.length - 1; j >= 0 ; --j ) {
							if ( jQuery.nodeName( tbody[ j ], "tbody" ) && !tbody[ j ].childNodes.length ) {
								tbody[ j ].parentNode.removeChild( tbody[ j ] );
							}
						}
					}

					// IE completely kills leading whitespace when innerHTML is used
					if ( !jQuery.support.leadingWhitespace && rleadingWhitespace.test( elem ) ) {
						div.insertBefore( context.createTextNode( rleadingWhitespace.exec(elem)[0] ), div.firstChild );
					}

					elem = div.childNodes;
				}
			}

			// Resets defaultChecked for any radios and checkboxes
			// about to be appended to the DOM in IE 6/7 (#8060)
			var len;
			if ( !jQuery.support.appendChecked ) {
				if ( elem[0] && typeof (len = elem.length) === "number" ) {
					for ( j = 0; j < len; j++ ) {
						findInputs( elem[j] );
					}
				} else {
					findInputs( elem );
				}
			}

			// Single node or a node list produced from an HTML string
			if ( elem.nodeType ) {
				ret.push( elem );
			} else {
				ret = jQuery.merge( ret, elem );
			}
		}

		if ( fragment ) {
			// Only scripts with no type or a javascript/ecmascript type execute
			checkScriptType = function( elem ) {
				return !elem.type || rscriptType.test( elem.type );
			};
			for ( i = 0; ret[i]; i++ ) {
				if ( scripts && jQuery.nodeName( ret[i], "script" ) && (!ret[i].type || ret[i].type.toLowerCase() === "text/javascript") ) {
					// Divert executable scripts to the caller instead of the fragment
					scripts.push( ret[i].parentNode ? ret[i].parentNode.removeChild( ret[i] ) : ret[i] );

				} else {
					if ( ret[i].nodeType === 1 ) {
						// Splice nested executable scripts into the walk as well
						var jsTags = jQuery.grep( ret[i].getElementsByTagName( "script" ), checkScriptType );

						ret.splice.apply( ret, [i + 1, 0].concat( jsTags ) );
					}
					fragment.appendChild( ret[i] );
				}
			}
		}

		return ret;
	},
+
+ // Removes jQuery's per-element cache entry (data + events) for each element
+ // in `elems`, unbinding handlers and clearing the expando to avoid leaks.
+ cleanData: function( elems ) {
+ var data, id, cache = jQuery.cache, internalKey = jQuery.expando, special = jQuery.event.special,
+ deleteExpando = jQuery.support.deleteExpando;
+
+ for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) {
+ // Skip element types that jQuery never stores data on (jQuery.noData)
+ if ( elem.nodeName && jQuery.noData[elem.nodeName.toLowerCase()] ) {
+ continue;
+ }
+
+ id = elem[ jQuery.expando ];
+
+ if ( id ) {
+ data = cache[ id ] && cache[ id ][ internalKey ];
+
+ if ( data && data.events ) {
+ for ( var type in data.events ) {
+ if ( special[ type ] ) {
+ jQuery.event.remove( elem, type );
+
+ // This is a shortcut to avoid jQuery.event.remove's overhead
+ } else {
+ jQuery.removeEvent( elem, type, data.handle );
+ }
+ }
+
+ // Null the DOM reference to avoid IE6/7/8 leak (#7054)
+ if ( data.handle ) {
+ data.handle.elem = null;
+ }
+ }
+
+ if ( deleteExpando ) {
+ delete elem[ jQuery.expando ];
+
+ } else if ( elem.removeAttribute ) {
+ elem.removeAttribute( jQuery.expando );
+ }
+
+ delete cache[ id ];
+ }
+ }
+ }
+});
+
+// Executes a single <script> element: external scripts are fetched
+// synchronously via jQuery.ajax, inline scripts are run with globalEval
+// (with sourceURL-style comments neutralized by rcleanScript). The script
+// node is then removed from the DOM. The unused `i` parameter exists
+// because this is called as a jQuery.each-style iteration callback.
+function evalScript( i, elem ) {
+ if ( elem.src ) {
+ jQuery.ajax({
+ url: elem.src,
+ async: false,
+ dataType: "script"
+ });
+ } else {
+ jQuery.globalEval( ( elem.text || elem.textContent || elem.innerHTML || "" ).replace( rcleanScript, "/*$0*/" ) );
+ }
+
+ // Remove the script node once it has been evaluated
+ if ( elem.parentNode ) {
+ elem.parentNode.removeChild( elem );
+ }
+}
+
+
+
+
+// Shared regexes and state for the CSS module. curCSS, getComputedStyle and
+// currentStyle are assigned further down depending on browser capabilities.
+var ralpha = /alpha\([^)]*\)/i,
+ ropacity = /opacity=([^)]*)/,
+ // fixed for IE9, see #8346
+ rupper = /([A-Z]|^ms)/g,
+ rnumpx = /^-?\d+(?:px)?$/i,
+ rnum = /^-?\d/,
+ rrelNum = /^([\-+])=([\-+.\de]+)/,
+
+ // Styles applied temporarily by jQuery.swap when measuring hidden elements
+ cssShow = { position: "absolute", visibility: "hidden", display: "block" },
+ cssWidth = [ "Left", "Right" ],
+ cssHeight = [ "Top", "Bottom" ],
+ curCSS,
+
+ getComputedStyle,
+ currentStyle;
+
+// Getter/setter for css properties on the matched set; delegates to
+// jQuery.style (set) or jQuery.css (get) via jQuery.access.
+jQuery.fn.css = function( name, value ) {
+ // Setting 'undefined' is a no-op
+ if ( arguments.length === 2 && value === undefined ) {
+ return this;
+ }
+
+ return jQuery.access( this, name, value, true, function( elem, name, value ) {
+ return value !== undefined ?
+ jQuery.style( elem, name, value ) :
+ jQuery.css( elem, name );
+ });
+};
+
+jQuery.extend({
+ // Add in style property hooks for overriding the default
+ // behavior of getting and setting a style property
+ cssHooks: {
+ opacity: {
+ get: function( elem, computed ) {
+ if ( computed ) {
+ // We should always get a number back from opacity
+ var ret = curCSS( elem, "opacity", "opacity" );
+ return ret === "" ? "1" : ret;
+
+ } else {
+ return elem.style.opacity;
+ }
+ }
+ }
+ },
+
+ // Exclude the following css properties to add px
+ cssNumber: {
+ "fillOpacity": true,
+ "fontWeight": true,
+ "lineHeight": true,
+ "opacity": true,
+ "orphans": true,
+ "widows": true,
+ "zIndex": true,
+ "zoom": true
+ },
+
+ // Add in properties whose names you wish to fix before
+ // setting or getting the value
+ cssProps: {
+ // normalize float css property
+ "float": jQuery.support.cssFloat ? "cssFloat" : "styleFloat"
+ },
+
+ // Get and set the style property on a DOM Node.
+ // When `value` is given, sets the property (applying px suffixing,
+ // relative +=/-= values and cssHooks); otherwise returns the
+ // non-computed value from elem.style.
+ style: function( elem, name, value, extra ) {
+ // Don't set styles on text and comment nodes
+ if ( !elem || elem.nodeType === 3 || elem.nodeType === 8 || !elem.style ) {
+ return;
+ }
+
+ // Make sure that we're working with the right name
+ var ret, type, origName = jQuery.camelCase( name ),
+ style = elem.style, hooks = jQuery.cssHooks[ origName ];
+
+ name = jQuery.cssProps[ origName ] || origName;
+
+ // Check if we're setting a value
+ if ( value !== undefined ) {
+ type = typeof value;
+
+ // convert relative number strings (+= or -=) to relative numbers. #7345
+ if ( type === "string" && (ret = rrelNum.exec( value )) ) {
+ value = ( +( ret[1] + 1) * +ret[2] ) + parseFloat( jQuery.css( elem, name ) );
+ // Fixes bug #9237
+ type = "number";
+ }
+
+ // Make sure that NaN and null values aren't set. See: #7116
+ if ( value == null || type === "number" && isNaN( value ) ) {
+ return;
+ }
+
+ // If a number was passed in, add 'px' to the value (except for certain CSS properties)
+ if ( type === "number" && !jQuery.cssNumber[ origName ] ) {
+ value += "px";
+ }
+
+ // If a hook was provided, use that value, otherwise just set the specified value
+ if ( !hooks || !("set" in hooks) || (value = hooks.set( elem, value )) !== undefined ) {
+ // Wrapped to prevent IE from throwing errors when 'invalid' values are provided
+ // Fixes bug #5509
+ try {
+ style[ name ] = value;
+ } catch(e) {}
+ }
+
+ } else {
+ // If a hook was provided get the non-computed value from there
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, false, extra )) !== undefined ) {
+ return ret;
+ }
+
+ // Otherwise just get the value from the style object
+ return style[ name ];
+ }
+ },
+
+ // Get the computed value of a style property, consulting cssHooks first
+ // and falling back to curCSS (getComputedStyle or currentStyle).
+ css: function( elem, name, extra ) {
+ var ret, hooks;
+
+ // Make sure that we're working with the right name
+ name = jQuery.camelCase( name );
+ hooks = jQuery.cssHooks[ name ];
+ name = jQuery.cssProps[ name ] || name;
+
+ // cssFloat needs a special treatment
+ if ( name === "cssFloat" ) {
+ name = "float";
+ }
+
+ // If a hook was provided get the computed value from there
+ if ( hooks && "get" in hooks && (ret = hooks.get( elem, true, extra )) !== undefined ) {
+ return ret;
+
+ // Otherwise, if a way to get the computed value exists, use that
+ } else if ( curCSS ) {
+ return curCSS( elem, name );
+ }
+ },
+
+ // A method for quickly swapping in/out CSS properties to get correct calculations.
+ // Applies `options` to elem.style, runs `callback`, then restores the old values.
+ swap: function( elem, options, callback ) {
+ var old = {};
+
+ // Remember the old values, and insert the new ones
+ for ( var name in options ) {
+ old[ name ] = elem.style[ name ];
+ elem.style[ name ] = options[ name ];
+ }
+
+ callback.call( elem );
+
+ // Revert the old values
+ for ( name in options ) {
+ elem.style[ name ] = old[ name ];
+ }
+ }
+});
+
+// DEPRECATED, Use jQuery.css() instead
+jQuery.curCSS = jQuery.css;
+
+// cssHooks for width/height: measure via getWH, temporarily un-hiding the
+// element (cssShow swap) when offsetWidth is 0; setter rejects negative values.
+jQuery.each(["height", "width"], function( i, name ) {
+ jQuery.cssHooks[ name ] = {
+ get: function( elem, computed, extra ) {
+ var val;
+
+ if ( computed ) {
+ if ( elem.offsetWidth !== 0 ) {
+ return getWH( elem, name, extra );
+ } else {
+ // Hidden element: measure inside a display/visibility swap
+ jQuery.swap( elem, cssShow, function() {
+ val = getWH( elem, name, extra );
+ });
+ }
+
+ return val;
+ }
+ },
+
+ set: function( elem, value ) {
+ if ( rnumpx.test( value ) ) {
+ // ignore negative width and height values #1599
+ value = parseFloat( value );
+
+ if ( value >= 0 ) {
+ return value + "px";
+ }
+
+ } else {
+ // Non-numeric values (e.g. "auto") are passed through untouched
+ return value;
+ }
+ }
+ };
+});
+
+// Opacity hook for browsers without native opacity support (legacy IE):
+// translates between the standard 0..1 opacity value and IE's
+// alpha(opacity=N) filter syntax.
+if ( !jQuery.support.opacity ) {
+ jQuery.cssHooks.opacity = {
+ get: function( elem, computed ) {
+ // IE uses filters for opacity
+ return ropacity.test( (computed && elem.currentStyle ? elem.currentStyle.filter : elem.style.filter) || "" ) ?
+ ( parseFloat( RegExp.$1 ) / 100 ) + "" :
+ computed ? "1" : "";
+ },
+
+ set: function( elem, value ) {
+ var style = elem.style,
+ currentStyle = elem.currentStyle,
+ opacity = jQuery.isNaN( value ) ? "" : "alpha(opacity=" + value * 100 + ")",
+ filter = currentStyle && currentStyle.filter || style.filter || "";
+
+ // IE has trouble with opacity if it does not have layout
+ // Force it by setting the zoom level
+ style.zoom = 1;
+
+ // if setting opacity to 1, and no other filters exist - attempt to remove filter attribute #6652
+ if ( value >= 1 && jQuery.trim( filter.replace( ralpha, "" ) ) === "" ) {
+
+ // Setting style.filter to null, "" & " " still leave "filter:" in the cssText
+ // if "filter:" is present at all, clearType is disabled, we want to avoid this
+ // style.removeAttribute is IE Only, but so apparently is this code path...
+ style.removeAttribute( "filter" );
+
+ // if there is no filter style applied in a css rule, we are done
+ if ( currentStyle && !currentStyle.filter ) {
+ return;
+ }
+ }
+
+ // otherwise, set new filter values
+ style.filter = ralpha.test( filter ) ?
+ filter.replace( ralpha, opacity ) :
+ filter + " " + opacity;
+ }
+ };
+}
+
+jQuery(function() {
+ // This hook cannot be added until DOM ready because the support test
+ // for it is not run until after DOM ready
+ if ( !jQuery.support.reliableMarginRight ) {
+ jQuery.cssHooks.marginRight = {
+ get: function( elem, computed ) {
+ // WebKit Bug 13343 - getComputedStyle returns wrong value for margin-right
+ // Work around by temporarily setting element display to inline-block
+ var ret;
+ jQuery.swap( elem, { "display": "inline-block" }, function() {
+ if ( computed ) {
+ ret = curCSS( elem, "margin-right", "marginRight" );
+ } else {
+ ret = elem.style.marginRight;
+ }
+ });
+ return ret;
+ }
+ };
+ }
+});
+
+// Standards path: read computed styles via document.defaultView.getComputedStyle.
+if ( document.defaultView && document.defaultView.getComputedStyle ) {
+ getComputedStyle = function( elem, name ) {
+ var ret, defaultView, computedStyle;
+
+ // camelCase -> dashed property name for getPropertyValue
+ name = name.replace( rupper, "-$1" ).toLowerCase();
+
+ if ( !(defaultView = elem.ownerDocument.defaultView) ) {
+ return undefined;
+ }
+
+ if ( (computedStyle = defaultView.getComputedStyle( elem, null )) ) {
+ ret = computedStyle.getPropertyValue( name );
+ // Detached elements can report ""; fall back to the inline style
+ if ( ret === "" && !jQuery.contains( elem.ownerDocument.documentElement, elem ) ) {
+ ret = jQuery.style( elem, name );
+ }
+ }
+
+ return ret;
+ };
+}
+
+// Legacy IE path: read elem.currentStyle and convert non-pixel units to px.
+if ( document.documentElement.currentStyle ) {
+ currentStyle = function( elem, name ) {
+ var left,
+ ret = elem.currentStyle && elem.currentStyle[ name ],
+ rsLeft = elem.runtimeStyle && elem.runtimeStyle[ name ],
+ style = elem.style;
+
+ // From the awesome hack by Dean Edwards
+ // http://erik.eae.net/archives/2007/07/27/18.54.15/#comment-102291
+
+ // If we're not dealing with a regular pixel number
+ // but a number that has a weird ending, we need to convert it to pixels
+ if ( !rnumpx.test( ret ) && rnum.test( ret ) ) {
+ // Remember the original values
+ left = style.left;
+
+ // Put in the new values to get a computed value out
+ if ( rsLeft ) {
+ elem.runtimeStyle.left = elem.currentStyle.left;
+ }
+ style.left = name === "fontSize" ? "1em" : (ret || 0);
+ ret = style.pixelLeft + "px";
+
+ // Revert the changed values
+ style.left = left;
+ if ( rsLeft ) {
+ elem.runtimeStyle.left = rsLeft;
+ }
+ }
+
+ return ret === "" ? "auto" : ret;
+ };
+}
+
+// Pick whichever computed-style reader this browser supports
+curCSS = getComputedStyle || currentStyle;
+
+// Computes an element's width or height as a "px" string.
+// `extra` selects the box: falsy = content box (padding and border
+// subtracted from the offset size), "border" = offset size as-is,
+// "margin" = offset size plus margins, "padding" = content plus padding.
+function getWH( elem, name, extra ) {
+
+ // Start with offset property
+ var val = name === "width" ? elem.offsetWidth : elem.offsetHeight,
+ which = name === "width" ? cssWidth : cssHeight;
+
+ if ( val > 0 ) {
+ if ( extra !== "border" ) {
+ jQuery.each( which, function() {
+ if ( !extra ) {
+ val -= parseFloat( jQuery.css( elem, "padding" + this ) ) || 0;
+ }
+ if ( extra === "margin" ) {
+ val += parseFloat( jQuery.css( elem, extra + this ) ) || 0;
+ } else {
+ val -= parseFloat( jQuery.css( elem, "border" + this + "Width" ) ) || 0;
+ }
+ });
+ }
+
+ return val + "px";
+ }
+
+ // Fall back to computed then uncomputed css if necessary
+ val = curCSS( elem, name, name );
+ if ( val < 0 || val == null ) {
+ val = elem.style[ name ] || 0;
+ }
+ // Normalize "", auto, and prepare for extra
+ val = parseFloat( val ) || 0;
+
+ // Add padding, border, margin
+ if ( extra ) {
+ jQuery.each( which, function() {
+ val += parseFloat( jQuery.css( elem, "padding" + this ) ) || 0;
+ if ( extra !== "padding" ) {
+ val += parseFloat( jQuery.css( elem, "border" + this + "Width" ) ) || 0;
+ }
+ if ( extra === "margin" ) {
+ val += parseFloat( jQuery.css( elem, extra + this ) ) || 0;
+ }
+ });
+ }
+
+ return val + "px";
+}
+
+// Selector filters :hidden / :visible. An element is hidden when both offset
+// dimensions are 0, or (on browsers with unreliable hidden offsets) when its
+// display resolves to "none".
+if ( jQuery.expr && jQuery.expr.filters ) {
+ jQuery.expr.filters.hidden = function( elem ) {
+ var width = elem.offsetWidth,
+ height = elem.offsetHeight;
+
+ return (width === 0 && height === 0) || (!jQuery.support.reliableHiddenOffsets && (elem.style.display || jQuery.css( elem, "display" )) === "none");
+ };
+
+ jQuery.expr.filters.visible = function( elem ) {
+ return !jQuery.expr.filters.hidden( elem );
+ };
+}
+
+
+
+
+// Shared regexes and state for the AJAX module, plus detection of the
+// document's own location (used for cross-domain checks and default url).
+var r20 = /%20/g,
+ rbracket = /\[\]$/,
+ rCRLF = /\r?\n/g,
+ rhash = /#.*$/,
+ rheaders = /^(.*?):[ \t]*([^\r\n]*)\r?$/mg, // IE leaves an \r character at EOL
+ rinput = /^(?:color|date|datetime|datetime-local|email|hidden|month|number|password|range|search|tel|text|time|url|week)$/i,
+ // #7653, #8125, #8152: local protocol detection
+ rlocalProtocol = /^(?:about|app|app\-storage|.+\-extension|file|res|widget):$/,
+ rnoContent = /^(?:GET|HEAD)$/,
+ rprotocol = /^\/\//,
+ rquery = /\?/,
+ rscript = /<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,
+ rselectTextarea = /^(?:select|textarea)/i,
+ rspacesAjax = /\s+/,
+ rts = /([?&])_=[^&]*/,
+ rurl = /^([\w\+\.\-]+:)(?:\/\/([^\/?#:]*)(?::(\d+))?)?/,
+
+ // Keep a copy of the old load method
+ _load = jQuery.fn.load,
+
+ /* Prefilters
+ * 1) They are useful to introduce custom dataTypes (see ajax/jsonp.js for an example)
+ * 2) These are called:
+ * - BEFORE asking for a transport
+ * - AFTER param serialization (s.data is a string if s.processData is true)
+ * 3) key is the dataType
+ * 4) the catchall symbol "*" can be used
+ * 5) execution will start with transport dataType and THEN continue down to "*" if needed
+ */
+ prefilters = {},
+
+ /* Transports bindings
+ * 1) key is the dataType
+ * 2) the catchall symbol "*" can be used
+ * 3) selection will start with transport dataType and THEN go to "*" if needed
+ */
+ transports = {},
+
+ // Document location
+ ajaxLocation,
+
+ // Document location segments
+ ajaxLocParts,
+
+ // Avoid comment-prolog char sequence (#10098); must appease lint and evade compression
+ allTypes = ["*/"] + ["*"];
+
+// #8138, IE may throw an exception when accessing
+// a field from window.location if document.domain has been set
+try {
+ ajaxLocation = location.href;
+} catch( e ) {
+ // Use the href attribute of an A element
+ // since IE will modify it given document.location
+ ajaxLocation = document.createElement( "a" );
+ ajaxLocation.href = "";
+ ajaxLocation = ajaxLocation.href;
+}
+
+// Segment location into parts
+ajaxLocParts = rurl.exec( ajaxLocation.toLowerCase() ) || [];
+
+// Base "constructor" for jQuery.ajaxPrefilter and jQuery.ajaxTransport
+function addToPrefiltersOrTransports( structure ) {
+
+ // dataTypeExpression is optional and defaults to "*"
+ return function( dataTypeExpression, func ) {
+
+ if ( typeof dataTypeExpression !== "string" ) {
+ func = dataTypeExpression;
+ dataTypeExpression = "*";
+ }
+
+ if ( jQuery.isFunction( func ) ) {
+ var dataTypes = dataTypeExpression.toLowerCase().split( rspacesAjax ),
+ i = 0,
+ length = dataTypes.length,
+ dataType,
+ list,
+ placeBefore;
+
+ // For each dataType in the dataTypeExpression
+ for(; i < length; i++ ) {
+ dataType = dataTypes[ i ];
+ // We control if we're asked to add before
+ // any existing element
+ placeBefore = /^\+/.test( dataType );
+ if ( placeBefore ) {
+ dataType = dataType.substr( 1 ) || "*";
+ }
+ list = structure[ dataType ] = structure[ dataType ] || [];
+ // then we add to the structure accordingly
+ list[ placeBefore ? "unshift" : "push" ]( func );
+ }
+ }
+ };
+}
+
+// Base inspection function for prefilters and transports
+function inspectPrefiltersOrTransports( structure, options, originalOptions, jqXHR,
+ dataType /* internal */, inspected /* internal */ ) {
+
+ dataType = dataType || options.dataTypes[ 0 ];
+ inspected = inspected || {};
+
+ inspected[ dataType ] = true;
+
+ var list = structure[ dataType ],
+ i = 0,
+ length = list ? list.length : 0,
+ executeOnly = ( structure === prefilters ),
+ selection;
+
+ for(; i < length && ( executeOnly || !selection ); i++ ) {
+ selection = list[ i ]( options, originalOptions, jqXHR );
+ // If we got redirected to another dataType
+ // we try there if executing only and not done already
+ if ( typeof selection === "string" ) {
+ if ( !executeOnly || inspected[ selection ] ) {
+ selection = undefined;
+ } else {
+ options.dataTypes.unshift( selection );
+ selection = inspectPrefiltersOrTransports(
+ structure, options, originalOptions, jqXHR, selection, inspected );
+ }
+ }
+ }
+ // If we're only executing or nothing was selected
+ // we try the catchall dataType if not done already
+ if ( ( executeOnly || !selection ) && !inspected[ "*" ] ) {
+ selection = inspectPrefiltersOrTransports(
+ structure, options, originalOptions, jqXHR, "*", inspected );
+ }
+ // unnecessary when only executing (prefilters)
+ // but it'll be ignored by the caller in that case
+ return selection;
+}
+
+// A special extend for ajax options
+// that takes "flat" options (not to be deep extended)
+// Fixes #9887
+function ajaxExtend( target, src ) {
+ var key, deep,
+ flatOptions = jQuery.ajaxSettings.flatOptions || {};
+ for( key in src ) {
+ if ( src[ key ] !== undefined ) {
+ ( flatOptions[ key ] ? target : ( deep || ( deep = {} ) ) )[ key ] = src[ key ];
+ }
+ }
+ if ( deep ) {
+ jQuery.extend( true, target, deep );
+ }
+}
+
+jQuery.fn.extend({
+ // Loads html from `url` into the matched elements. A space in `url`
+ // separates the request url from a selector used to filter the response.
+ // `params` as an object triggers a POST; as a function it is the callback.
+ load: function( url, params, callback ) {
+ if ( typeof url !== "string" && _load ) {
+ return _load.apply( this, arguments );
+
+ // Don't do a request if no elements are being requested
+ } else if ( !this.length ) {
+ return this;
+ }
+
+ // Split off the selector portion following the first space, if any
+ var off = url.indexOf( " " );
+ if ( off >= 0 ) {
+ var selector = url.slice( off, url.length );
+ url = url.slice( 0, off );
+ }
+
+ // Default to a GET request
+ var type = "GET";
+
+ // If the second parameter was provided
+ if ( params ) {
+ // If it's a function
+ if ( jQuery.isFunction( params ) ) {
+ // We assume that it's the callback
+ callback = params;
+ params = undefined;
+
+ // Otherwise, build a param string
+ } else if ( typeof params === "object" ) {
+ params = jQuery.param( params, jQuery.ajaxSettings.traditional );
+ type = "POST";
+ }
+ }
+
+ var self = this;
+
+ // Request the remote document
+ jQuery.ajax({
+ url: url,
+ type: type,
+ dataType: "html",
+ data: params,
+ // Complete callback (responseText is used internally)
+ complete: function( jqXHR, status, responseText ) {
+ // Store the response as specified by the jqXHR object
+ responseText = jqXHR.responseText;
+ // If successful, inject the HTML into all the matched elements
+ if ( jqXHR.isResolved() ) {
+ // #4825: Get the actual response in case
+ // a dataFilter is present in ajaxSettings
+ jqXHR.done(function( r ) {
+ responseText = r;
+ });
+ // See if a selector was specified
+ self.html( selector ?
+ // Create a dummy div to hold the results
+ jQuery("<div>")
+ // inject the contents of the document in, removing the scripts
+ // to avoid any 'Permission Denied' errors in IE
+ .append(responseText.replace(rscript, ""))
+
+ // Locate the specified elements
+ .find(selector) :
+
+ // If not, just inject the full result
+ responseText );
+ }
+
+ if ( callback ) {
+ self.each( callback, [ responseText, status, jqXHR ] );
+ }
+ }
+ });
+
+ return this;
+ },
+
+ // Serializes form elements in the matched set into a query string
+ serialize: function() {
+ return jQuery.param( this.serializeArray() );
+ },
+
+ // Collects successful form controls as an array of {name, value} pairs,
+ // normalizing line endings to CRLF and expanding multi-value controls.
+ serializeArray: function() {
+ return this.map(function(){
+ return this.elements ? jQuery.makeArray( this.elements ) : this;
+ })
+ .filter(function(){
+ return this.name && !this.disabled &&
+ ( this.checked || rselectTextarea.test( this.nodeName ) ||
+ rinput.test( this.type ) );
+ })
+ .map(function( i, elem ){
+ var val = jQuery( this ).val();
+
+ return val == null ?
+ null :
+ jQuery.isArray( val ) ?
+ jQuery.map( val, function( val, i ){
+ return { name: elem.name, value: val.replace( rCRLF, "\r\n" ) };
+ }) :
+ { name: elem.name, value: val.replace( rCRLF, "\r\n" ) };
+ }).get();
+ }
+});
+
+// Attach a bunch of functions for handling common AJAX events
+jQuery.each( "ajaxStart ajaxStop ajaxComplete ajaxError ajaxSuccess ajaxSend".split( " " ), function( i, o ){
+ jQuery.fn[ o ] = function( f ){
+ return this.bind( o, f );
+ };
+});
+
+// jQuery.get / jQuery.post shorthands that delegate to jQuery.ajax
+jQuery.each( [ "get", "post" ], function( i, method ) {
+ jQuery[ method ] = function( url, data, callback, type ) {
+ // shift arguments if data argument was omitted
+ if ( jQuery.isFunction( data ) ) {
+ type = type || callback;
+ callback = data;
+ data = undefined;
+ }
+
+ return jQuery.ajax({
+ type: method,
+ url: url,
+ data: data,
+ success: callback,
+ dataType: type
+ });
+ };
+});
+
+jQuery.extend({
+
+ // Shorthand: GET a script and execute it
+ getScript: function( url, callback ) {
+ return jQuery.get( url, undefined, callback, "script" );
+ },
+
+ // Shorthand: GET a url and parse the response as JSON
+ getJSON: function( url, data, callback ) {
+ return jQuery.get( url, data, callback, "json" );
+ },
+
+ // Creates a full fledged settings object into target
+ // with both ajaxSettings and settings fields.
+ // If target is omitted, writes into ajaxSettings.
+ // Creates a full fledged settings object into target
+ // with both ajaxSettings and settings fields.
+ // If target is omitted, writes into ajaxSettings.
+ ajaxSetup: function( target, settings ) {
+ if ( settings ) {
+ // Building a settings object
+ ajaxExtend( target, jQuery.ajaxSettings );
+ } else {
+ // Extending ajaxSettings
+ settings = target;
+ target = jQuery.ajaxSettings;
+ }
+ ajaxExtend( target, settings );
+ return target;
+ },
+
+ // Default settings for every jQuery.ajax call; user options are merged
+ // on top of these by ajaxSetup/ajaxExtend.
+ ajaxSettings: {
+ url: ajaxLocation,
+ isLocal: rlocalProtocol.test( ajaxLocParts[ 1 ] ),
+ global: true,
+ type: "GET",
+ contentType: "application/x-www-form-urlencoded",
+ processData: true,
+ async: true,
+ /*
+ timeout: 0,
+ data: null,
+ dataType: null,
+ username: null,
+ password: null,
+ cache: null,
+ traditional: false,
+ headers: {},
+ */
+
+ // Accept headers sent to the server, keyed by dataType
+ accepts: {
+ xml: "application/xml, text/xml",
+ html: "text/html",
+ text: "text/plain",
+ json: "application/json, text/javascript",
+ "*": allTypes
+ },
+
+ // Response Content-Type sniffing, keyed by dataType
+ contents: {
+ xml: /xml/,
+ html: /html/,
+ json: /json/
+ },
+
+ // Which xhr field holds the response for a given dataType
+ responseFields: {
+ xml: "responseXML",
+ text: "responseText"
+ },
+
+ // List of data converters
+ // 1) key format is "source_type destination_type" (a single space in-between)
+ // 2) the catchall symbol "*" can be used for source_type
+ converters: {
+
+ // Convert anything to text
+ "* text": window.String,
+
+ // Text to html (true = no transformation)
+ "text html": true,
+
+ // Evaluate text as a json expression
+ "text json": jQuery.parseJSON,
+
+ // Parse text as xml
+ "text xml": jQuery.parseXML
+ },
+
+ // For options that shouldn't be deep extended:
+ // you can add your own custom options here if
+ // and when you create one that shouldn't be
+ // deep extended (see ajaxExtend)
+ flatOptions: {
+ context: true,
+ url: true
+ }
+ },
+
+ // Public registration functions for prefilters and transports
+ ajaxPrefilter: addToPrefiltersOrTransports( prefilters ),
+ ajaxTransport: addToPrefiltersOrTransports( transports ),
+
+ // Main method
+ // Main method: builds the settings object, runs prefilters, selects a
+ // transport and drives the request through a fake xhr (jqXHR) object that
+ // exposes a Deferred-based promise interface. Returns the jqXHR, or false
+ // if the request was aborted before a transport was engaged.
+ ajax: function( url, options ) {
+
+ // If url is an object, simulate pre-1.5 signature
+ if ( typeof url === "object" ) {
+ options = url;
+ url = undefined;
+ }
+
+ // Force options to be an object
+ options = options || {};
+
+ var // Create the final options object
+ s = jQuery.ajaxSetup( {}, options ),
+ // Callbacks context
+ callbackContext = s.context || s,
+ // Context for global events
+ // It's the callbackContext if one was provided in the options
+ // and if it's a DOM node or a jQuery collection
+ globalEventContext = callbackContext !== s &&
+ ( callbackContext.nodeType || callbackContext instanceof jQuery ) ?
+ jQuery( callbackContext ) : jQuery.event,
+ // Deferreds
+ deferred = jQuery.Deferred(),
+ completeDeferred = jQuery._Deferred(),
+ // Status-dependent callbacks
+ statusCode = s.statusCode || {},
+ // ifModified key
+ ifModifiedKey,
+ // Headers (they are sent all at once)
+ requestHeaders = {},
+ requestHeadersNames = {},
+ // Response headers
+ responseHeadersString,
+ responseHeaders,
+ // transport
+ transport,
+ // timeout handle
+ timeoutTimer,
+ // Cross-domain detection vars
+ parts,
+ // The jqXHR state (0: unsent, 1: sent, 2: done)
+ state = 0,
+ // To know if global events are to be dispatched
+ fireGlobals,
+ // Loop variable
+ i,
+ // Fake xhr
+ jqXHR = {
+
+ readyState: 0,
+
+ // Caches the header
+ setRequestHeader: function( name, value ) {
+ if ( !state ) {
+ var lname = name.toLowerCase();
+ name = requestHeadersNames[ lname ] = requestHeadersNames[ lname ] || name;
+ requestHeaders[ name ] = value;
+ }
+ return this;
+ },
+
+ // Raw string
+ getAllResponseHeaders: function() {
+ return state === 2 ? responseHeadersString : null;
+ },
+
+ // Builds headers hashtable if needed
+ getResponseHeader: function( key ) {
+ var match;
+ if ( state === 2 ) {
+ if ( !responseHeaders ) {
+ responseHeaders = {};
+ while( ( match = rheaders.exec( responseHeadersString ) ) ) {
+ responseHeaders[ match[1].toLowerCase() ] = match[ 2 ];
+ }
+ }
+ match = responseHeaders[ key.toLowerCase() ];
+ }
+ return match === undefined ? null : match;
+ },
+
+ // Overrides response content-type header
+ overrideMimeType: function( type ) {
+ if ( !state ) {
+ s.mimeType = type;
+ }
+ return this;
+ },
+
+ // Cancel the request
+ abort: function( statusText ) {
+ statusText = statusText || "abort";
+ if ( transport ) {
+ transport.abort( statusText );
+ }
+ done( 0, statusText );
+ return this;
+ }
+ };
+
+ // Callback for when everything is done
+ // It is defined here because jslint complains if it is declared
+ // at the end of the function (which would be more logical and readable)
+ function done( status, nativeStatusText, responses, headers ) {
+
+ // Called once
+ if ( state === 2 ) {
+ return;
+ }
+
+ // State is "done" now
+ state = 2;
+
+ // Clear timeout if it exists
+ if ( timeoutTimer ) {
+ clearTimeout( timeoutTimer );
+ }
+
+ // Dereference transport for early garbage collection
+ // (no matter how long the jqXHR object will be used)
+ transport = undefined;
+
+ // Cache response headers
+ responseHeadersString = headers || "";
+
+ // Set readyState
+ jqXHR.readyState = status > 0 ? 4 : 0;
+
+ var isSuccess,
+ success,
+ error,
+ statusText = nativeStatusText,
+ response = responses ? ajaxHandleResponses( s, jqXHR, responses ) : undefined,
+ lastModified,
+ etag;
+
+ // If successful, handle type chaining
+ if ( status >= 200 && status < 300 || status === 304 ) {
+
+ // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode.
+ if ( s.ifModified ) {
+
+ if ( ( lastModified = jqXHR.getResponseHeader( "Last-Modified" ) ) ) {
+ jQuery.lastModified[ ifModifiedKey ] = lastModified;
+ }
+ if ( ( etag = jqXHR.getResponseHeader( "Etag" ) ) ) {
+ jQuery.etag[ ifModifiedKey ] = etag;
+ }
+ }
+
+ // If not modified
+ if ( status === 304 ) {
+
+ statusText = "notmodified";
+ isSuccess = true;
+
+ // If we have data
+ } else {
+
+ try {
+ success = ajaxConvert( s, response );
+ statusText = "success";
+ isSuccess = true;
+ } catch(e) {
+ // We have a parsererror
+ statusText = "parsererror";
+ error = e;
+ }
+ }
+ } else {
+ // We extract error from statusText
+ // then normalize statusText and status for non-aborts
+ error = statusText;
+ if( !statusText || status ) {
+ statusText = "error";
+ if ( status < 0 ) {
+ status = 0;
+ }
+ }
+ }
+
+ // Set data for the fake xhr object
+ jqXHR.status = status;
+ jqXHR.statusText = "" + ( nativeStatusText || statusText );
+
+ // Success/Error
+ if ( isSuccess ) {
+ deferred.resolveWith( callbackContext, [ success, statusText, jqXHR ] );
+ } else {
+ deferred.rejectWith( callbackContext, [ jqXHR, statusText, error ] );
+ }
+
+ // Status-dependent callbacks
+ jqXHR.statusCode( statusCode );
+ statusCode = undefined;
+
+ if ( fireGlobals ) {
+ globalEventContext.trigger( "ajax" + ( isSuccess ? "Success" : "Error" ),
+ [ jqXHR, s, isSuccess ? success : error ] );
+ }
+
+ // Complete
+ completeDeferred.resolveWith( callbackContext, [ jqXHR, statusText ] );
+
+ if ( fireGlobals ) {
+ globalEventContext.trigger( "ajaxComplete", [ jqXHR, s ] );
+ // Handle the global AJAX counter
+ if ( !( --jQuery.active ) ) {
+ jQuery.event.trigger( "ajaxStop" );
+ }
+ }
+ }
+
+ // Attach deferreds
+ deferred.promise( jqXHR );
+ jqXHR.success = jqXHR.done;
+ jqXHR.error = jqXHR.fail;
+ jqXHR.complete = completeDeferred.done;
+
+ // Status-dependent callbacks: before completion, map entries are merged
+ // into statusCode; afterwards, the matching entry is invoked immediately.
+ jqXHR.statusCode = function( map ) {
+ if ( map ) {
+ var tmp;
+ if ( state < 2 ) {
+ for( tmp in map ) {
+ statusCode[ tmp ] = [ statusCode[tmp], map[tmp] ];
+ }
+ } else {
+ tmp = map[ jqXHR.status ];
+ jqXHR.then( tmp, tmp );
+ }
+ }
+ return this;
+ };
+
+ // Remove hash character (#7531: and string promotion)
+ // Add protocol if not provided (#5866: IE7 issue with protocol-less urls)
+ // We also use the url parameter if available
+ s.url = ( ( url || s.url ) + "" ).replace( rhash, "" ).replace( rprotocol, ajaxLocParts[ 1 ] + "//" );
+
+ // Extract dataTypes list
+ s.dataTypes = jQuery.trim( s.dataType || "*" ).toLowerCase().split( rspacesAjax );
+
+ // Determine if a cross-domain request is in order
+ if ( s.crossDomain == null ) {
+ parts = rurl.exec( s.url.toLowerCase() );
+ s.crossDomain = !!( parts &&
+ ( parts[ 1 ] != ajaxLocParts[ 1 ] || parts[ 2 ] != ajaxLocParts[ 2 ] ||
+ ( parts[ 3 ] || ( parts[ 1 ] === "http:" ? 80 : 443 ) ) !=
+ ( ajaxLocParts[ 3 ] || ( ajaxLocParts[ 1 ] === "http:" ? 80 : 443 ) ) )
+ );
+ }
+
+ // Convert data if not already a string
+ if ( s.data && s.processData && typeof s.data !== "string" ) {
+ s.data = jQuery.param( s.data, s.traditional );
+ }
+
+ // Apply prefilters
+ inspectPrefiltersOrTransports( prefilters, s, options, jqXHR );
+
+ // If request was aborted inside a prefilter, stop there
+ if ( state === 2 ) {
+ return false;
+ }
+
+ // We can fire global events as of now if asked to
+ fireGlobals = s.global;
+
+ // Uppercase the type
+ s.type = s.type.toUpperCase();
+
+ // Determine if request has content
+ s.hasContent = !rnoContent.test( s.type );
+
+ // Watch for a new set of requests
+ if ( fireGlobals && jQuery.active++ === 0 ) {
+ jQuery.event.trigger( "ajaxStart" );
+ }
+
+ // More options handling for requests with no content
+ if ( !s.hasContent ) {
+
+ // If data is available, append data to url
+ if ( s.data ) {
+ s.url += ( rquery.test( s.url ) ? "&" : "?" ) + s.data;
+ // #9682: remove data so that it's not used in an eventual retry
+ delete s.data;
+ }
+
+ // Get ifModifiedKey before adding the anti-cache parameter
+ ifModifiedKey = s.url;
+
+ // Add anti-cache in url if needed
+ if ( s.cache === false ) {
+
+ var ts = jQuery.now(),
+ // try replacing _= if it is there
+ ret = s.url.replace( rts, "$1_=" + ts );
+
+ // if nothing was replaced, add timestamp to the end
+ s.url = ret + ( (ret === s.url ) ? ( rquery.test( s.url ) ? "&" : "?" ) + "_=" + ts : "" );
+ }
+ }
+
+ // Set the correct header, if data is being sent
+ if ( s.data && s.hasContent && s.contentType !== false || options.contentType ) {
+ jqXHR.setRequestHeader( "Content-Type", s.contentType );
+ }
+
+ // Set the If-Modified-Since and/or If-None-Match header, if in ifModified mode.
+ if ( s.ifModified ) {
+ ifModifiedKey = ifModifiedKey || s.url;
+ if ( jQuery.lastModified[ ifModifiedKey ] ) {
+ jqXHR.setRequestHeader( "If-Modified-Since", jQuery.lastModified[ ifModifiedKey ] );
+ }
+ if ( jQuery.etag[ ifModifiedKey ] ) {
+ jqXHR.setRequestHeader( "If-None-Match", jQuery.etag[ ifModifiedKey ] );
+ }
+ }
+
+ // Set the Accepts header for the server, depending on the dataType
+ jqXHR.setRequestHeader(
+ "Accept",
+ s.dataTypes[ 0 ] && s.accepts[ s.dataTypes[0] ] ?
+ s.accepts[ s.dataTypes[0] ] + ( s.dataTypes[ 0 ] !== "*" ? ", " + allTypes + "; q=0.01" : "" ) :
+ s.accepts[ "*" ]
+ );
+
+ // Check for headers option
+ for ( i in s.headers ) {
+ jqXHR.setRequestHeader( i, s.headers[ i ] );
+ }
+
+ // Allow custom headers/mimetypes and early abort
+ if ( s.beforeSend && ( s.beforeSend.call( callbackContext, jqXHR, s ) === false || state === 2 ) ) {
+ // Abort if not done already
+ jqXHR.abort();
+ return false;
+
+ }
+
+ // Install callbacks on deferreds
+ for ( i in { success: 1, error: 1, complete: 1 } ) {
+ jqXHR[ i ]( s[ i ] );
+ }
+
+ // Get transport
+ transport = inspectPrefiltersOrTransports( transports, s, options, jqXHR );
+
+ // If no transport, we auto-abort
+ if ( !transport ) {
+ done( -1, "No Transport" );
+ } else {
+ jqXHR.readyState = 1;
+ // Send global event
+ if ( fireGlobals ) {
+ globalEventContext.trigger( "ajaxSend", [ jqXHR, s ] );
+ }
+ // Timeout
+ if ( s.async && s.timeout > 0 ) {
+ timeoutTimer = setTimeout( function(){
+ jqXHR.abort( "timeout" );
+ }, s.timeout );
+ }
+
+ try {
+ state = 1;
+ transport.send( requestHeaders, done );
+ } catch (e) {
+ // Propagate exception as error if not done
+ if ( state < 2 ) {
+ done( -1, e );
+ // Simply rethrow otherwise
+ } else {
+ jQuery.error( e );
+ }
+ }
+ }
+
+ return jqXHR;
+ },
+
+ // Serialize an array of form elements or a set of
+ // key/values into a query string
+ param: function( a, traditional ) {
+ var s = [],
+ add = function( key, value ) {
+ // If value is a function, invoke it and return its value
+ value = jQuery.isFunction( value ) ? value() : value;
+ s[ s.length ] = encodeURIComponent( key ) + "=" + encodeURIComponent( value );
+ };
+
+ // Set traditional to true for jQuery <= 1.3.2 behavior.
+ if ( traditional === undefined ) {
+ traditional = jQuery.ajaxSettings.traditional;
+ }
+
+ // If an array was passed in, assume that it is an array of form elements.
+ if ( jQuery.isArray( a ) || ( a.jquery && !jQuery.isPlainObject( a ) ) ) {
+ // Serialize the form elements
+ jQuery.each( a, function() {
+ add( this.name, this.value );
+ });
+
+ } else {
+ // If traditional, encode the "old" way (the way 1.3.2 or older
+ // did it), otherwise encode params recursively.
+ for ( var prefix in a ) {
+ buildParams( prefix, a[ prefix ], traditional, add );
+ }
+ }
+
+ // Return the resulting serialization
+ return s.join( "&" ).replace( r20, "+" );
+ }
+});
+
+function buildParams( prefix, obj, traditional, add ) {
+ if ( jQuery.isArray( obj ) ) {
+ // Serialize array item.
+ jQuery.each( obj, function( i, v ) {
+ if ( traditional || rbracket.test( prefix ) ) {
+ // Treat each array item as a scalar.
+ add( prefix, v );
+
+ } else {
+ // If array item is non-scalar (array or object), encode its
+ // numeric index to resolve deserialization ambiguity issues.
+ // Note that rack (as of 1.0.0) can't currently deserialize
+ // nested arrays properly, and attempting to do so may cause
+ // a server error. Possible fixes are to modify rack's
+ // deserialization algorithm or to provide an option or flag
+ // to force array serialization to be shallow.
+ buildParams( prefix + "[" + ( typeof v === "object" || jQuery.isArray(v) ? i : "" ) + "]", v, traditional, add );
+ }
+ });
+
+ } else if ( !traditional && obj != null && typeof obj === "object" ) {
+ // Serialize object item.
+ for ( var name in obj ) {
+ buildParams( prefix + "[" + name + "]", obj[ name ], traditional, add );
+ }
+
+ } else {
+ // Serialize scalar item.
+ add( prefix, obj );
+ }
+}
+
+// This is still on the jQuery object... for now
+// Want to move this to jQuery.ajax some day
+jQuery.extend({
+
+ // Counter for holding the number of active queries
+ active: 0,
+
+ // Last-Modified header cache for next request
+ lastModified: {},
+ etag: {}
+
+});
+
+/* Handles responses to an ajax request:
+ * - sets all responseXXX fields accordingly
+ * - finds the right dataType (mediates between content-type and expected dataType)
+ * - returns the corresponding response
+ */
+function ajaxHandleResponses( s, jqXHR, responses ) {
+
+ var contents = s.contents,
+ dataTypes = s.dataTypes,
+ responseFields = s.responseFields,
+ ct,
+ type,
+ finalDataType,
+ firstDataType;
+
+ // Fill responseXXX fields
+ for( type in responseFields ) {
+ if ( type in responses ) {
+ jqXHR[ responseFields[type] ] = responses[ type ];
+ }
+ }
+
+ // Remove auto dataType and get content-type in the process
+ while( dataTypes[ 0 ] === "*" ) {
+ dataTypes.shift();
+ if ( ct === undefined ) {
+ ct = s.mimeType || jqXHR.getResponseHeader( "content-type" );
+ }
+ }
+
+ // Check if we're dealing with a known content-type
+ if ( ct ) {
+ for ( type in contents ) {
+ if ( contents[ type ] && contents[ type ].test( ct ) ) {
+ dataTypes.unshift( type );
+ break;
+ }
+ }
+ }
+
+ // Check to see if we have a response for the expected dataType
+ if ( dataTypes[ 0 ] in responses ) {
+ finalDataType = dataTypes[ 0 ];
+ } else {
+ // Try convertible dataTypes
+ for ( type in responses ) {
+ if ( !dataTypes[ 0 ] || s.converters[ type + " " + dataTypes[0] ] ) {
+ finalDataType = type;
+ break;
+ }
+ if ( !firstDataType ) {
+ firstDataType = type;
+ }
+ }
+ // Or just use first one
+ finalDataType = finalDataType || firstDataType;
+ }
+
+ // If we found a dataType
+ // We add the dataType to the list if needed
+ // and return the corresponding response
+ if ( finalDataType ) {
+ if ( finalDataType !== dataTypes[ 0 ] ) {
+ dataTypes.unshift( finalDataType );
+ }
+ return responses[ finalDataType ];
+ }
+}
+
+// Chain conversions given the request and the original response
+function ajaxConvert( s, response ) {
+
+ // Apply the dataFilter if provided
+ if ( s.dataFilter ) {
+ response = s.dataFilter( response, s.dataType );
+ }
+
+ var dataTypes = s.dataTypes,
+ converters = {},
+ i,
+ key,
+ length = dataTypes.length,
+ tmp,
+ // Current and previous dataTypes
+ current = dataTypes[ 0 ],
+ prev,
+ // Conversion expression
+ conversion,
+ // Conversion function
+ conv,
+ // Conversion functions (transitive conversion)
+ conv1,
+ conv2;
+
+ // For each dataType in the chain
+ for( i = 1; i < length; i++ ) {
+
+ // Create converters map
+ // with lowercased keys
+ if ( i === 1 ) {
+ for( key in s.converters ) {
+ if( typeof key === "string" ) {
+ converters[ key.toLowerCase() ] = s.converters[ key ];
+ }
+ }
+ }
+
+ // Get the dataTypes
+ prev = current;
+ current = dataTypes[ i ];
+
+ // If current is auto dataType, update it to prev
+ if( current === "*" ) {
+ current = prev;
+ // If no auto and dataTypes are actually different
+ } else if ( prev !== "*" && prev !== current ) {
+
+ // Get the converter
+ conversion = prev + " " + current;
+ conv = converters[ conversion ] || converters[ "* " + current ];
+
+ // If there is no direct converter, search transitively
+ if ( !conv ) {
+ conv2 = undefined;
+ for( conv1 in converters ) {
+ tmp = conv1.split( " " );
+ if ( tmp[ 0 ] === prev || tmp[ 0 ] === "*" ) {
+ conv2 = converters[ tmp[1] + " " + current ];
+ if ( conv2 ) {
+ conv1 = converters[ conv1 ];
+ if ( conv1 === true ) {
+ conv = conv2;
+ } else if ( conv2 === true ) {
+ conv = conv1;
+ }
+ break;
+ }
+ }
+ }
+ }
+ // If we found no converter, dispatch an error
+ if ( !( conv || conv2 ) ) {
+ jQuery.error( "No conversion from " + conversion.replace(" "," to ") );
+ }
+ // If found converter is not an equivalence
+ if ( conv !== true ) {
+ // Convert with 1 or 2 converters accordingly
+ response = conv ? conv( response ) : conv2( conv1(response) );
+ }
+ }
+ }
+ return response;
+}
+
+
+
+
+var jsc = jQuery.now(),
+ jsre = /(\=)\?(&|$)|\?\?/i;
+
+// Default jsonp settings
+jQuery.ajaxSetup({
+ jsonp: "callback",
+ jsonpCallback: function() {
+ return jQuery.expando + "_" + ( jsc++ );
+ }
+});
+
+// Detect, normalize options and install callbacks for jsonp requests
+jQuery.ajaxPrefilter( "json jsonp", function( s, originalSettings, jqXHR ) {
+
+ var inspectData = s.contentType === "application/x-www-form-urlencoded" &&
+ ( typeof s.data === "string" );
+
+ if ( s.dataTypes[ 0 ] === "jsonp" ||
+ s.jsonp !== false && ( jsre.test( s.url ) ||
+ inspectData && jsre.test( s.data ) ) ) {
+
+ var responseContainer,
+ jsonpCallback = s.jsonpCallback =
+ jQuery.isFunction( s.jsonpCallback ) ? s.jsonpCallback() : s.jsonpCallback,
+ previous = window[ jsonpCallback ],
+ url = s.url,
+ data = s.data,
+ replace = "$1" + jsonpCallback + "$2";
+
+ if ( s.jsonp !== false ) {
+ url = url.replace( jsre, replace );
+ if ( s.url === url ) {
+ if ( inspectData ) {
+ data = data.replace( jsre, replace );
+ }
+ if ( s.data === data ) {
+ // Add callback manually
+ url += (/\?/.test( url ) ? "&" : "?") + s.jsonp + "=" + jsonpCallback;
+ }
+ }
+ }
+
+ s.url = url;
+ s.data = data;
+
+ // Install callback
+ window[ jsonpCallback ] = function( response ) {
+ responseContainer = [ response ];
+ };
+
+ // Clean-up function
+ jqXHR.always(function() {
+ // Set callback back to previous value
+ window[ jsonpCallback ] = previous;
+ // Call if it was a function and we have a response
+ if ( responseContainer && jQuery.isFunction( previous ) ) {
+ window[ jsonpCallback ]( responseContainer[ 0 ] );
+ }
+ });
+
+ // Use data converter to retrieve json after script execution
+ s.converters["script json"] = function() {
+ if ( !responseContainer ) {
+ jQuery.error( jsonpCallback + " was not called" );
+ }
+ return responseContainer[ 0 ];
+ };
+
+ // force json dataType
+ s.dataTypes[ 0 ] = "json";
+
+ // Delegate to script
+ return "script";
+ }
+});
+
+
+
+
+// Install script dataType
+jQuery.ajaxSetup({
+ accepts: {
+ script: "text/javascript, application/javascript, application/ecmascript, application/x-ecmascript"
+ },
+ contents: {
+ script: /javascript|ecmascript/
+ },
+ converters: {
+ "text script": function( text ) {
+ jQuery.globalEval( text );
+ return text;
+ }
+ }
+});
+
+// Handle cache's special case and global
+jQuery.ajaxPrefilter( "script", function( s ) {
+ if ( s.cache === undefined ) {
+ s.cache = false;
+ }
+ if ( s.crossDomain ) {
+ s.type = "GET";
+ s.global = false;
+ }
+});
+
+// Bind script tag hack transport
+jQuery.ajaxTransport( "script", function(s) {
+
+ // This transport only deals with cross domain requests
+ if ( s.crossDomain ) {
+
+ var script,
+ head = document.head || document.getElementsByTagName( "head" )[0] || document.documentElement;
+
+ return {
+
+ send: function( _, callback ) {
+
+ script = document.createElement( "script" );
+
+ script.async = "async";
+
+ if ( s.scriptCharset ) {
+ script.charset = s.scriptCharset;
+ }
+
+ script.src = s.url;
+
+ // Attach handlers for all browsers
+ script.onload = script.onreadystatechange = function( _, isAbort ) {
+
+ if ( isAbort || !script.readyState || /loaded|complete/.test( script.readyState ) ) {
+
+ // Handle memory leak in IE
+ script.onload = script.onreadystatechange = null;
+
+ // Remove the script
+ if ( head && script.parentNode ) {
+ head.removeChild( script );
+ }
+
+ // Dereference the script
+ script = undefined;
+
+ // Callback if not abort
+ if ( !isAbort ) {
+ callback( 200, "success" );
+ }
+ }
+ };
+ // Use insertBefore instead of appendChild to circumvent an IE6 bug.
+ // This arises when a base node is used (#2709 and #4378).
+ head.insertBefore( script, head.firstChild );
+ },
+
+ abort: function() {
+ if ( script ) {
+ script.onload( 0, 1 );
+ }
+ }
+ };
+ }
+});
+
+
+
+
+var // #5280: Internet Explorer will keep connections alive if we don't abort on unload
+ xhrOnUnloadAbort = window.ActiveXObject ? function() {
+ // Abort all pending requests
+ for ( var key in xhrCallbacks ) {
+ xhrCallbacks[ key ]( 0, 1 );
+ }
+ } : false,
+ xhrId = 0,
+ xhrCallbacks;
+
+// Functions to create xhrs
+function createStandardXHR() {
+ try {
+ return new window.XMLHttpRequest();
+ } catch( e ) {}
+}
+
+function createActiveXHR() {
+ try {
+ return new window.ActiveXObject( "Microsoft.XMLHTTP" );
+ } catch( e ) {}
+}
+
+// Create the request object
+// (This is still attached to ajaxSettings for backward compatibility)
+jQuery.ajaxSettings.xhr = window.ActiveXObject ?
+ /* Microsoft failed to properly
+ * implement the XMLHttpRequest in IE7 (can't request local files),
+ * so we use the ActiveXObject when it is available
+ * Additionally XMLHttpRequest can be disabled in IE7/IE8 so
+ * we need a fallback.
+ */
+ function() {
+ return !this.isLocal && createStandardXHR() || createActiveXHR();
+ } :
+ // For all other browsers, use the standard XMLHttpRequest object
+ createStandardXHR;
+
+// Determine support properties
+(function( xhr ) {
+ jQuery.extend( jQuery.support, {
+ ajax: !!xhr,
+ cors: !!xhr && ( "withCredentials" in xhr )
+ });
+})( jQuery.ajaxSettings.xhr() );
+
+// Create transport if the browser can provide an xhr
+if ( jQuery.support.ajax ) {
+
+ jQuery.ajaxTransport(function( s ) {
+ // Cross domain only allowed if supported through XMLHttpRequest
+ if ( !s.crossDomain || jQuery.support.cors ) {
+
+ var callback;
+
+ return {
+ send: function( headers, complete ) {
+
+ // Get a new xhr
+ var xhr = s.xhr(),
+ handle,
+ i;
+
+ // Open the socket
+ // Passing null username, generates a login popup on Opera (#2865)
+ if ( s.username ) {
+ xhr.open( s.type, s.url, s.async, s.username, s.password );
+ } else {
+ xhr.open( s.type, s.url, s.async );
+ }
+
+ // Apply custom fields if provided
+ if ( s.xhrFields ) {
+ for ( i in s.xhrFields ) {
+ xhr[ i ] = s.xhrFields[ i ];
+ }
+ }
+
+ // Override mime type if needed
+ if ( s.mimeType && xhr.overrideMimeType ) {
+ xhr.overrideMimeType( s.mimeType );
+ }
+
+ // X-Requested-With header
+ // For cross-domain requests, seeing as conditions for a preflight are
+ // akin to a jigsaw puzzle, we simply never set it to be sure.
+ // (it can always be set on a per-request basis or even using ajaxSetup)
+ // For same-domain requests, won't change header if already provided.
+ if ( !s.crossDomain && !headers["X-Requested-With"] ) {
+ headers[ "X-Requested-With" ] = "XMLHttpRequest";
+ }
+
+ // Need an extra try/catch for cross domain requests in Firefox 3
+ try {
+ for ( i in headers ) {
+ xhr.setRequestHeader( i, headers[ i ] );
+ }
+ } catch( _ ) {}
+
+ // Do send the request
+ // This may raise an exception which is actually
+ // handled in jQuery.ajax (so no try/catch here)
+ xhr.send( ( s.hasContent && s.data ) || null );
+
+ // Listener
+ callback = function( _, isAbort ) {
+
+ var status,
+ statusText,
+ responseHeaders,
+ responses,
+ xml;
+
+ // Firefox throws exceptions when accessing properties
+ // of an xhr when a network error occured
+ // http://helpful.knobs-dials.com/index.php/Component_returned_failure_code:_0x80040111_(NS_ERROR_NOT_AVAILABLE)
+ try {
+
+ // Was never called and is aborted or complete
+ if ( callback && ( isAbort || xhr.readyState === 4 ) ) {
+
+ // Only called once
+ callback = undefined;
+
+ // Do not keep as active anymore
+ if ( handle ) {
+ xhr.onreadystatechange = jQuery.noop;
+ if ( xhrOnUnloadAbort ) {
+ delete xhrCallbacks[ handle ];
+ }
+ }
+
+ // If it's an abort
+ if ( isAbort ) {
+ // Abort it manually if needed
+ if ( xhr.readyState !== 4 ) {
+ xhr.abort();
+ }
+ } else {
+ status = xhr.status;
+ responseHeaders = xhr.getAllResponseHeaders();
+ responses = {};
+ xml = xhr.responseXML;
+
+ // Construct response list
+ if ( xml && xml.documentElement /* #4958 */ ) {
+ responses.xml = xml;
+ }
+ responses.text = xhr.responseText;
+
+ // Firefox throws an exception when accessing
+ // statusText for faulty cross-domain requests
+ try {
+ statusText = xhr.statusText;
+ } catch( e ) {
+ // We normalize with Webkit giving an empty statusText
+ statusText = "";
+ }
+
+ // Filter status for non standard behaviors
+
+ // If the request is local and we have data: assume a success
+ // (success with no data won't get notified, that's the best we
+ // can do given current implementations)
+ if ( !status && s.isLocal && !s.crossDomain ) {
+ status = responses.text ? 200 : 404;
+ // IE - #1450: sometimes returns 1223 when it should be 204
+ } else if ( status === 1223 ) {
+ status = 204;
+ }
+ }
+ }
+ } catch( firefoxAccessException ) {
+ if ( !isAbort ) {
+ complete( -1, firefoxAccessException );
+ }
+ }
+
+ // Call complete if needed
+ if ( responses ) {
+ complete( status, statusText, responses, responseHeaders );
+ }
+ };
+
+ // if we're in sync mode or it's in cache
+ // and has been retrieved directly (IE6 & IE7)
+ // we need to manually fire the callback
+ if ( !s.async || xhr.readyState === 4 ) {
+ callback();
+ } else {
+ handle = ++xhrId;
+ if ( xhrOnUnloadAbort ) {
+ // Create the active xhrs callbacks list if needed
+ // and attach the unload handler
+ if ( !xhrCallbacks ) {
+ xhrCallbacks = {};
+ jQuery( window ).unload( xhrOnUnloadAbort );
+ }
+ // Add to list of active xhrs callbacks
+ xhrCallbacks[ handle ] = callback;
+ }
+ xhr.onreadystatechange = callback;
+ }
+ },
+
+ abort: function() {
+ if ( callback ) {
+ callback(0,1);
+ }
+ }
+ };
+ }
+ });
+}
+
+
+
+
+var elemdisplay = {},
+ iframe, iframeDoc,
+ rfxtypes = /^(?:toggle|show|hide)$/,
+ rfxnum = /^([+\-]=)?([\d+.\-]+)([a-z%]*)$/i,
+ timerId,
+ fxAttrs = [
+ // height animations
+ [ "height", "marginTop", "marginBottom", "paddingTop", "paddingBottom" ],
+ // width animations
+ [ "width", "marginLeft", "marginRight", "paddingLeft", "paddingRight" ],
+ // opacity animations
+ [ "opacity" ]
+ ],
+ fxNow;
+
+jQuery.fn.extend({
+ show: function( speed, easing, callback ) {
+ var elem, display;
+
+ if ( speed || speed === 0 ) {
+ return this.animate( genFx("show", 3), speed, easing, callback);
+
+ } else {
+ for ( var i = 0, j = this.length; i < j; i++ ) {
+ elem = this[i];
+
+ if ( elem.style ) {
+ display = elem.style.display;
+
+ // Reset the inline display of this element to learn if it is
+ // being hidden by cascaded rules or not
+ if ( !jQuery._data(elem, "olddisplay") && display === "none" ) {
+ display = elem.style.display = "";
+ }
+
+ // Set elements which have been overridden with display: none
+ // in a stylesheet to whatever the default browser style is
+ // for such an element
+ if ( display === "" && jQuery.css( elem, "display" ) === "none" ) {
+ jQuery._data(elem, "olddisplay", defaultDisplay(elem.nodeName));
+ }
+ }
+ }
+
+ // Set the display of most of the elements in a second loop
+ // to avoid the constant reflow
+ for ( i = 0; i < j; i++ ) {
+ elem = this[i];
+
+ if ( elem.style ) {
+ display = elem.style.display;
+
+ if ( display === "" || display === "none" ) {
+ elem.style.display = jQuery._data(elem, "olddisplay") || "";
+ }
+ }
+ }
+
+ return this;
+ }
+ },
+
+ hide: function( speed, easing, callback ) {
+ if ( speed || speed === 0 ) {
+ return this.animate( genFx("hide", 3), speed, easing, callback);
+
+ } else {
+ for ( var i = 0, j = this.length; i < j; i++ ) {
+ if ( this[i].style ) {
+ var display = jQuery.css( this[i], "display" );
+
+ if ( display !== "none" && !jQuery._data( this[i], "olddisplay" ) ) {
+ jQuery._data( this[i], "olddisplay", display );
+ }
+ }
+ }
+
+ // Set the display of the elements in a second loop
+ // to avoid the constant reflow
+ for ( i = 0; i < j; i++ ) {
+ if ( this[i].style ) {
+ this[i].style.display = "none";
+ }
+ }
+
+ return this;
+ }
+ },
+
+ // Save the old toggle function
+ _toggle: jQuery.fn.toggle,
+
+ toggle: function( fn, fn2, callback ) {
+ var bool = typeof fn === "boolean";
+
+ if ( jQuery.isFunction(fn) && jQuery.isFunction(fn2) ) {
+ this._toggle.apply( this, arguments );
+
+ } else if ( fn == null || bool ) {
+ this.each(function() {
+ var state = bool ? fn : jQuery(this).is(":hidden");
+ jQuery(this)[ state ? "show" : "hide" ]();
+ });
+
+ } else {
+ this.animate(genFx("toggle", 3), fn, fn2, callback);
+ }
+
+ return this;
+ },
+
+ fadeTo: function( speed, to, easing, callback ) {
+ return this.filter(":hidden").css("opacity", 0).show().end()
+ .animate({opacity: to}, speed, easing, callback);
+ },
+
+ animate: function( prop, speed, easing, callback ) {
+ var optall = jQuery.speed(speed, easing, callback);
+
+ if ( jQuery.isEmptyObject( prop ) ) {
+ return this.each( optall.complete, [ false ] );
+ }
+
+ // Do not change referenced properties as per-property easing will be lost
+ prop = jQuery.extend( {}, prop );
+
+ return this[ optall.queue === false ? "each" : "queue" ](function() {
+ // XXX 'this' does not always have a nodeName when running the
+ // test suite
+
+ if ( optall.queue === false ) {
+ jQuery._mark( this );
+ }
+
+ var opt = jQuery.extend( {}, optall ),
+ isElement = this.nodeType === 1,
+ hidden = isElement && jQuery(this).is(":hidden"),
+ name, val, p,
+ display, e,
+ parts, start, end, unit;
+
+ // will store per property easing and be used to determine when an animation is complete
+ opt.animatedProperties = {};
+
+ for ( p in prop ) {
+
+ // property name normalization
+ name = jQuery.camelCase( p );
+ if ( p !== name ) {
+ prop[ name ] = prop[ p ];
+ delete prop[ p ];
+ }
+
+ val = prop[ name ];
+
+ // easing resolution: per property > opt.specialEasing > opt.easing > 'swing' (default)
+ if ( jQuery.isArray( val ) ) {
+ opt.animatedProperties[ name ] = val[ 1 ];
+ val = prop[ name ] = val[ 0 ];
+ } else {
+ opt.animatedProperties[ name ] = opt.specialEasing && opt.specialEasing[ name ] || opt.easing || 'swing';
+ }
+
+ if ( val === "hide" && hidden || val === "show" && !hidden ) {
+ return opt.complete.call( this );
+ }
+
+ if ( isElement && ( name === "height" || name === "width" ) ) {
+ // Make sure that nothing sneaks out
+ // Record all 3 overflow attributes because IE does not
+ // change the overflow attribute when overflowX and
+ // overflowY are set to the same value
+ opt.overflow = [ this.style.overflow, this.style.overflowX, this.style.overflowY ];
+
+ // Set display property to inline-block for height/width
+ // animations on inline elements that are having width/height
+ // animated
+ if ( jQuery.css( this, "display" ) === "inline" &&
+ jQuery.css( this, "float" ) === "none" ) {
+ if ( !jQuery.support.inlineBlockNeedsLayout ) {
+ this.style.display = "inline-block";
+
+ } else {
+ display = defaultDisplay( this.nodeName );
+
+ // inline-level elements accept inline-block;
+ // block-level elements need to be inline with layout
+ if ( display === "inline" ) {
+ this.style.display = "inline-block";
+
+ } else {
+ this.style.display = "inline";
+ this.style.zoom = 1;
+ }
+ }
+ }
+ }
+ }
+
+ if ( opt.overflow != null ) {
+ this.style.overflow = "hidden";
+ }
+
+ for ( p in prop ) {
+ e = new jQuery.fx( this, opt, p );
+ val = prop[ p ];
+
+ if ( rfxtypes.test(val) ) {
+ e[ val === "toggle" ? hidden ? "show" : "hide" : val ]();
+
+ } else {
+ parts = rfxnum.exec( val );
+ start = e.cur();
+
+ if ( parts ) {
+ end = parseFloat( parts[2] );
+ unit = parts[3] || ( jQuery.cssNumber[ p ] ? "" : "px" );
+
+ // We need to compute starting value
+ if ( unit !== "px" ) {
+ jQuery.style( this, p, (end || 1) + unit);
+ start = ((end || 1) / e.cur()) * start;
+ jQuery.style( this, p, start + unit);
+ }
+
+ // If a +=/-= token was provided, we're doing a relative animation
+ if ( parts[1] ) {
+ end = ( (parts[ 1 ] === "-=" ? -1 : 1) * end ) + start;
+ }
+
+ e.custom( start, end, unit );
+
+ } else {
+ e.custom( start, val, "" );
+ }
+ }
+ }
+
+ // For JS strict compliance
+ return true;
+ });
+ },
+
+ stop: function( clearQueue, gotoEnd ) {
+ if ( clearQueue ) {
+ this.queue([]);
+ }
+
+ this.each(function() {
+ var timers = jQuery.timers,
+ i = timers.length;
+ // clear marker counters if we know they won't be
+ if ( !gotoEnd ) {
+ jQuery._unmark( true, this );
+ }
+ while ( i-- ) {
+ if ( timers[i].elem === this ) {
+ if (gotoEnd) {
+ // force the next step to be the last
+ timers[i](true);
+ }
+
+ timers.splice(i, 1);
+ }
+ }
+ });
+
+ // start the next in the queue if the last step wasn't forced
+ if ( !gotoEnd ) {
+ this.dequeue();
+ }
+
+ return this;
+ }
+
+});
+
+// Animations created synchronously will run synchronously
+function createFxNow() {
+ setTimeout( clearFxNow, 0 );
+ return ( fxNow = jQuery.now() );
+}
+
+function clearFxNow() {
+ fxNow = undefined;
+}
+
+// Generate parameters to create a standard animation
+function genFx( type, num ) {
+ var obj = {};
+
+ jQuery.each( fxAttrs.concat.apply([], fxAttrs.slice(0,num)), function() {
+ obj[ this ] = type;
+ });
+
+ return obj;
+}
+
+// Generate shortcuts for custom animations
+jQuery.each({
+ slideDown: genFx("show", 1),
+ slideUp: genFx("hide", 1),
+ slideToggle: genFx("toggle", 1),
+ fadeIn: { opacity: "show" },
+ fadeOut: { opacity: "hide" },
+ fadeToggle: { opacity: "toggle" }
+}, function( name, props ) {
+ jQuery.fn[ name ] = function( speed, easing, callback ) {
+ return this.animate( props, speed, easing, callback );
+ };
+});
+
+jQuery.extend({
+ speed: function( speed, easing, fn ) {
+ var opt = speed && typeof speed === "object" ? jQuery.extend({}, speed) : {
+ complete: fn || !fn && easing ||
+ jQuery.isFunction( speed ) && speed,
+ duration: speed,
+ easing: fn && easing || easing && !jQuery.isFunction(easing) && easing
+ };
+
+ opt.duration = jQuery.fx.off ? 0 : typeof opt.duration === "number" ? opt.duration :
+ opt.duration in jQuery.fx.speeds ? jQuery.fx.speeds[opt.duration] : jQuery.fx.speeds._default;
+
+ // Queueing
+ opt.old = opt.complete;
+ opt.complete = function( noUnmark ) {
+ if ( jQuery.isFunction( opt.old ) ) {
+ opt.old.call( this );
+ }
+
+ if ( opt.queue !== false ) {
+ jQuery.dequeue( this );
+ } else if ( noUnmark !== false ) {
+ jQuery._unmark( this );
+ }
+ };
+
+ return opt;
+ },
+
+ easing: {
+ linear: function( p, n, firstNum, diff ) {
+ return firstNum + diff * p;
+ },
+ swing: function( p, n, firstNum, diff ) {
+ return ((-Math.cos(p*Math.PI)/2) + 0.5) * diff + firstNum;
+ }
+ },
+
+ timers: [],
+
+ fx: function( elem, options, prop ) {
+ this.options = options;
+ this.elem = elem;
+ this.prop = prop;
+
+ options.orig = options.orig || {};
+ }
+
+});
+
+jQuery.fx.prototype = {
+ // Simple function for setting a style value
+ update: function() {
+ if ( this.options.step ) {
+ this.options.step.call( this.elem, this.now, this );
+ }
+
+ (jQuery.fx.step[this.prop] || jQuery.fx.step._default)( this );
+ },
+
+ // Get the current size
+ cur: function() {
+ if ( this.elem[this.prop] != null && (!this.elem.style || this.elem.style[this.prop] == null) ) {
+ return this.elem[ this.prop ];
+ }
+
+ var parsed,
+ r = jQuery.css( this.elem, this.prop );
+ // Empty strings, null, undefined and "auto" are converted to 0,
+ // complex values such as "rotate(1rad)" are returned as is,
+ // simple values such as "10px" are parsed to Float.
+ return isNaN( parsed = parseFloat( r ) ) ? !r || r === "auto" ? 0 : r : parsed;
+ },
+
+ // Start an animation from one number to another
+ custom: function( from, to, unit ) {
+ var self = this,
+ fx = jQuery.fx;
+
+ this.startTime = fxNow || createFxNow();
+ this.start = from;
+ this.end = to;
+ this.unit = unit || this.unit || ( jQuery.cssNumber[ this.prop ] ? "" : "px" );
+ this.now = this.start;
+ this.pos = this.state = 0;
+
+ function t( gotoEnd ) {
+ return self.step(gotoEnd);
+ }
+
+ t.elem = this.elem;
+
+ if ( t() && jQuery.timers.push(t) && !timerId ) {
+ timerId = setInterval( fx.tick, fx.interval );
+ }
+ },
+
+ // Simple 'show' function
+ show: function() {
+ // Remember where we started, so that we can go back to it later
+ this.options.orig[this.prop] = jQuery.style( this.elem, this.prop );
+ this.options.show = true;
+
+ // Begin the animation
+ // Make sure that we start at a small width/height to avoid any
+ // flash of content
+ this.custom(this.prop === "width" || this.prop === "height" ? 1 : 0, this.cur());
+
+ // Start by showing the element
+ jQuery( this.elem ).show();
+ },
+
+ // Simple 'hide' function
+ hide: function() {
+ // Remember where we started, so that we can go back to it later
+ this.options.orig[this.prop] = jQuery.style( this.elem, this.prop );
+ this.options.hide = true;
+
+ // Begin the animation
+ this.custom(this.cur(), 0);
+ },
+
+ // Each step of an animation
+ step: function( gotoEnd ) {
+ var t = fxNow || createFxNow(),
+ done = true,
+ elem = this.elem,
+ options = this.options,
+ i, n;
+
+ if ( gotoEnd || t >= options.duration + this.startTime ) {
+ this.now = this.end;
+ this.pos = this.state = 1;
+ this.update();
+
+ options.animatedProperties[ this.prop ] = true;
+
+ for ( i in options.animatedProperties ) {
+ if ( options.animatedProperties[i] !== true ) {
+ done = false;
+ }
+ }
+
+ if ( done ) {
+ // Reset the overflow
+ if ( options.overflow != null && !jQuery.support.shrinkWrapBlocks ) {
+
+ jQuery.each( [ "", "X", "Y" ], function (index, value) {
+ elem.style[ "overflow" + value ] = options.overflow[index];
+ });
+ }
+
+ // Hide the element if the "hide" operation was done
+ if ( options.hide ) {
+ jQuery(elem).hide();
+ }
+
+ // Reset the properties, if the item has been hidden or shown
+ if ( options.hide || options.show ) {
+ for ( var p in options.animatedProperties ) {
+ jQuery.style( elem, p, options.orig[p] );
+ }
+ }
+
+ // Execute the complete function
+ options.complete.call( elem );
+ }
+
+ return false;
+
+ } else {
+ // classical easing cannot be used with an Infinity duration
+ if ( options.duration == Infinity ) {
+ this.now = t;
+ } else {
+ n = t - this.startTime;
+ this.state = n / options.duration;
+
+ // Perform the easing function, defaults to swing
+ this.pos = jQuery.easing[ options.animatedProperties[ this.prop ] ]( this.state, n, 0, 1, options.duration );
+ this.now = this.start + ((this.end - this.start) * this.pos);
+ }
+ // Perform the next step of the animation
+ this.update();
+ }
+
+ return true;
+ }
+};
+
+jQuery.extend( jQuery.fx, {
+ tick: function() {
+ for ( var timers = jQuery.timers, i = 0 ; i < timers.length ; ++i ) {
+ if ( !timers[i]() ) {
+ timers.splice(i--, 1);
+ }
+ }
+
+ if ( !timers.length ) {
+ jQuery.fx.stop();
+ }
+ },
+
+ interval: 13,
+
+ stop: function() {
+ clearInterval( timerId );
+ timerId = null;
+ },
+
+ speeds: {
+ slow: 600,
+ fast: 200,
+ // Default speed
+ _default: 400
+ },
+
+ step: {
+ opacity: function( fx ) {
+ jQuery.style( fx.elem, "opacity", fx.now );
+ },
+
+ _default: function( fx ) {
+ if ( fx.elem.style && fx.elem.style[ fx.prop ] != null ) {
+ fx.elem.style[ fx.prop ] = (fx.prop === "width" || fx.prop === "height" ? Math.max(0, fx.now) : fx.now) + fx.unit;
+ } else {
+ fx.elem[ fx.prop ] = fx.now;
+ }
+ }
+ }
+});
+
+if ( jQuery.expr && jQuery.expr.filters ) {
+ jQuery.expr.filters.animated = function( elem ) {
+ return jQuery.grep(jQuery.timers, function( fn ) {
+ return elem === fn.elem;
+ }).length;
+ };
+}
+
+// Try to restore the default display value of an element
+function defaultDisplay( nodeName ) {
+
+ if ( !elemdisplay[ nodeName ] ) {
+
+ var body = document.body,
+ elem = jQuery( "<" + nodeName + ">" ).appendTo( body ),
+ display = elem.css( "display" );
+
+ elem.remove();
+
+ // If the simple way fails,
+ // get element's real default display by attaching it to a temp iframe
+ if ( display === "none" || display === "" ) {
+ // No iframe to use yet, so create it
+ if ( !iframe ) {
+ iframe = document.createElement( "iframe" );
+ iframe.frameBorder = iframe.width = iframe.height = 0;
+ }
+
+ body.appendChild( iframe );
+
+ // Create a cacheable copy of the iframe document on first call.
+ // IE and Opera will allow us to reuse the iframeDoc without re-writing the fake HTML
+ // document to it; WebKit & Firefox won't allow reusing the iframe document.
+ if ( !iframeDoc || !iframe.createElement ) {
+ iframeDoc = ( iframe.contentWindow || iframe.contentDocument ).document;
+ iframeDoc.write( ( document.compatMode === "CSS1Compat" ? "<!doctype html>" : "" ) + "<html><body>" );
+ iframeDoc.close();
+ }
+
+ elem = iframeDoc.createElement( nodeName );
+
+ iframeDoc.body.appendChild( elem );
+
+ display = jQuery.css( elem, "display" );
+
+ body.removeChild( iframe );
+ }
+
+ // Store the correct default display
+ elemdisplay[ nodeName ] = display;
+ }
+
+ return elemdisplay[ nodeName ];
+}
+
+
+
+
+var rtable = /^t(?:able|d|h)$/i,
+ rroot = /^(?:body|html)$/i;
+
+if ( "getBoundingClientRect" in document.documentElement ) {
+ jQuery.fn.offset = function( options ) {
+ var elem = this[0], box;
+
+ if ( options ) {
+ return this.each(function( i ) {
+ jQuery.offset.setOffset( this, options, i );
+ });
+ }
+
+ if ( !elem || !elem.ownerDocument ) {
+ return null;
+ }
+
+ if ( elem === elem.ownerDocument.body ) {
+ return jQuery.offset.bodyOffset( elem );
+ }
+
+ try {
+ box = elem.getBoundingClientRect();
+ } catch(e) {}
+
+ var doc = elem.ownerDocument,
+ docElem = doc.documentElement;
+
+ // Make sure we're not dealing with a disconnected DOM node
+ if ( !box || !jQuery.contains( docElem, elem ) ) {
+ return box ? { top: box.top, left: box.left } : { top: 0, left: 0 };
+ }
+
+ var body = doc.body,
+ win = getWindow(doc),
+ clientTop = docElem.clientTop || body.clientTop || 0,
+ clientLeft = docElem.clientLeft || body.clientLeft || 0,
+ scrollTop = win.pageYOffset || jQuery.support.boxModel && docElem.scrollTop || body.scrollTop,
+ scrollLeft = win.pageXOffset || jQuery.support.boxModel && docElem.scrollLeft || body.scrollLeft,
+ top = box.top + scrollTop - clientTop,
+ left = box.left + scrollLeft - clientLeft;
+
+ return { top: top, left: left };
+ };
+
+} else {
+ jQuery.fn.offset = function( options ) {
+ var elem = this[0];
+
+ if ( options ) {
+ return this.each(function( i ) {
+ jQuery.offset.setOffset( this, options, i );
+ });
+ }
+
+ if ( !elem || !elem.ownerDocument ) {
+ return null;
+ }
+
+ if ( elem === elem.ownerDocument.body ) {
+ return jQuery.offset.bodyOffset( elem );
+ }
+
+ jQuery.offset.initialize();
+
+ var computedStyle,
+ offsetParent = elem.offsetParent,
+ prevOffsetParent = elem,
+ doc = elem.ownerDocument,
+ docElem = doc.documentElement,
+ body = doc.body,
+ defaultView = doc.defaultView,
+ prevComputedStyle = defaultView ? defaultView.getComputedStyle( elem, null ) : elem.currentStyle,
+ top = elem.offsetTop,
+ left = elem.offsetLeft;
+
+ while ( (elem = elem.parentNode) && elem !== body && elem !== docElem ) {
+ if ( jQuery.offset.supportsFixedPosition && prevComputedStyle.position === "fixed" ) {
+ break;
+ }
+
+ computedStyle = defaultView ? defaultView.getComputedStyle(elem, null) : elem.currentStyle;
+ top -= elem.scrollTop;
+ left -= elem.scrollLeft;
+
+ if ( elem === offsetParent ) {
+ top += elem.offsetTop;
+ left += elem.offsetLeft;
+
+ if ( jQuery.offset.doesNotAddBorder && !(jQuery.offset.doesAddBorderForTableAndCells && rtable.test(elem.nodeName)) ) {
+ top += parseFloat( computedStyle.borderTopWidth ) || 0;
+ left += parseFloat( computedStyle.borderLeftWidth ) || 0;
+ }
+
+ prevOffsetParent = offsetParent;
+ offsetParent = elem.offsetParent;
+ }
+
+ if ( jQuery.offset.subtractsBorderForOverflowNotVisible && computedStyle.overflow !== "visible" ) {
+ top += parseFloat( computedStyle.borderTopWidth ) || 0;
+ left += parseFloat( computedStyle.borderLeftWidth ) || 0;
+ }
+
+ prevComputedStyle = computedStyle;
+ }
+
+ if ( prevComputedStyle.position === "relative" || prevComputedStyle.position === "static" ) {
+ top += body.offsetTop;
+ left += body.offsetLeft;
+ }
+
+ if ( jQuery.offset.supportsFixedPosition && prevComputedStyle.position === "fixed" ) {
+ top += Math.max( docElem.scrollTop, body.scrollTop );
+ left += Math.max( docElem.scrollLeft, body.scrollLeft );
+ }
+
+ return { top: top, left: left };
+ };
+}
+
+jQuery.offset = {
+ initialize: function() {
+ var body = document.body, container = document.createElement("div"), innerDiv, checkDiv, table, td, bodyMarginTop = parseFloat( jQuery.css(body, "marginTop") ) || 0,
+ html = "<div style='position:absolute;top:0;left:0;margin:0;border:5px solid #000;padding:0;width:1px;height:1px;'><div></div></div><table style='position:absolute;top:0;left:0;margin:0;border:5px solid #000;padding:0;width:1px;height:1px;' cellpadding='0' cellspacing='0'><tr><td></td></tr></table>";
+
+ jQuery.extend( container.style, { position: "absolute", top: 0, left: 0, margin: 0, border: 0, width: "1px", height: "1px", visibility: "hidden" } );
+
+ container.innerHTML = html;
+ body.insertBefore( container, body.firstChild );
+ innerDiv = container.firstChild;
+ checkDiv = innerDiv.firstChild;
+ td = innerDiv.nextSibling.firstChild.firstChild;
+
+ this.doesNotAddBorder = (checkDiv.offsetTop !== 5);
+ this.doesAddBorderForTableAndCells = (td.offsetTop === 5);
+
+ checkDiv.style.position = "fixed";
+ checkDiv.style.top = "20px";
+
+ // safari subtracts parent border width here which is 5px
+ this.supportsFixedPosition = (checkDiv.offsetTop === 20 || checkDiv.offsetTop === 15);
+ checkDiv.style.position = checkDiv.style.top = "";
+
+ innerDiv.style.overflow = "hidden";
+ innerDiv.style.position = "relative";
+
+ this.subtractsBorderForOverflowNotVisible = (checkDiv.offsetTop === -5);
+
+ this.doesNotIncludeMarginInBodyOffset = (body.offsetTop !== bodyMarginTop);
+
+ body.removeChild( container );
+ jQuery.offset.initialize = jQuery.noop;
+ },
+
+ bodyOffset: function( body ) {
+ var top = body.offsetTop,
+ left = body.offsetLeft;
+
+ jQuery.offset.initialize();
+
+ if ( jQuery.offset.doesNotIncludeMarginInBodyOffset ) {
+ top += parseFloat( jQuery.css(body, "marginTop") ) || 0;
+ left += parseFloat( jQuery.css(body, "marginLeft") ) || 0;
+ }
+
+ return { top: top, left: left };
+ },
+
+ setOffset: function( elem, options, i ) {
+ var position = jQuery.css( elem, "position" );
+
+ // set position first, in-case top/left are set even on static elem
+ if ( position === "static" ) {
+ elem.style.position = "relative";
+ }
+
+ var curElem = jQuery( elem ),
+ curOffset = curElem.offset(),
+ curCSSTop = jQuery.css( elem, "top" ),
+ curCSSLeft = jQuery.css( elem, "left" ),
+ calculatePosition = (position === "absolute" || position === "fixed") && jQuery.inArray("auto", [curCSSTop, curCSSLeft]) > -1,
+ props = {}, curPosition = {}, curTop, curLeft;
+
+ // need to be able to calculate position if either top or left is auto and position is either absolute or fixed
+ if ( calculatePosition ) {
+ curPosition = curElem.position();
+ curTop = curPosition.top;
+ curLeft = curPosition.left;
+ } else {
+ curTop = parseFloat( curCSSTop ) || 0;
+ curLeft = parseFloat( curCSSLeft ) || 0;
+ }
+
+ if ( jQuery.isFunction( options ) ) {
+ options = options.call( elem, i, curOffset );
+ }
+
+ if (options.top != null) {
+ props.top = (options.top - curOffset.top) + curTop;
+ }
+ if (options.left != null) {
+ props.left = (options.left - curOffset.left) + curLeft;
+ }
+
+ if ( "using" in options ) {
+ options.using.call( elem, props );
+ } else {
+ curElem.css( props );
+ }
+ }
+};
+
+
+jQuery.fn.extend({
+ position: function() {
+ if ( !this[0] ) {
+ return null;
+ }
+
+ var elem = this[0],
+
+ // Get *real* offsetParent
+ offsetParent = this.offsetParent(),
+
+ // Get correct offsets
+ offset = this.offset(),
+ parentOffset = rroot.test(offsetParent[0].nodeName) ? { top: 0, left: 0 } : offsetParent.offset();
+
+ // Subtract element margins
+ // note: when an element has margin: auto the offsetLeft and marginLeft
+ // are the same in Safari causing offset.left to incorrectly be 0
+ offset.top -= parseFloat( jQuery.css(elem, "marginTop") ) || 0;
+ offset.left -= parseFloat( jQuery.css(elem, "marginLeft") ) || 0;
+
+ // Add offsetParent borders
+ parentOffset.top += parseFloat( jQuery.css(offsetParent[0], "borderTopWidth") ) || 0;
+ parentOffset.left += parseFloat( jQuery.css(offsetParent[0], "borderLeftWidth") ) || 0;
+
+ // Subtract the two offsets
+ return {
+ top: offset.top - parentOffset.top,
+ left: offset.left - parentOffset.left
+ };
+ },
+
+ offsetParent: function() {
+ return this.map(function() {
+ var offsetParent = this.offsetParent || document.body;
+ while ( offsetParent && (!rroot.test(offsetParent.nodeName) && jQuery.css(offsetParent, "position") === "static") ) {
+ offsetParent = offsetParent.offsetParent;
+ }
+ return offsetParent;
+ });
+ }
+});
+
+
+// Create scrollLeft and scrollTop methods
+jQuery.each( ["Left", "Top"], function( i, name ) {
+ var method = "scroll" + name;
+
+ jQuery.fn[ method ] = function( val ) {
+ var elem, win;
+
+ if ( val === undefined ) {
+ elem = this[ 0 ];
+
+ if ( !elem ) {
+ return null;
+ }
+
+ win = getWindow( elem );
+
+ // Return the scroll offset
+ return win ? ("pageXOffset" in win) ? win[ i ? "pageYOffset" : "pageXOffset" ] :
+ jQuery.support.boxModel && win.document.documentElement[ method ] ||
+ win.document.body[ method ] :
+ elem[ method ];
+ }
+
+ // Set the scroll offset
+ return this.each(function() {
+ win = getWindow( this );
+
+ if ( win ) {
+ win.scrollTo(
+ !i ? val : jQuery( win ).scrollLeft(),
+ i ? val : jQuery( win ).scrollTop()
+ );
+
+ } else {
+ this[ method ] = val;
+ }
+ });
+ };
+});
+
+function getWindow( elem ) {
+ return jQuery.isWindow( elem ) ?
+ elem :
+ elem.nodeType === 9 ?
+ elem.defaultView || elem.parentWindow :
+ false;
+}
+
+
+
+
+// Create width, height, innerHeight, innerWidth, outerHeight and outerWidth methods
+jQuery.each([ "Height", "Width" ], function( i, name ) {
+
+ var type = name.toLowerCase();
+
+ // innerHeight and innerWidth
+ jQuery.fn[ "inner" + name ] = function() {
+ var elem = this[0];
+ return elem && elem.style ?
+ parseFloat( jQuery.css( elem, type, "padding" ) ) :
+ null;
+ };
+
+ // outerHeight and outerWidth
+ jQuery.fn[ "outer" + name ] = function( margin ) {
+ var elem = this[0];
+ return elem && elem.style ?
+ parseFloat( jQuery.css( elem, type, margin ? "margin" : "border" ) ) :
+ null;
+ };
+
+ jQuery.fn[ type ] = function( size ) {
+ // Get window width or height
+ var elem = this[0];
+ if ( !elem ) {
+ return size == null ? null : this;
+ }
+
+ if ( jQuery.isFunction( size ) ) {
+ return this.each(function( i ) {
+ var self = jQuery( this );
+ self[ type ]( size.call( this, i, self[ type ]() ) );
+ });
+ }
+
+ if ( jQuery.isWindow( elem ) ) {
+ // Everyone else use document.documentElement or document.body depending on Quirks vs Standards mode
+ // 3rd condition allows Nokia support, as it supports the docElem prop but not CSS1Compat
+ var docElemProp = elem.document.documentElement[ "client" + name ],
+ body = elem.document.body;
+ return elem.document.compatMode === "CSS1Compat" && docElemProp ||
+ body && body[ "client" + name ] || docElemProp;
+
+ // Get document width or height
+ } else if ( elem.nodeType === 9 ) {
+ // Either scroll[Width/Height] or offset[Width/Height], whichever is greater
+ return Math.max(
+ elem.documentElement["client" + name],
+ elem.body["scroll" + name], elem.documentElement["scroll" + name],
+ elem.body["offset" + name], elem.documentElement["offset" + name]
+ );
+
+ // Get or set width or height on the element
+ } else if ( size === undefined ) {
+ var orig = jQuery.css( elem, type ),
+ ret = parseFloat( orig );
+
+ return jQuery.isNaN( ret ) ? orig : ret;
+
+ // Set the width or height on the element (default to pixels if value is unitless)
+ } else {
+ return this.css( type, typeof size === "string" ? size : size + "px" );
+ }
+ };
+
+});
+
+
+// Expose jQuery to the global object
+window.jQuery = window.$ = jQuery;
+})(window);
diff --git a/contrib/server/Translation-web/web/lib/jquery-ui-1.8.16.custom.js b/contrib/server/Translation-web/web/lib/jquery-ui-1.8.16.custom.js
new file mode 100755
index 000000000..c3fca875f
--- /dev/null
+++ b/contrib/server/Translation-web/web/lib/jquery-ui-1.8.16.custom.js
@@ -0,0 +1,11769 @@
+/*!
+ * jQuery UI 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI
+ */
+(function( $, undefined ) {
+
+// prevent duplicate loading
+// this is only a problem because we proxy existing functions
+// and we don't want to double proxy them
+$.ui = $.ui || {};
+if ( $.ui.version ) {
+ return;
+}
+
+$.extend( $.ui, {
+ version: "1.8.16",
+
+ keyCode: {
+ ALT: 18,
+ BACKSPACE: 8,
+ CAPS_LOCK: 20,
+ COMMA: 188,
+ COMMAND: 91,
+ COMMAND_LEFT: 91, // COMMAND
+ COMMAND_RIGHT: 93,
+ CONTROL: 17,
+ DELETE: 46,
+ DOWN: 40,
+ END: 35,
+ ENTER: 13,
+ ESCAPE: 27,
+ HOME: 36,
+ INSERT: 45,
+ LEFT: 37,
+ MENU: 93, // COMMAND_RIGHT
+ NUMPAD_ADD: 107,
+ NUMPAD_DECIMAL: 110,
+ NUMPAD_DIVIDE: 111,
+ NUMPAD_ENTER: 108,
+ NUMPAD_MULTIPLY: 106,
+ NUMPAD_SUBTRACT: 109,
+ PAGE_DOWN: 34,
+ PAGE_UP: 33,
+ PERIOD: 190,
+ RIGHT: 39,
+ SHIFT: 16,
+ SPACE: 32,
+ TAB: 9,
+ UP: 38,
+ WINDOWS: 91 // COMMAND
+ }
+});
+
+// plugins
+$.fn.extend({
+ propAttr: $.fn.prop || $.fn.attr,
+
+ _focus: $.fn.focus,
+ focus: function( delay, fn ) {
+ return typeof delay === "number" ?
+ this.each(function() {
+ var elem = this;
+ setTimeout(function() {
+ $( elem ).focus();
+ if ( fn ) {
+ fn.call( elem );
+ }
+ }, delay );
+ }) :
+ this._focus.apply( this, arguments );
+ },
+
+ scrollParent: function() {
+ var scrollParent;
+ if (($.browser.msie && (/(static|relative)/).test(this.css('position'))) || (/absolute/).test(this.css('position'))) {
+ scrollParent = this.parents().filter(function() {
+ return (/(relative|absolute|fixed)/).test($.curCSS(this,'position',1)) && (/(auto|scroll)/).test($.curCSS(this,'overflow',1)+$.curCSS(this,'overflow-y',1)+$.curCSS(this,'overflow-x',1));
+ }).eq(0);
+ } else {
+ scrollParent = this.parents().filter(function() {
+ return (/(auto|scroll)/).test($.curCSS(this,'overflow',1)+$.curCSS(this,'overflow-y',1)+$.curCSS(this,'overflow-x',1));
+ }).eq(0);
+ }
+
+ return (/fixed/).test(this.css('position')) || !scrollParent.length ? $(document) : scrollParent;
+ },
+
+ zIndex: function( zIndex ) {
+ if ( zIndex !== undefined ) {
+ return this.css( "zIndex", zIndex );
+ }
+
+ if ( this.length ) {
+ var elem = $( this[ 0 ] ), position, value;
+ while ( elem.length && elem[ 0 ] !== document ) {
+ // Ignore z-index if position is set to a value where z-index is ignored by the browser
+ // This makes behavior of this function consistent across browsers
+ // WebKit always returns auto if the element is positioned
+ position = elem.css( "position" );
+ if ( position === "absolute" || position === "relative" || position === "fixed" ) {
+ // IE returns 0 when zIndex is not specified
+ // other browsers return a string
+ // we ignore the case of nested elements with an explicit value of 0
+ // <div style="z-index: -10;"><div style="z-index: 0;"></div></div>
+ value = parseInt( elem.css( "zIndex" ), 10 );
+ if ( !isNaN( value ) && value !== 0 ) {
+ return value;
+ }
+ }
+ elem = elem.parent();
+ }
+ }
+
+ return 0;
+ },
+
+ disableSelection: function() {
+ return this.bind( ( $.support.selectstart ? "selectstart" : "mousedown" ) +
+ ".ui-disableSelection", function( event ) {
+ event.preventDefault();
+ });
+ },
+
+ enableSelection: function() {
+ return this.unbind( ".ui-disableSelection" );
+ }
+});
+
+$.each( [ "Width", "Height" ], function( i, name ) {
+ var side = name === "Width" ? [ "Left", "Right" ] : [ "Top", "Bottom" ],
+ type = name.toLowerCase(),
+ orig = {
+ innerWidth: $.fn.innerWidth,
+ innerHeight: $.fn.innerHeight,
+ outerWidth: $.fn.outerWidth,
+ outerHeight: $.fn.outerHeight
+ };
+
+ function reduce( elem, size, border, margin ) {
+ $.each( side, function() {
+ size -= parseFloat( $.curCSS( elem, "padding" + this, true) ) || 0;
+ if ( border ) {
+ size -= parseFloat( $.curCSS( elem, "border" + this + "Width", true) ) || 0;
+ }
+ if ( margin ) {
+ size -= parseFloat( $.curCSS( elem, "margin" + this, true) ) || 0;
+ }
+ });
+ return size;
+ }
+
+ $.fn[ "inner" + name ] = function( size ) {
+ if ( size === undefined ) {
+ return orig[ "inner" + name ].call( this );
+ }
+
+ return this.each(function() {
+ $( this ).css( type, reduce( this, size ) + "px" );
+ });
+ };
+
+ $.fn[ "outer" + name] = function( size, margin ) {
+ if ( typeof size !== "number" ) {
+ return orig[ "outer" + name ].call( this, size );
+ }
+
+ return this.each(function() {
+ $( this).css( type, reduce( this, size, true, margin ) + "px" );
+ });
+ };
+});
+
+// selectors
+function focusable( element, isTabIndexNotNaN ) {
+ var nodeName = element.nodeName.toLowerCase();
+ if ( "area" === nodeName ) {
+ var map = element.parentNode,
+ mapName = map.name,
+ img;
+ if ( !element.href || !mapName || map.nodeName.toLowerCase() !== "map" ) {
+ return false;
+ }
+ img = $( "img[usemap=#" + mapName + "]" )[0];
+ return !!img && visible( img );
+ }
+ return ( /input|select|textarea|button|object/.test( nodeName )
+ ? !element.disabled
+ : "a" == nodeName
+ ? element.href || isTabIndexNotNaN
+ : isTabIndexNotNaN)
+ // the element and all of its ancestors must be visible
+ && visible( element );
+}
+
+function visible( element ) {
+ return !$( element ).parents().andSelf().filter(function() {
+ return $.curCSS( this, "visibility" ) === "hidden" ||
+ $.expr.filters.hidden( this );
+ }).length;
+}
+
+$.extend( $.expr[ ":" ], {
+ data: function( elem, i, match ) {
+ return !!$.data( elem, match[ 3 ] );
+ },
+
+ focusable: function( element ) {
+ return focusable( element, !isNaN( $.attr( element, "tabindex" ) ) );
+ },
+
+ tabbable: function( element ) {
+ var tabIndex = $.attr( element, "tabindex" ),
+ isTabIndexNaN = isNaN( tabIndex );
+ return ( isTabIndexNaN || tabIndex >= 0 ) && focusable( element, !isTabIndexNaN );
+ }
+});
+
+// support
+$(function() {
+ var body = document.body,
+ div = body.appendChild( div = document.createElement( "div" ) );
+
+ $.extend( div.style, {
+ minHeight: "100px",
+ height: "auto",
+ padding: 0,
+ borderWidth: 0
+ });
+
+ $.support.minHeight = div.offsetHeight === 100;
+ $.support.selectstart = "onselectstart" in div;
+
+ // set display to none to avoid a layout bug in IE
+ // http://dev.jquery.com/ticket/4014
+ body.removeChild( div ).style.display = "none";
+});
+
+
+
+
+
+// deprecated
+$.extend( $.ui, {
+ // $.ui.plugin is deprecated. Use the proxy pattern instead.
+ plugin: {
+ add: function( module, option, set ) {
+ var proto = $.ui[ module ].prototype;
+ for ( var i in set ) {
+ proto.plugins[ i ] = proto.plugins[ i ] || [];
+ proto.plugins[ i ].push( [ option, set[ i ] ] );
+ }
+ },
+ call: function( instance, name, args ) {
+ var set = instance.plugins[ name ];
+ if ( !set || !instance.element[ 0 ].parentNode ) {
+ return;
+ }
+
+ for ( var i = 0; i < set.length; i++ ) {
+ if ( instance.options[ set[ i ][ 0 ] ] ) {
+ set[ i ][ 1 ].apply( instance.element, args );
+ }
+ }
+ }
+ },
+
+ // will be deprecated when we switch to jQuery 1.4 - use jQuery.contains()
+ contains: function( a, b ) {
+ return document.compareDocumentPosition ?
+ a.compareDocumentPosition( b ) & 16 :
+ a !== b && a.contains( b );
+ },
+
+ // only used by resizable
+ hasScroll: function( el, a ) {
+
+ //If overflow is hidden, the element might have extra content, but the user wants to hide it
+ if ( $( el ).css( "overflow" ) === "hidden") {
+ return false;
+ }
+
+ var scroll = ( a && a === "left" ) ? "scrollLeft" : "scrollTop",
+ has = false;
+
+ if ( el[ scroll ] > 0 ) {
+ return true;
+ }
+
+ // TODO: determine which cases actually cause this to happen
+ // if the element doesn't have the scroll set, see if it's possible to
+ // set the scroll
+ el[ scroll ] = 1;
+ has = ( el[ scroll ] > 0 );
+ el[ scroll ] = 0;
+ return has;
+ },
+
+ // these are odd functions, fix the API or move into individual plugins
+ isOverAxis: function( x, reference, size ) {
+ //Determines when x coordinate is over "b" element axis
+ return ( x > reference ) && ( x < ( reference + size ) );
+ },
+ isOver: function( y, x, top, left, height, width ) {
+ //Determines when x, y coordinates is over "b" element
+ return $.ui.isOverAxis( y, top, height ) && $.ui.isOverAxis( x, left, width );
+ }
+});
+
+})( jQuery );
+/*!
+ * jQuery UI Widget 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Widget
+ */
+(function( $, undefined ) {
+
+// jQuery 1.4+
+if ( $.cleanData ) {
+ var _cleanData = $.cleanData;
+ $.cleanData = function( elems ) {
+ for ( var i = 0, elem; (elem = elems[i]) != null; i++ ) {
+ try {
+ $( elem ).triggerHandler( "remove" );
+ // http://bugs.jquery.com/ticket/8235
+ } catch( e ) {}
+ }
+ _cleanData( elems );
+ };
+} else {
+ var _remove = $.fn.remove;
+ $.fn.remove = function( selector, keepData ) {
+ return this.each(function() {
+ if ( !keepData ) {
+ if ( !selector || $.filter( selector, [ this ] ).length ) {
+ $( "*", this ).add( [ this ] ).each(function() {
+ try {
+ $( this ).triggerHandler( "remove" );
+ // http://bugs.jquery.com/ticket/8235
+ } catch( e ) {}
+ });
+ }
+ }
+ return _remove.call( $(this), selector, keepData );
+ });
+ };
+}
+
+$.widget = function( name, base, prototype ) {
+ var namespace = name.split( "." )[ 0 ],
+ fullName;
+ name = name.split( "." )[ 1 ];
+ fullName = namespace + "-" + name;
+
+ if ( !prototype ) {
+ prototype = base;
+ base = $.Widget;
+ }
+
+ // create selector for plugin
+ $.expr[ ":" ][ fullName ] = function( elem ) {
+ return !!$.data( elem, name );
+ };
+
+ $[ namespace ] = $[ namespace ] || {};
+ $[ namespace ][ name ] = function( options, element ) {
+ // allow instantiation without initializing for simple inheritance
+ if ( arguments.length ) {
+ this._createWidget( options, element );
+ }
+ };
+
+ var basePrototype = new base();
+ // we need to make the options hash a property directly on the new instance
+ // otherwise we'll modify the options hash on the prototype that we're
+ // inheriting from
+// $.each( basePrototype, function( key, val ) {
+// if ( $.isPlainObject(val) ) {
+// basePrototype[ key ] = $.extend( {}, val );
+// }
+// });
+ basePrototype.options = $.extend( true, {}, basePrototype.options );
+ $[ namespace ][ name ].prototype = $.extend( true, basePrototype, {
+ namespace: namespace,
+ widgetName: name,
+ widgetEventPrefix: $[ namespace ][ name ].prototype.widgetEventPrefix || name,
+ widgetBaseClass: fullName
+ }, prototype );
+
+ $.widget.bridge( name, $[ namespace ][ name ] );
+};
+
+$.widget.bridge = function( name, object ) {
+ $.fn[ name ] = function( options ) {
+ var isMethodCall = typeof options === "string",
+ args = Array.prototype.slice.call( arguments, 1 ),
+ returnValue = this;
+
+ // allow multiple hashes to be passed on init
+ options = !isMethodCall && args.length ?
+ $.extend.apply( null, [ true, options ].concat(args) ) :
+ options;
+
+ // prevent calls to internal methods
+ if ( isMethodCall && options.charAt( 0 ) === "_" ) {
+ return returnValue;
+ }
+
+ if ( isMethodCall ) {
+ this.each(function() {
+ var instance = $.data( this, name ),
+ methodValue = instance && $.isFunction( instance[options] ) ?
+ instance[ options ].apply( instance, args ) :
+ instance;
+ // TODO: add this back in 1.9 and use $.error() (see #5972)
+ if ( !instance ) {
+ alert("ui : before initialization " + name + options);
+ throw "cannot call methods on " + name + " prior to initialization; " +
+ "attempted to call method '" + options + "'";
+ }
+ if ( !$.isFunction( instance[options] ) ) {
+ alert("ui : no such method" + name + options);
+ throw "no such method '" + options + "' for " + name + " widget instance";
+ }
+// var methodValue = instance[ options ].apply( instance, args );
+ if ( methodValue !== instance && methodValue !== undefined ) {
+ returnValue = methodValue;
+ return false;
+ }
+ });
+ } else {
+ this.each(function() {
+ var instance = $.data( this, name );
+ if ( instance ) {
+ instance.option( options || {} )._init();
+ } else {
+ $.data( this, name, new object( options, this ) );
+ }
+ });
+ }
+
+ return returnValue;
+ };
+};
+
+$.Widget = function( options, element ) {
+ // allow instantiation without initializing for simple inheritance
+ if ( arguments.length ) {
+ this._createWidget( options, element );
+ }
+};
+
+$.Widget.prototype = {
+ widgetName: "widget",
+ widgetEventPrefix: "",
+ options: {
+ disabled: false
+ },
+ _createWidget: function( options, element ) {
+ // $.widget.bridge stores the plugin instance, but we do it anyway
+ // so that it's stored even before the _create function runs
+ $.data( element, this.widgetName, this );
+ this.element = $( element );
+ this.options = $.extend( true, {},
+ this.options,
+ this._getCreateOptions(),
+ options );
+
+ var self = this;
+ this.element.bind( "remove." + this.widgetName, function() {
+ self.destroy();
+ });
+
+ this._create();
+ this._trigger( "create" );
+ this._init();
+ },
+ _getCreateOptions: function() {
+ return $.metadata && $.metadata.get( this.element[0] )[ this.widgetName ];
+ },
+ _create: function() {},
+ _init: function() {},
+
+ destroy: function() {
+ this.element
+ .unbind( "." + this.widgetName )
+ .removeData( this.widgetName );
+ this.widget()
+ .unbind( "." + this.widgetName )
+ .removeAttr( "aria-disabled" )
+ .removeClass(
+ this.widgetBaseClass + "-disabled " +
+ "ui-state-disabled" );
+ },
+
+ widget: function() {
+ return this.element;
+ },
+
+ option: function( key, value ) {
+ var options = key;
+
+ if ( arguments.length === 0 ) {
+ // don't return a reference to the internal hash
+ return $.extend( {}, this.options );
+ }
+
+ if (typeof key === "string" ) {
+ if ( value === undefined ) {
+ return this.options[ key ];
+ }
+ options = {};
+ options[ key ] = value;
+ }
+
+ this._setOptions( options );
+
+ return this;
+ },
+ _setOptions: function( options ) {
+ var self = this;
+ $.each( options, function( key, value ) {
+ self._setOption( key, value );
+ });
+
+ return this;
+ },
+ _setOption: function( key, value ) {
+ this.options[ key ] = value;
+
+ if ( key === "disabled" ) {
+ this.widget()
+ [ value ? "addClass" : "removeClass"](
+ this.widgetBaseClass + "-disabled" + " " +
+ "ui-state-disabled" )
+ .attr( "aria-disabled", value );
+ }
+
+ return this;
+ },
+
+ enable: function() {
+ return this._setOption( "disabled", false );
+ },
+ disable: function() {
+ return this._setOption( "disabled", true );
+ },
+
+ _trigger: function( type, event, data ) {
+ var callback = this.options[ type ];
+
+ event = $.Event( event );
+ event.type = ( type === this.widgetEventPrefix ?
+ type :
+ this.widgetEventPrefix + type ).toLowerCase();
+ data = data || {};
+
+ // copy original event properties over to the new event
+ // this would happen if we could call $.event.fix instead of $.Event
+ // but we don't have a way to force an event to be fixed multiple times
+ if ( event.originalEvent ) {
+ for ( var i = $.event.props.length, prop; i; ) {
+ prop = $.event.props[ --i ];
+ event[ prop ] = event.originalEvent[ prop ];
+ }
+ }
+
+ this.element.trigger( event, data );
+
+ return !( $.isFunction(callback) &&
+ callback.call( this.element[0], event, data ) === false ||
+ event.isDefaultPrevented() );
+ }
+};
+
+})( jQuery );
+/*!
+ * jQuery UI Mouse 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Mouse
+ *
+ * Depends:
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+var mouseHandled = false;
+$( document ).mouseup( function( e ) {
+ mouseHandled = false;
+});
+
+$.widget("ui.mouse", {
+ options: {
+ cancel: ':input,option',
+ distance: 1,
+ delay: 0
+ },
+ _mouseInit: function() {
+ var self = this;
+
+ this.element
+ .bind('mousedown.'+this.widgetName, function(event) {
+ return self._mouseDown(event);
+ })
+ .bind('click.'+this.widgetName, function(event) {
+ if (true === $.data(event.target, self.widgetName + '.preventClickEvent')) {
+ $.removeData(event.target, self.widgetName + '.preventClickEvent');
+ event.stopImmediatePropagation();
+ return false;
+ }
+ });
+
+ this.started = false;
+ },
+
+ // TODO: make sure destroying one instance of mouse doesn't mess with
+ // other instances of mouse
+ _mouseDestroy: function() {
+ this.element.unbind('.'+this.widgetName);
+ },
+
+ _mouseDown: function(event) {
+ // don't let more than one widget handle mouseStart
+ if( mouseHandled ) { return };
+
+ // we may have missed mouseup (out of window)
+ (this._mouseStarted && this._mouseUp(event));
+
+ this._mouseDownEvent = event;
+
+ var self = this,
+ btnIsLeft = (event.which == 1),
+ // event.target.nodeName works around a bug in IE 8 with
+ // disabled inputs (#7620)
+ elIsCancel = (typeof this.options.cancel == "string" && event.target.nodeName ? $(event.target).closest(this.options.cancel).length : false);
+ if (!btnIsLeft || elIsCancel || !this._mouseCapture(event)) {
+ return true;
+ }
+
+ this.mouseDelayMet = !this.options.delay;
+ if (!this.mouseDelayMet) {
+ this._mouseDelayTimer = setTimeout(function() {
+ self.mouseDelayMet = true;
+ }, this.options.delay);
+ }
+
+ if (this._mouseDistanceMet(event) && this._mouseDelayMet(event)) {
+ this._mouseStarted = (this._mouseStart(event) !== false);
+ if (!this._mouseStarted) {
+ event.preventDefault();
+ return true;
+ }
+ }
+
+ // Click event may never have fired (Gecko & Opera)
+ if (true === $.data(event.target, this.widgetName + '.preventClickEvent')) {
+ $.removeData(event.target, this.widgetName + '.preventClickEvent');
+ }
+
+ // these delegates are required to keep context
+ this._mouseMoveDelegate = function(event) {
+ return self._mouseMove(event);
+ };
+ this._mouseUpDelegate = function(event) {
+ return self._mouseUp(event);
+ };
+ $(document)
+ .bind('mousemove.'+this.widgetName, this._mouseMoveDelegate)
+ .bind('mouseup.'+this.widgetName, this._mouseUpDelegate);
+
+ event.preventDefault();
+
+ mouseHandled = true;
+ return true;
+ },
+
+ _mouseMove: function(event) {
+ // IE mouseup check - mouseup happened when mouse was out of window
+ if ($.browser.msie && !(document.documentMode >= 9) && !event.button) {
+ return this._mouseUp(event);
+ }
+
+ if (this._mouseStarted) {
+ this._mouseDrag(event);
+ return event.preventDefault();
+ }
+
+ if (this._mouseDistanceMet(event) && this._mouseDelayMet(event)) {
+ this._mouseStarted =
+ (this._mouseStart(this._mouseDownEvent, event) !== false);
+ (this._mouseStarted ? this._mouseDrag(event) : this._mouseUp(event));
+ }
+
+ return !this._mouseStarted;
+ },
+
+ _mouseUp: function(event) {
+ $(document)
+ .unbind('mousemove.'+this.widgetName, this._mouseMoveDelegate)
+ .unbind('mouseup.'+this.widgetName, this._mouseUpDelegate);
+
+ if (this._mouseStarted) {
+ this._mouseStarted = false;
+
+ if (event.target == this._mouseDownEvent.target) {
+ $.data(event.target, this.widgetName + '.preventClickEvent', true);
+ }
+
+ this._mouseStop(event);
+ }
+
+ return false;
+ },
+
+ _mouseDistanceMet: function(event) {
+ return (Math.max(
+ Math.abs(this._mouseDownEvent.pageX - event.pageX),
+ Math.abs(this._mouseDownEvent.pageY - event.pageY)
+ ) >= this.options.distance
+ );
+ },
+
+ _mouseDelayMet: function(event) {
+ return this.mouseDelayMet;
+ },
+
+ // These are placeholder methods, to be overriden by extending plugin
+ _mouseStart: function(event) {},
+ _mouseDrag: function(event) {},
+ _mouseStop: function(event) {},
+ _mouseCapture: function(event) { return true; }
+});
+
+})(jQuery);
+/*
+ * jQuery UI Position 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Position
+ */
+(function( $, undefined ) {
+
+$.ui = $.ui || {};
+
+var horizontalPositions = /left|center|right/,
+ verticalPositions = /top|center|bottom/,
+ center = "center",
+ _position = $.fn.position,
+ _offset = $.fn.offset;
+
+$.fn.position = function( options ) {
+ if ( !options || !options.of ) {
+ return _position.apply( this, arguments );
+ }
+
+ // make a copy, we don't want to modify arguments
+ options = $.extend( {}, options );
+
+ var target = $( options.of ),
+ targetElem = target[0],
+ collision = ( options.collision || "flip" ).split( " " ),
+ offset = options.offset ? options.offset.split( " " ) : [ 0, 0 ],
+ targetWidth,
+ targetHeight,
+ basePosition;
+
+ if ( targetElem.nodeType === 9 ) {
+ targetWidth = target.width();
+ targetHeight = target.height();
+ basePosition = { top: 0, left: 0 };
+ // TODO: use $.isWindow() in 1.9
+ } else if ( targetElem.setTimeout ) {
+ targetWidth = target.width();
+ targetHeight = target.height();
+ basePosition = { top: target.scrollTop(), left: target.scrollLeft() };
+ } else if ( targetElem.preventDefault ) {
+ // force left top to allow flipping
+ options.at = "left top";
+ targetWidth = targetHeight = 0;
+ basePosition = { top: options.of.pageY, left: options.of.pageX };
+ } else {
+ targetWidth = target.outerWidth();
+ targetHeight = target.outerHeight();
+ basePosition = target.offset();
+ }
+
+ // force my and at to have valid horizontal and veritcal positions
+ // if a value is missing or invalid, it will be converted to center
+ $.each( [ "my", "at" ], function() {
+ var pos = ( options[this] || "" ).split( " " );
+ if ( pos.length === 1) {
+ pos = horizontalPositions.test( pos[0] ) ?
+ pos.concat( [center] ) :
+ verticalPositions.test( pos[0] ) ?
+ [ center ].concat( pos ) :
+ [ center, center ];
+ }
+ pos[ 0 ] = horizontalPositions.test( pos[0] ) ? pos[ 0 ] : center;
+ pos[ 1 ] = verticalPositions.test( pos[1] ) ? pos[ 1 ] : center;
+ options[ this ] = pos;
+ });
+
+ // normalize collision option
+ if ( collision.length === 1 ) {
+ collision[ 1 ] = collision[ 0 ];
+ }
+
+ // normalize offset option
+ offset[ 0 ] = parseInt( offset[0], 10 ) || 0;
+ if ( offset.length === 1 ) {
+ offset[ 1 ] = offset[ 0 ];
+ }
+ offset[ 1 ] = parseInt( offset[1], 10 ) || 0;
+
+ if ( options.at[0] === "right" ) {
+ basePosition.left += targetWidth;
+ } else if ( options.at[0] === center ) {
+ basePosition.left += targetWidth / 2;
+ }
+
+ if ( options.at[1] === "bottom" ) {
+ basePosition.top += targetHeight;
+ } else if ( options.at[1] === center ) {
+ basePosition.top += targetHeight / 2;
+ }
+
+ basePosition.left += offset[ 0 ];
+ basePosition.top += offset[ 1 ];
+
+ return this.each(function() {
+ var elem = $( this ),
+ elemWidth = elem.outerWidth(),
+ elemHeight = elem.outerHeight(),
+ marginLeft = parseInt( $.curCSS( this, "marginLeft", true ) ) || 0,
+ marginTop = parseInt( $.curCSS( this, "marginTop", true ) ) || 0,
+ collisionWidth = elemWidth + marginLeft +
+ ( parseInt( $.curCSS( this, "marginRight", true ) ) || 0 ),
+ collisionHeight = elemHeight + marginTop +
+ ( parseInt( $.curCSS( this, "marginBottom", true ) ) || 0 ),
+ position = $.extend( {}, basePosition ),
+ collisionPosition;
+
+ if ( options.my[0] === "right" ) {
+ position.left -= elemWidth;
+ } else if ( options.my[0] === center ) {
+ position.left -= elemWidth / 2;
+ }
+
+ if ( options.my[1] === "bottom" ) {
+ position.top -= elemHeight;
+ } else if ( options.my[1] === center ) {
+ position.top -= elemHeight / 2;
+ }
+
+ // prevent fractions (see #5280)
+ position.left = Math.round( position.left );
+ position.top = Math.round( position.top );
+
+ collisionPosition = {
+ left: position.left - marginLeft,
+ top: position.top - marginTop
+ };
+
+ $.each( [ "left", "top" ], function( i, dir ) {
+ if ( $.ui.position[ collision[i] ] ) {
+ $.ui.position[ collision[i] ][ dir ]( position, {
+ targetWidth: targetWidth,
+ targetHeight: targetHeight,
+ elemWidth: elemWidth,
+ elemHeight: elemHeight,
+ collisionPosition: collisionPosition,
+ collisionWidth: collisionWidth,
+ collisionHeight: collisionHeight,
+ offset: offset,
+ my: options.my,
+ at: options.at
+ });
+ }
+ });
+
+ if ( $.fn.bgiframe ) {
+ elem.bgiframe();
+ }
+ elem.offset( $.extend( position, { using: options.using } ) );
+ });
+};
+
+$.ui.position = {
+ fit: {
+ left: function( position, data ) {
+ var win = $( window ),
+ over = data.collisionPosition.left + data.collisionWidth - win.width() - win.scrollLeft();
+ position.left = over > 0 ? position.left - over : Math.max( position.left - data.collisionPosition.left, position.left );
+ },
+ top: function( position, data ) {
+ var win = $( window ),
+ over = data.collisionPosition.top + data.collisionHeight - win.height() - win.scrollTop();
+ position.top = over > 0 ? position.top - over : Math.max( position.top - data.collisionPosition.top, position.top );
+ }
+ },
+
+ flip: {
+ left: function( position, data ) {
+ if ( data.at[0] === center ) {
+ return;
+ }
+ var win = $( window ),
+ over = data.collisionPosition.left + data.collisionWidth - win.width() - win.scrollLeft(),
+ myOffset = data.my[ 0 ] === "left" ?
+ -data.elemWidth :
+ data.my[ 0 ] === "right" ?
+ data.elemWidth :
+ 0,
+ atOffset = data.at[ 0 ] === "left" ?
+ data.targetWidth :
+ -data.targetWidth,
+ offset = -2 * data.offset[ 0 ];
+ position.left += data.collisionPosition.left < 0 ?
+ myOffset + atOffset + offset :
+ over > 0 ?
+ myOffset + atOffset + offset :
+ 0;
+ },
+ top: function( position, data ) {
+ if ( data.at[1] === center ) {
+ return;
+ }
+ var win = $( window ),
+ over = data.collisionPosition.top + data.collisionHeight - win.height() - win.scrollTop(),
+ myOffset = data.my[ 1 ] === "top" ?
+ -data.elemHeight :
+ data.my[ 1 ] === "bottom" ?
+ data.elemHeight :
+ 0,
+ atOffset = data.at[ 1 ] === "top" ?
+ data.targetHeight :
+ -data.targetHeight,
+ offset = -2 * data.offset[ 1 ];
+ position.top += data.collisionPosition.top < 0 ?
+ myOffset + atOffset + offset :
+ over > 0 ?
+ myOffset + atOffset + offset :
+ 0;
+ }
+ }
+};
+
+// offset setter from jQuery 1.4
+if ( !$.offset.setOffset ) {
+ $.offset.setOffset = function( elem, options ) {
+ // set position first, in-case top/left are set even on static elem
+ if ( /static/.test( $.curCSS( elem, "position" ) ) ) {
+ elem.style.position = "relative";
+ }
+ var curElem = $( elem ),
+ curOffset = curElem.offset(),
+ curTop = parseInt( $.curCSS( elem, "top", true ), 10 ) || 0,
+ curLeft = parseInt( $.curCSS( elem, "left", true ), 10) || 0,
+ props = {
+ top: (options.top - curOffset.top) + curTop,
+ left: (options.left - curOffset.left) + curLeft
+ };
+
+ if ( 'using' in options ) {
+ options.using.call( elem, props );
+ } else {
+ curElem.css( props );
+ }
+ };
+
+ $.fn.offset = function( options ) {
+ var elem = this[ 0 ];
+ if ( !elem || !elem.ownerDocument ) { return null; }
+ if ( options ) {
+ return this.each(function() {
+ $.offset.setOffset( this, options );
+ });
+ }
+ return _offset.call( this );
+ };
+}
+
+}( jQuery ));
+/*
+ * jQuery UI Draggable 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Draggables
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.mouse.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+$.widget("ui.draggable", $.ui.mouse, {
+ widgetEventPrefix: "drag",
+ options: {
+ addClasses: true,
+ appendTo: "parent",
+ axis: false,
+ connectToSortable: false,
+ containment: false,
+ cursor: "auto",
+ cursorAt: false,
+ grid: false,
+ handle: false,
+ helper: "original",
+ iframeFix: false,
+ opacity: false,
+ refreshPositions: false,
+ revert: false,
+ revertDuration: 500,
+ scope: "default",
+ scroll: true,
+ scrollSensitivity: 20,
+ scrollSpeed: 20,
+ snap: false,
+ snapMode: "both",
+ snapTolerance: 20,
+ stack: false,
+ zIndex: false
+ },
+ _create: function() {
+
+ if (this.options.helper == 'original' && !(/^(?:r|a|f)/).test(this.element.css("position")))
+ this.element[0].style.position = 'relative';
+
+ (this.options.addClasses && this.element.addClass("ui-draggable"));
+ (this.options.disabled && this.element.addClass("ui-draggable-disabled"));
+
+ this._mouseInit();
+
+ },
+
+ destroy: function() {
+ if(!this.element.data('draggable')) return;
+ this.element
+ .removeData("draggable")
+ .unbind(".draggable")
+ .removeClass("ui-draggable"
+ + " ui-draggable-dragging"
+ + " ui-draggable-disabled");
+ this._mouseDestroy();
+
+ return this;
+ },
+
+ _mouseCapture: function(event) {
+
+ var o = this.options;
+
+ // among others, prevent a drag on a resizable-handle
+ if (this.helper || o.disabled || $(event.target).is('.ui-resizable-handle'))
+ return false;
+
+ //Quit if we're not on a valid handle
+ this.handle = this._getHandle(event);
+ if (!this.handle)
+ return false;
+
+ if ( o.iframeFix ) {
+ $(o.iframeFix === true ? "iframe" : o.iframeFix).each(function() {
+ $('<div class="ui-draggable-iframeFix" style="background: #fff;"></div>')
+ .css({
+ width: this.offsetWidth+"px", height: this.offsetHeight+"px",
+ position: "absolute", opacity: "0.001", zIndex: 1000
+ })
+ .css($(this).offset())
+ .appendTo("body");
+ });
+ }
+
+ return true;
+
+ },
+
+ _mouseStart: function(event) {
+
+ var o = this.options;
+
+ //Create and append the visible helper
+ this.helper = this._createHelper(event);
+
+ //Cache the helper size
+ this._cacheHelperProportions();
+
+ //If ddmanager is used for droppables, set the global draggable
+ if($.ui.ddmanager)
+ $.ui.ddmanager.current = this;
+
+ /*
+ * - Position generation -
+ * This block generates everything position related - it's the core of draggables.
+ */
+
+ //Cache the margins of the original element
+ this._cacheMargins();
+
+ //Store the helper's css position
+ this.cssPosition = this.helper.css("position");
+ this.scrollParent = this.helper.scrollParent();
+
+ //The element's absolute position on the page minus margins
+ this.offset = this.positionAbs = this.element.offset();
+ this.offset = {
+ top: this.offset.top - this.margins.top,
+ left: this.offset.left - this.margins.left
+ };
+
+ $.extend(this.offset, {
+ click: { //Where the click happened, relative to the element
+ left: event.pageX - this.offset.left,
+ top: event.pageY - this.offset.top
+ },
+ parent: this._getParentOffset(),
+ relative: this._getRelativeOffset() //This is a relative to absolute position minus the actual position calculation - only used for relative positioned helper
+ });
+
+ //Generate the original position
+ this.originalPosition = this.position = this._generatePosition(event);
+ this.originalPageX = event.pageX;
+ this.originalPageY = event.pageY;
+
+ //Adjust the mouse offset relative to the helper if 'cursorAt' is supplied
+ (o.cursorAt && this._adjustOffsetFromHelper(o.cursorAt));
+
+ //Set a containment if given in the options
+ if(o.containment)
+ this._setContainment();
+
+ //Trigger event + callbacks
+ if(this._trigger("start", event) === false) {
+ this._clear();
+ return false;
+ }
+
+ //Recache the helper size
+ this._cacheHelperProportions();
+
+ //Prepare the droppable offsets
+ if ($.ui.ddmanager && !o.dropBehaviour)
+ $.ui.ddmanager.prepareOffsets(this, event);
+
+ this.helper.addClass("ui-draggable-dragging");
+ this._mouseDrag(event, true); //Execute the drag once - this causes the helper not to be visible before getting its correct position
+
+ //If the ddmanager is used for droppables, inform the manager that dragging has started (see #5003)
+ if ( $.ui.ddmanager ) $.ui.ddmanager.dragStart(this, event);
+
+ return true;
+ },
+
+ _mouseDrag: function(event, noPropagation) {
+
+ //Compute the helpers position
+ this.position = this._generatePosition(event);
+ this.positionAbs = this._convertPositionTo("absolute");
+
+ //Call plugins and callbacks and use the resulting position if something is returned
+ if (!noPropagation) {
+ var ui = this._uiHash();
+ if(this._trigger('drag', event, ui) === false) {
+ this._mouseUp({});
+ return false;
+ }
+ this.position = ui.position;
+ }
+
+ if(!this.options.axis || this.options.axis != "y") this.helper[0].style.left = this.position.left+'px';
+ if(!this.options.axis || this.options.axis != "x") this.helper[0].style.top = this.position.top+'px';
+ if($.ui.ddmanager) $.ui.ddmanager.drag(this, event);
+
+ return false;
+ },
+
+ _mouseStop: function(event) {
+
+ //If we are using droppables, inform the manager about the drop
+ var dropped = false;
+ if ($.ui.ddmanager && !this.options.dropBehaviour)
+ dropped = $.ui.ddmanager.drop(this, event);
+
+ //if a drop comes from outside (a sortable)
+ if(this.dropped) {
+ dropped = this.dropped;
+ this.dropped = false;
+ }
+
+ //if the original element is removed, don't bother to continue if helper is set to "original"
+ if((!this.element[0] || !this.element[0].parentNode) && this.options.helper == "original")
+ return false;
+
+ if((this.options.revert == "invalid" && !dropped) || (this.options.revert == "valid" && dropped) || this.options.revert === true || ($.isFunction(this.options.revert) && this.options.revert.call(this.element, dropped))) {
+ var self = this;
+ $(this.helper).animate(this.originalPosition, parseInt(this.options.revertDuration, 10), function() {
+ if(self._trigger("stop", event) !== false) {
+ self._clear();
+ }
+ });
+ } else {
+ if(this._trigger("stop", event) !== false) {
+ this._clear();
+ }
+ }
+
+ return false;
+ },
+
+ _mouseUp: function(event) {
+ if (this.options.iframeFix === true) {
+ $("div.ui-draggable-iframeFix").each(function() {
+ this.parentNode.removeChild(this);
+ }); //Remove frame helpers
+ }
+
+ //If the ddmanager is used for droppables, inform the manager that dragging has stopped (see #5003)
+ if( $.ui.ddmanager ) $.ui.ddmanager.dragStop(this, event);
+
+ return $.ui.mouse.prototype._mouseUp.call(this, event);
+ },
+
+ cancel: function() {
+
+ if(this.helper.is(".ui-draggable-dragging")) {
+ this._mouseUp({});
+ } else {
+ this._clear();
+ }
+
+ return this;
+
+ },
+
+ _getHandle: function(event) {
+
+ var handle = !this.options.handle || !$(this.options.handle, this.element).length ? true : false;
+ $(this.options.handle, this.element)
+ .find("*")
+ .andSelf()
+ .each(function() {
+ if(this == event.target) handle = true;
+ });
+
+ return handle;
+
+ },
+
+ _createHelper: function(event) {
+
+ var o = this.options;
+ var helper = $.isFunction(o.helper) ? $(o.helper.apply(this.element[0], [event])) : (o.helper == 'clone' ? this.element.clone().removeAttr('id') : this.element);
+
+ if(!helper.parents('body').length)
+ helper.appendTo((o.appendTo == 'parent' ? this.element[0].parentNode : o.appendTo));
+
+ if(helper[0] != this.element[0] && !(/(fixed|absolute)/).test(helper.css("position")))
+ helper.css("position", "absolute");
+
+ return helper;
+
+ },
+
+ _adjustOffsetFromHelper: function(obj) {
+ if (typeof obj == 'string') {
+ obj = obj.split(' ');
+ }
+ if ($.isArray(obj)) {
+ obj = {left: +obj[0], top: +obj[1] || 0};
+ }
+ if ('left' in obj) {
+ this.offset.click.left = obj.left + this.margins.left;
+ }
+ if ('right' in obj) {
+ this.offset.click.left = this.helperProportions.width - obj.right + this.margins.left;
+ }
+ if ('top' in obj) {
+ this.offset.click.top = obj.top + this.margins.top;
+ }
+ if ('bottom' in obj) {
+ this.offset.click.top = this.helperProportions.height - obj.bottom + this.margins.top;
+ }
+ },
+
+ _getParentOffset: function() {
+
+ //Get the offsetParent and cache its position
+ this.offsetParent = this.helper.offsetParent();
+ var po = this.offsetParent.offset();
+
+ // This is a special case where we need to modify a offset calculated on start, since the following happened:
+ // 1. The position of the helper is absolute, so it's position is calculated based on the next positioned parent
+ // 2. The actual offset parent is a child of the scroll parent, and the scroll parent isn't the document, which means that
+ // the scroll is included in the initial calculation of the offset of the parent, and never recalculated upon drag
+ if(this.cssPosition == 'absolute' && this.scrollParent[0] != document && $.ui.contains(this.scrollParent[0], this.offsetParent[0])) {
+ po.left += this.scrollParent.scrollLeft();
+ po.top += this.scrollParent.scrollTop();
+ }
+
+ if((this.offsetParent[0] == document.body) //This needs to be actually done for all browsers, since pageX/pageY includes this information
+ || (this.offsetParent[0].tagName && this.offsetParent[0].tagName.toLowerCase() == 'html' && $.browser.msie)) //Ugly IE fix
+ po = { top: 0, left: 0 };
+
+ return {
+ top: po.top + (parseInt(this.offsetParent.css("borderTopWidth"),10) || 0),
+ left: po.left + (parseInt(this.offsetParent.css("borderLeftWidth"),10) || 0)
+ };
+
+ },
+
+ _getRelativeOffset: function() {
+
+ if(this.cssPosition == "relative") {
+ var p = this.element.position();
+ return {
+ top: p.top - (parseInt(this.helper.css("top"),10) || 0) + this.scrollParent.scrollTop(),
+ left: p.left - (parseInt(this.helper.css("left"),10) || 0) + this.scrollParent.scrollLeft()
+ };
+ } else {
+ return { top: 0, left: 0 };
+ }
+
+ },
+
+ _cacheMargins: function() {
+ this.margins = {
+ left: (parseInt(this.element.css("marginLeft"),10) || 0),
+ top: (parseInt(this.element.css("marginTop"),10) || 0),
+ right: (parseInt(this.element.css("marginRight"),10) || 0),
+ bottom: (parseInt(this.element.css("marginBottom"),10) || 0)
+ };
+ },
+
+ _cacheHelperProportions: function() {
+ this.helperProportions = {
+ width: this.helper.outerWidth(),
+ height: this.helper.outerHeight()
+ };
+ },
+
+ _setContainment: function() {
+
+ var o = this.options;
+ if(o.containment == 'parent') o.containment = this.helper[0].parentNode;
+ if(o.containment == 'document' || o.containment == 'window') this.containment = [
+ o.containment == 'document' ? 0 : $(window).scrollLeft() - this.offset.relative.left - this.offset.parent.left,
+ o.containment == 'document' ? 0 : $(window).scrollTop() - this.offset.relative.top - this.offset.parent.top,
+ (o.containment == 'document' ? 0 : $(window).scrollLeft()) + $(o.containment == 'document' ? document : window).width() - this.helperProportions.width - this.margins.left,
+ (o.containment == 'document' ? 0 : $(window).scrollTop()) + ($(o.containment == 'document' ? document : window).height() || document.body.parentNode.scrollHeight) - this.helperProportions.height - this.margins.top
+ ];
+
+ if(!(/^(document|window|parent)$/).test(o.containment) && o.containment.constructor != Array) {
+ var c = $(o.containment);
+ var ce = c[0]; if(!ce) return;
+ var co = c.offset();
+ var over = ($(ce).css("overflow") != 'hidden');
+
+ this.containment = [
+ (parseInt($(ce).css("borderLeftWidth"),10) || 0) + (parseInt($(ce).css("paddingLeft"),10) || 0),
+ (parseInt($(ce).css("borderTopWidth"),10) || 0) + (parseInt($(ce).css("paddingTop"),10) || 0),
+ (over ? Math.max(ce.scrollWidth,ce.offsetWidth) : ce.offsetWidth) - (parseInt($(ce).css("borderLeftWidth"),10) || 0) - (parseInt($(ce).css("paddingRight"),10) || 0) - this.helperProportions.width - this.margins.left - this.margins.right,
+ (over ? Math.max(ce.scrollHeight,ce.offsetHeight) : ce.offsetHeight) - (parseInt($(ce).css("borderTopWidth"),10) || 0) - (parseInt($(ce).css("paddingBottom"),10) || 0) - this.helperProportions.height - this.margins.top - this.margins.bottom
+ ];
+ this.relative_container = c;
+
+ } else if(o.containment.constructor == Array) {
+ this.containment = o.containment;
+ }
+
+ },
+
+ _convertPositionTo: function(d, pos) {
+
+ if(!pos) pos = this.position;
+ var mod = d == "absolute" ? 1 : -1;
+ var o = this.options, scroll = this.cssPosition == 'absolute' && !(this.scrollParent[0] != document && $.ui.contains(this.scrollParent[0], this.offsetParent[0])) ? this.offsetParent : this.scrollParent, scrollIsRootNode = (/(html|body)/i).test(scroll[0].tagName);
+
+ return {
+ top: (
+ pos.top // The absolute mouse position
+ + this.offset.relative.top * mod // Only for relative positioned nodes: Relative offset from element to offset parent
+ + this.offset.parent.top * mod // The offsetParent's offset without borders (offset + border)
+ - ($.browser.safari && $.browser.version < 526 && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollTop() : ( scrollIsRootNode ? 0 : scroll.scrollTop() ) ) * mod)
+ ),
+ left: (
+ pos.left // The absolute mouse position
+ + this.offset.relative.left * mod // Only for relative positioned nodes: Relative offset from element to offset parent
+ + this.offset.parent.left * mod // The offsetParent's offset without borders (offset + border)
+ - ($.browser.safari && $.browser.version < 526 && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollLeft() : scrollIsRootNode ? 0 : scroll.scrollLeft() ) * mod)
+ )
+ };
+
+ },
+
+ _generatePosition: function(event) {
+
+ var o = this.options, scroll = this.cssPosition == 'absolute' && !(this.scrollParent[0] != document && $.ui.contains(this.scrollParent[0], this.offsetParent[0])) ? this.offsetParent : this.scrollParent, scrollIsRootNode = (/(html|body)/i).test(scroll[0].tagName);
+ var pageX = event.pageX;
+ var pageY = event.pageY;
+
+ /*
+ * - Position constraining -
+ * Constrain the position to a mix of grid, containment.
+ */
+
+ if(this.originalPosition) { //If we are not dragging yet, we won't check for options
+ var containment;
+ if(this.containment) {
+ if (this.relative_container){
+ var co = this.relative_container.offset();
+ containment = [ this.containment[0] + co.left,
+ this.containment[1] + co.top,
+ this.containment[2] + co.left,
+ this.containment[3] + co.top ];
+ }
+ else {
+ containment = this.containment;
+ }
+
+ if(event.pageX - this.offset.click.left < containment[0]) pageX = containment[0] + this.offset.click.left;
+ if(event.pageY - this.offset.click.top < containment[1]) pageY = containment[1] + this.offset.click.top;
+ if(event.pageX - this.offset.click.left > containment[2]) pageX = containment[2] + this.offset.click.left;
+ if(event.pageY - this.offset.click.top > containment[3]) pageY = containment[3] + this.offset.click.top;
+ }
+
+ if(o.grid) {
+ //Check for grid elements set to 0 to prevent divide by 0 error causing invalid argument errors in IE (see ticket #6950)
+ var top = o.grid[1] ? this.originalPageY + Math.round((pageY - this.originalPageY) / o.grid[1]) * o.grid[1] : this.originalPageY;
+ pageY = containment ? (!(top - this.offset.click.top < containment[1] || top - this.offset.click.top > containment[3]) ? top : (!(top - this.offset.click.top < containment[1]) ? top - o.grid[1] : top + o.grid[1])) : top;
+
+ var left = o.grid[0] ? this.originalPageX + Math.round((pageX - this.originalPageX) / o.grid[0]) * o.grid[0] : this.originalPageX;
+ pageX = containment ? (!(left - this.offset.click.left < containment[0] || left - this.offset.click.left > containment[2]) ? left : (!(left - this.offset.click.left < containment[0]) ? left - o.grid[0] : left + o.grid[0])) : left;
+ }
+
+ }
+
+ return {
+ top: (
+ pageY // The absolute mouse position
+ - this.offset.click.top // Click offset (relative to the element)
+ - this.offset.relative.top // Only for relative positioned nodes: Relative offset from element to offset parent
+ - this.offset.parent.top // The offsetParent's offset without borders (offset + border)
+ + ($.browser.safari && $.browser.version < 526 && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollTop() : ( scrollIsRootNode ? 0 : scroll.scrollTop() ) ))
+ ),
+ left: (
+ pageX // The absolute mouse position
+ - this.offset.click.left // Click offset (relative to the element)
+ - this.offset.relative.left // Only for relative positioned nodes: Relative offset from element to offset parent
+ - this.offset.parent.left // The offsetParent's offset without borders (offset + border)
+ + ($.browser.safari && $.browser.version < 526 && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollLeft() : scrollIsRootNode ? 0 : scroll.scrollLeft() ))
+ )
+ };
+
+ },
+
+ _clear: function() {
+ this.helper.removeClass("ui-draggable-dragging");
+ if(this.helper[0] != this.element[0] && !this.cancelHelperRemoval) this.helper.remove();
+ //if($.ui.ddmanager) $.ui.ddmanager.current = null;
+ this.helper = null;
+ this.cancelHelperRemoval = false;
+ },
+
+ // From now on bulk stuff - mainly helpers
+
+ _trigger: function(type, event, ui) {
+ ui = ui || this._uiHash();
+ $.ui.plugin.call(this, type, [event, ui]);
+ if(type == "drag") this.positionAbs = this._convertPositionTo("absolute"); //The absolute position has to be recalculated after plugins
+ return $.Widget.prototype._trigger.call(this, type, event, ui);
+ },
+
+ plugins: {},
+
+ _uiHash: function(event) {
+ return {
+ helper: this.helper,
+ position: this.position,
+ originalPosition: this.originalPosition,
+ offset: this.positionAbs
+ };
+ }
+
+});
+
+$.extend($.ui.draggable, {
+ version: "1.8.16"
+});
+
+$.ui.plugin.add("draggable", "connectToSortable", {
+ start: function(event, ui) {
+
+ var inst = $(this).data("draggable"), o = inst.options,
+ uiSortable = $.extend({}, ui, { item: inst.element });
+ inst.sortables = [];
+ $(o.connectToSortable).each(function() {
+ var sortable = $.data(this, 'sortable');
+ if (sortable && !sortable.options.disabled) {
+ inst.sortables.push({
+ instance: sortable,
+ shouldRevert: sortable.options.revert
+ });
+ sortable.refreshPositions(); // Call the sortable's refreshPositions at drag start to refresh the containerCache since the sortable container cache is used in drag and needs to be up to date (this will ensure it's initialised as well as being kept in step with any changes that might have happened on the page).
+ sortable._trigger("activate", event, uiSortable);
+ }
+ });
+
+ },
+ stop: function(event, ui) {
+
+ //If we are still over the sortable, we fake the stop event of the sortable, but also remove helper
+ var inst = $(this).data("draggable"),
+ uiSortable = $.extend({}, ui, { item: inst.element });
+
+ $.each(inst.sortables, function() {
+ if(this.instance.isOver) {
+
+ this.instance.isOver = 0;
+
+ inst.cancelHelperRemoval = true; //Don't remove the helper in the draggable instance
+ this.instance.cancelHelperRemoval = false; //Remove it in the sortable instance (so sortable plugins like revert still work)
+
+ //The sortable revert is supported, and we have to set a temporary dropped variable on the draggable to support revert: 'valid/invalid'
+ if(this.shouldRevert) this.instance.options.revert = true;
+
+ //Trigger the stop of the sortable
+ this.instance._mouseStop(event);
+
+ this.instance.options.helper = this.instance.options._helper;
+
+ //If the helper has been the original item, restore properties in the sortable
+ if(inst.options.helper == 'original')
+ this.instance.currentItem.css({ top: 'auto', left: 'auto' });
+
+ } else {
+ this.instance.cancelHelperRemoval = false; //Remove the helper in the sortable instance
+ this.instance._trigger("deactivate", event, uiSortable);
+ }
+
+ });
+
+ },
+ drag: function(event, ui) {
+
+ var inst = $(this).data("draggable"), self = this;
+
+ var checkPos = function(o) {
+ var dyClick = this.offset.click.top, dxClick = this.offset.click.left;
+ var helperTop = this.positionAbs.top, helperLeft = this.positionAbs.left;
+ var itemHeight = o.height, itemWidth = o.width;
+ var itemTop = o.top, itemLeft = o.left;
+
+ return $.ui.isOver(helperTop + dyClick, helperLeft + dxClick, itemTop, itemLeft, itemHeight, itemWidth);
+ };
+
+ $.each(inst.sortables, function(i) {
+
+ //Copy over some variables to allow calling the sortable's native _intersectsWith
+ this.instance.positionAbs = inst.positionAbs;
+ this.instance.helperProportions = inst.helperProportions;
+ this.instance.offset.click = inst.offset.click;
+
+ if(this.instance._intersectsWith(this.instance.containerCache)) {
+
+ //If it intersects, we use a little isOver variable and set it once, so our move-in stuff gets fired only once
+ if(!this.instance.isOver) {
+
+ this.instance.isOver = 1;
+ //Now we fake the start of dragging for the sortable instance,
+ //by cloning the list group item, appending it to the sortable and using it as inst.currentItem
+ //We can then fire the start event of the sortable with our passed browser event, and our own helper (so it doesn't create a new one)
+ this.instance.currentItem = $(self).clone().removeAttr('id').appendTo(this.instance.element).data("sortable-item", true);
+ this.instance.options._helper = this.instance.options.helper; //Store helper option to later restore it
+ this.instance.options.helper = function() { return ui.helper[0]; };
+
+ event.target = this.instance.currentItem[0];
+ this.instance._mouseCapture(event, true);
+ this.instance._mouseStart(event, true, true);
+
+ //Because the browser event is way off the new appended portlet, we modify a couple of variables to reflect the changes
+ this.instance.offset.click.top = inst.offset.click.top;
+ this.instance.offset.click.left = inst.offset.click.left;
+ this.instance.offset.parent.left -= inst.offset.parent.left - this.instance.offset.parent.left;
+ this.instance.offset.parent.top -= inst.offset.parent.top - this.instance.offset.parent.top;
+
+ inst._trigger("toSortable", event);
+ inst.dropped = this.instance.element; //draggable revert needs that
+ //hack so receive/update callbacks work (mostly)
+ inst.currentItem = inst.element;
+ this.instance.fromOutside = inst;
+
+ }
+
+ //Provided we did all the previous steps, we can fire the drag event of the sortable on every draggable drag, when it intersects with the sortable
+ if(this.instance.currentItem) this.instance._mouseDrag(event);
+
+ } else {
+
+ //If it doesn't intersect with the sortable, and it intersected before,
+ //we fake the drag stop of the sortable, but make sure it doesn't remove the helper by using cancelHelperRemoval
+ if(this.instance.isOver) {
+
+ this.instance.isOver = 0;
+ this.instance.cancelHelperRemoval = true;
+
+ //Prevent reverting on this forced stop
+ this.instance.options.revert = false;
+
+ // The out event needs to be triggered independently
+ this.instance._trigger('out', event, this.instance._uiHash(this.instance));
+
+ this.instance._mouseStop(event, true);
+ this.instance.options.helper = this.instance.options._helper;
+
+ //Now we remove our currentItem, the list group clone again, and the placeholder, and animate the helper back to it's original size
+ this.instance.currentItem.remove();
+ if(this.instance.placeholder) this.instance.placeholder.remove();
+
+ inst._trigger("fromSortable", event);
+ inst.dropped = false; //draggable revert needs that
+ }
+
+ };
+
+ });
+
+ }
+});
+
+$.ui.plugin.add("draggable", "cursor", {
+ start: function(event, ui) {
+ var t = $('body'), o = $(this).data('draggable').options;
+ if (t.css("cursor")) o._cursor = t.css("cursor");
+ t.css("cursor", o.cursor);
+ },
+ stop: function(event, ui) {
+ var o = $(this).data('draggable').options;
+ if (o._cursor) $('body').css("cursor", o._cursor);
+ }
+});
+
+$.ui.plugin.add("draggable", "opacity", {
+ start: function(event, ui) {
+ var t = $(ui.helper), o = $(this).data('draggable').options;
+ if(t.css("opacity")) o._opacity = t.css("opacity");
+ t.css('opacity', o.opacity);
+ },
+ stop: function(event, ui) {
+ var o = $(this).data('draggable').options;
+ if(o._opacity) $(ui.helper).css('opacity', o._opacity);
+ }
+});
+
+$.ui.plugin.add("draggable", "scroll", {
+ start: function(event, ui) {
+ var i = $(this).data("draggable");
+ if(i.scrollParent[0] != document && i.scrollParent[0].tagName != 'HTML') i.overflowOffset = i.scrollParent.offset();
+ },
+ drag: function(event, ui) {
+
+ var i = $(this).data("draggable"), o = i.options, scrolled = false;
+
+ if(i.scrollParent[0] != document && i.scrollParent[0].tagName != 'HTML') {
+
+ if(!o.axis || o.axis != 'x') {
+ if((i.overflowOffset.top + i.scrollParent[0].offsetHeight) - event.pageY < o.scrollSensitivity)
+ i.scrollParent[0].scrollTop = scrolled = i.scrollParent[0].scrollTop + o.scrollSpeed;
+ else if(event.pageY - i.overflowOffset.top < o.scrollSensitivity)
+ i.scrollParent[0].scrollTop = scrolled = i.scrollParent[0].scrollTop - o.scrollSpeed;
+ }
+
+ if(!o.axis || o.axis != 'y') {
+ if((i.overflowOffset.left + i.scrollParent[0].offsetWidth) - event.pageX < o.scrollSensitivity)
+ i.scrollParent[0].scrollLeft = scrolled = i.scrollParent[0].scrollLeft + o.scrollSpeed;
+ else if(event.pageX - i.overflowOffset.left < o.scrollSensitivity)
+ i.scrollParent[0].scrollLeft = scrolled = i.scrollParent[0].scrollLeft - o.scrollSpeed;
+ }
+
+ } else {
+
+ if(!o.axis || o.axis != 'x') {
+ if(event.pageY - $(document).scrollTop() < o.scrollSensitivity)
+ scrolled = $(document).scrollTop($(document).scrollTop() - o.scrollSpeed);
+ else if($(window).height() - (event.pageY - $(document).scrollTop()) < o.scrollSensitivity)
+ scrolled = $(document).scrollTop($(document).scrollTop() + o.scrollSpeed);
+ }
+
+ if(!o.axis || o.axis != 'y') {
+ if(event.pageX - $(document).scrollLeft() < o.scrollSensitivity)
+ scrolled = $(document).scrollLeft($(document).scrollLeft() - o.scrollSpeed);
+ else if($(window).width() - (event.pageX - $(document).scrollLeft()) < o.scrollSensitivity)
+ scrolled = $(document).scrollLeft($(document).scrollLeft() + o.scrollSpeed);
+ }
+
+ }
+
+ if(scrolled !== false && $.ui.ddmanager && !o.dropBehaviour)
+ $.ui.ddmanager.prepareOffsets(i, event);
+
+ }
+});
+
+$.ui.plugin.add("draggable", "snap", {
+ start: function(event, ui) {
+
+ var i = $(this).data("draggable"), o = i.options;
+ i.snapElements = [];
+
+ $(o.snap.constructor != String ? ( o.snap.items || ':data(draggable)' ) : o.snap).each(function() {
+ var $t = $(this); var $o = $t.offset();
+ if(this != i.element[0]) i.snapElements.push({
+ item: this,
+ width: $t.outerWidth(), height: $t.outerHeight(),
+ top: $o.top, left: $o.left
+ });
+ });
+
+ },
+ drag: function(event, ui) {
+
+ var inst = $(this).data("draggable"), o = inst.options;
+ var d = o.snapTolerance;
+
+ var x1 = ui.offset.left, x2 = x1 + inst.helperProportions.width,
+ y1 = ui.offset.top, y2 = y1 + inst.helperProportions.height;
+
+ for (var i = inst.snapElements.length - 1; i >= 0; i--){
+
+ var l = inst.snapElements[i].left, r = l + inst.snapElements[i].width,
+ t = inst.snapElements[i].top, b = t + inst.snapElements[i].height;
+
+ //Yes, I know, this is insane ;)
+ if(!((l-d < x1 && x1 < r+d && t-d < y1 && y1 < b+d) || (l-d < x1 && x1 < r+d && t-d < y2 && y2 < b+d) || (l-d < x2 && x2 < r+d && t-d < y1 && y1 < b+d) || (l-d < x2 && x2 < r+d && t-d < y2 && y2 < b+d))) {
+ if(inst.snapElements[i].snapping) (inst.options.snap.release && inst.options.snap.release.call(inst.element, event, $.extend(inst._uiHash(), { snapItem: inst.snapElements[i].item })));
+ inst.snapElements[i].snapping = false;
+ continue;
+ }
+
+ if(o.snapMode != 'inner') {
+ var ts = Math.abs(t - y2) <= d;
+ var bs = Math.abs(b - y1) <= d;
+ var ls = Math.abs(l - x2) <= d;
+ var rs = Math.abs(r - x1) <= d;
+ if(ts) ui.position.top = inst._convertPositionTo("relative", { top: t - inst.helperProportions.height, left: 0 }).top - inst.margins.top;
+ if(bs) ui.position.top = inst._convertPositionTo("relative", { top: b, left: 0 }).top - inst.margins.top;
+ if(ls) ui.position.left = inst._convertPositionTo("relative", { top: 0, left: l - inst.helperProportions.width }).left - inst.margins.left;
+ if(rs) ui.position.left = inst._convertPositionTo("relative", { top: 0, left: r }).left - inst.margins.left;
+ }
+
+ var first = (ts || bs || ls || rs);
+
+ if(o.snapMode != 'outer') {
+ var ts = Math.abs(t - y1) <= d;
+ var bs = Math.abs(b - y2) <= d;
+ var ls = Math.abs(l - x1) <= d;
+ var rs = Math.abs(r - x2) <= d;
+ if(ts) ui.position.top = inst._convertPositionTo("relative", { top: t, left: 0 }).top - inst.margins.top;
+ if(bs) ui.position.top = inst._convertPositionTo("relative", { top: b - inst.helperProportions.height, left: 0 }).top - inst.margins.top;
+ if(ls) ui.position.left = inst._convertPositionTo("relative", { top: 0, left: l }).left - inst.margins.left;
+ if(rs) ui.position.left = inst._convertPositionTo("relative", { top: 0, left: r - inst.helperProportions.width }).left - inst.margins.left;
+ }
+
+ if(!inst.snapElements[i].snapping && (ts || bs || ls || rs || first))
+ (inst.options.snap.snap && inst.options.snap.snap.call(inst.element, event, $.extend(inst._uiHash(), { snapItem: inst.snapElements[i].item })));
+ inst.snapElements[i].snapping = (ts || bs || ls || rs || first);
+
+ };
+
+ }
+});
+
+$.ui.plugin.add("draggable", "stack", {
+ start: function(event, ui) {
+
+ var o = $(this).data("draggable").options;
+
+ var group = $.makeArray($(o.stack)).sort(function(a,b) {
+ return (parseInt($(a).css("zIndex"),10) || 0) - (parseInt($(b).css("zIndex"),10) || 0);
+ });
+ if (!group.length) { return; }
+
+ var min = parseInt(group[0].style.zIndex) || 0;
+ $(group).each(function(i) {
+ this.style.zIndex = min + i;
+ });
+
+ this[0].style.zIndex = min + group.length;
+
+ }
+});
+
+$.ui.plugin.add("draggable", "zIndex", {
+ start: function(event, ui) {
+ var t = $(ui.helper), o = $(this).data("draggable").options;
+ if(t.css("zIndex")) o._zIndex = t.css("zIndex");
+ t.css('zIndex', o.zIndex);
+ },
+ stop: function(event, ui) {
+ var o = $(this).data("draggable").options;
+ if(o._zIndex) $(ui.helper).css('zIndex', o._zIndex);
+ }
+});
+
+})(jQuery);
+/*
+ * jQuery UI Droppable 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Droppables
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ * jquery.ui.mouse.js
+ * jquery.ui.draggable.js
+ */
+(function( $, undefined ) {
+
+$.widget("ui.droppable", {
+ widgetEventPrefix: "drop",
+ options: {
+ accept: '*',
+ activeClass: false,
+ addClasses: true,
+ greedy: false,
+ hoverClass: false,
+ scope: 'default',
+ tolerance: 'intersect'
+ },
+ _create: function() {
+
+ var o = this.options, accept = o.accept;
+ this.isover = 0; this.isout = 1;
+
+ this.accept = $.isFunction(accept) ? accept : function(d) {
+ return d.is(accept);
+ };
+
+ //Store the droppable's proportions
+ this.proportions = { width: this.element[0].offsetWidth, height: this.element[0].offsetHeight };
+
+ // Add the reference and positions to the manager
+ $.ui.ddmanager.droppables[o.scope] = $.ui.ddmanager.droppables[o.scope] || [];
+ $.ui.ddmanager.droppables[o.scope].push(this);
+
+ (o.addClasses && this.element.addClass("ui-droppable"));
+
+ },
+
+ destroy: function() {
+ var drop = $.ui.ddmanager.droppables[this.options.scope];
+ for ( var i = 0; i < drop.length; i++ )
+ if ( drop[i] == this )
+ drop.splice(i, 1);
+
+ this.element
+ .removeClass("ui-droppable ui-droppable-disabled")
+ .removeData("droppable")
+ .unbind(".droppable");
+
+ return this;
+ },
+
+ _setOption: function(key, value) {
+
+ if(key == 'accept') {
+ this.accept = $.isFunction(value) ? value : function(d) {
+ return d.is(value);
+ };
+ }
+ $.Widget.prototype._setOption.apply(this, arguments);
+ },
+
+ _activate: function(event) {
+ var draggable = $.ui.ddmanager.current;
+ if(this.options.activeClass) this.element.addClass(this.options.activeClass);
+ (draggable && this._trigger('activate', event, this.ui(draggable)));
+ },
+
+ _deactivate: function(event) {
+ var draggable = $.ui.ddmanager.current;
+ if(this.options.activeClass) this.element.removeClass(this.options.activeClass);
+ (draggable && this._trigger('deactivate', event, this.ui(draggable)));
+ },
+
+ _over: function(event) {
+
+ var draggable = $.ui.ddmanager.current;
+ if (!draggable || (draggable.currentItem || draggable.element)[0] == this.element[0]) return; // Bail if draggable and droppable are same element
+
+ if (this.accept.call(this.element[0],(draggable.currentItem || draggable.element))) {
+ if(this.options.hoverClass) this.element.addClass(this.options.hoverClass);
+ this._trigger('over', event, this.ui(draggable));
+ }
+
+ },
+
+ _out: function(event) {
+
+ var draggable = $.ui.ddmanager.current;
+ if (!draggable || (draggable.currentItem || draggable.element)[0] == this.element[0]) return; // Bail if draggable and droppable are same element
+
+ if (this.accept.call(this.element[0],(draggable.currentItem || draggable.element))) {
+ if(this.options.hoverClass) this.element.removeClass(this.options.hoverClass);
+ this._trigger('out', event, this.ui(draggable));
+ }
+
+ },
+
+ _drop: function(event,custom) {
+
+ var draggable = custom || $.ui.ddmanager.current;
+ if (!draggable || (draggable.currentItem || draggable.element)[0] == this.element[0]) return false; // Bail if draggable and droppable are same element
+
+ var childrenIntersection = false;
+ this.element.find(":data(droppable)").not(".ui-draggable-dragging").each(function() {
+ var inst = $.data(this, 'droppable');
+ if(
+ inst.options.greedy
+ && !inst.options.disabled
+ && inst.options.scope == draggable.options.scope
+ && inst.accept.call(inst.element[0], (draggable.currentItem || draggable.element))
+ && $.ui.intersect(draggable, $.extend(inst, { offset: inst.element.offset() }), inst.options.tolerance)
+ ) { childrenIntersection = true; return false; }
+ });
+ if(childrenIntersection) return false;
+
+ if(this.accept.call(this.element[0],(draggable.currentItem || draggable.element))) {
+ if(this.options.activeClass) this.element.removeClass(this.options.activeClass);
+ if(this.options.hoverClass) this.element.removeClass(this.options.hoverClass);
+ this._trigger('drop', event, this.ui(draggable));
+ return this.element;
+ }
+
+ return false;
+
+ },
+
+ ui: function(c) {
+ return {
+ draggable: (c.currentItem || c.element),
+ helper: c.helper,
+ position: c.position,
+ offset: c.positionAbs
+ };
+ }
+
+});
+
+$.extend($.ui.droppable, {
+ version: "1.8.16"
+});
+
+$.ui.intersect = function(draggable, droppable, toleranceMode) {
+
+ if (!droppable.offset) return false;
+
+ var x1 = (draggable.positionAbs || draggable.position.absolute).left, x2 = x1 + draggable.helperProportions.width,
+ y1 = (draggable.positionAbs || draggable.position.absolute).top, y2 = y1 + draggable.helperProportions.height;
+ var l = droppable.offset.left, r = l + droppable.proportions.width,
+ t = droppable.offset.top, b = t + droppable.proportions.height;
+
+ switch (toleranceMode) {
+ case 'fit':
+ return (l <= x1 && x2 <= r
+ && t <= y1 && y2 <= b);
+ break;
+ case 'intersect':
+ return (l < x1 + (draggable.helperProportions.width / 2) // Right Half
+ && x2 - (draggable.helperProportions.width / 2) < r // Left Half
+ && t < y1 + (draggable.helperProportions.height / 2) // Bottom Half
+ && y2 - (draggable.helperProportions.height / 2) < b ); // Top Half
+ break;
+ case 'pointer':
+ var draggableLeft = ((draggable.positionAbs || draggable.position.absolute).left + (draggable.clickOffset || draggable.offset.click).left),
+ draggableTop = ((draggable.positionAbs || draggable.position.absolute).top + (draggable.clickOffset || draggable.offset.click).top),
+ isOver = $.ui.isOver(draggableTop, draggableLeft, t, l, droppable.proportions.height, droppable.proportions.width);
+ return isOver;
+ break;
+ case 'touch':
+ return (
+ (y1 >= t && y1 <= b) || // Top edge touching
+ (y2 >= t && y2 <= b) || // Bottom edge touching
+ (y1 < t && y2 > b) // Surrounded vertically
+ ) && (
+ (x1 >= l && x1 <= r) || // Left edge touching
+ (x2 >= l && x2 <= r) || // Right edge touching
+ (x1 < l && x2 > r) // Surrounded horizontally
+ );
+ break;
+ default:
+ return false;
+ break;
+ }
+
+};
+
+/*
+ This manager tracks offsets of draggables and droppables
+*/
+$.ui.ddmanager = {
+ current: null,
+ droppables: { 'default': [] },
+ prepareOffsets: function(t, event) {
+
+ var m = $.ui.ddmanager.droppables[t.options.scope] || [];
+ var type = event ? event.type : null; // workaround for #2317
+ var list = (t.currentItem || t.element).find(":data(droppable)").andSelf();
+
+ droppablesLoop: for (var i = 0; i < m.length; i++) {
+
+ if(m[i].options.disabled || (t && !m[i].accept.call(m[i].element[0],(t.currentItem || t.element)))) continue; //No disabled and non-accepted
+ for (var j=0; j < list.length; j++) { if(list[j] == m[i].element[0]) { m[i].proportions.height = 0; continue droppablesLoop; } }; //Filter out elements in the current dragged item
+ m[i].visible = m[i].element.css("display") != "none"; if(!m[i].visible) continue; //If the element is not visible, continue
+
+ if(type == "mousedown") m[i]._activate.call(m[i], event); //Activate the droppable if used directly from draggables
+
+ m[i].offset = m[i].element.offset();
+ m[i].proportions = { width: m[i].element[0].offsetWidth, height: m[i].element[0].offsetHeight };
+
+ }
+
+ },
+ drop: function(draggable, event) {
+
+ var dropped = false;
+ $.each($.ui.ddmanager.droppables[draggable.options.scope] || [], function() {
+
+ if(!this.options) return;
+ if (!this.options.disabled && this.visible && $.ui.intersect(draggable, this, this.options.tolerance))
+ dropped = dropped || this._drop.call(this, event);
+
+ if (!this.options.disabled && this.visible && this.accept.call(this.element[0],(draggable.currentItem || draggable.element))) {
+ this.isout = 1; this.isover = 0;
+ this._deactivate.call(this, event);
+ }
+
+ });
+ return dropped;
+
+ },
+ dragStart: function( draggable, event ) {
+ //Listen for scrolling so that if the dragging causes scrolling the position of the droppables can be recalculated (see #5003)
+ draggable.element.parents( ":not(body,html)" ).bind( "scroll.droppable", function() {
+ if( !draggable.options.refreshPositions ) $.ui.ddmanager.prepareOffsets( draggable, event );
+ });
+ },
+ drag: function(draggable, event) {
+
+ //If you have a highly dynamic page, you might try this option. It renders positions every time you move the mouse.
+ if(draggable.options.refreshPositions) $.ui.ddmanager.prepareOffsets(draggable, event);
+
+ //Run through all droppables and check their positions based on specific tolerance options
+ $.each($.ui.ddmanager.droppables[draggable.options.scope] || [], function() {
+
+ if(this.options.disabled || this.greedyChild || !this.visible) return;
+ var intersects = $.ui.intersect(draggable, this, this.options.tolerance);
+
+ var c = !intersects && this.isover == 1 ? 'isout' : (intersects && this.isover == 0 ? 'isover' : null);
+ if(!c) return;
+
+ var parentInstance;
+ if (this.options.greedy) {
+ var parent = this.element.parents(':data(droppable):eq(0)');
+ if (parent.length) {
+ parentInstance = $.data(parent[0], 'droppable');
+ parentInstance.greedyChild = (c == 'isover' ? 1 : 0);
+ }
+ }
+
+ // we just moved into a greedy child
+ if (parentInstance && c == 'isover') {
+ parentInstance['isover'] = 0;
+ parentInstance['isout'] = 1;
+ parentInstance._out.call(parentInstance, event);
+ }
+
+ this[c] = 1; this[c == 'isout' ? 'isover' : 'isout'] = 0;
+ this[c == "isover" ? "_over" : "_out"].call(this, event);
+
+ // we just moved out of a greedy child
+ if (parentInstance && c == 'isout') {
+ parentInstance['isout'] = 0;
+ parentInstance['isover'] = 1;
+ parentInstance._over.call(parentInstance, event);
+ }
+ });
+
+ },
+ dragStop: function( draggable, event ) {
+ draggable.element.parents( ":not(body,html)" ).unbind( "scroll.droppable" );
+ //Call prepareOffsets one final time since IE does not fire return scroll events when overflow was caused by drag (see #5003)
+ if( !draggable.options.refreshPositions ) $.ui.ddmanager.prepareOffsets( draggable, event );
+ }
+};
+
+})(jQuery);
+/*
+ * jQuery UI Resizable 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Resizables
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.mouse.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+$.widget("ui.resizable", $.ui.mouse, {
+ widgetEventPrefix: "resize",
+ options: {
+ alsoResize: false,
+ animate: false,
+ animateDuration: "slow",
+ animateEasing: "swing",
+ aspectRatio: false,
+ autoHide: false,
+ containment: false,
+ ghost: false,
+ grid: false,
+ handles: "e,s,se",
+ helper: false,
+ maxHeight: null,
+ maxWidth: null,
+ minHeight: 10,
+ minWidth: 10,
+ zIndex: 1000
+ },
+ _create: function() {
+
+ var self = this, o = this.options;
+ this.element.addClass("ui-resizable");
+
+ $.extend(this, {
+ _aspectRatio: !!(o.aspectRatio),
+ aspectRatio: o.aspectRatio,
+ originalElement: this.element,
+ _proportionallyResizeElements: [],
+ _helper: o.helper || o.ghost || o.animate ? o.helper || 'ui-resizable-helper' : null
+ });
+
+ //Wrap the element if it cannot hold child nodes
+ if(this.element[0].nodeName.match(/canvas|textarea|input|select|button|img/i)) {
+
+ //Opera fix for relative positioning
+ if (/relative/.test(this.element.css('position')) && $.browser.opera)
+ this.element.css({ position: 'relative', top: 'auto', left: 'auto' });
+
+ //Create a wrapper element and set the wrapper to the new current internal element
+ this.element.wrap(
+ $('<div class="ui-wrapper" style="overflow: hidden;"></div>').css({
+ position: this.element.css('position'),
+ width: this.element.outerWidth(),
+ height: this.element.outerHeight(),
+ top: this.element.css('top'),
+ left: this.element.css('left')
+ })
+ );
+
+ //Overwrite the original this.element
+ this.element = this.element.parent().data(
+ "resizable", this.element.data('resizable')
+ );
+
+ this.elementIsWrapper = true;
+
+ //Move margins to the wrapper
+ this.element.css({ marginLeft: this.originalElement.css("marginLeft"), marginTop: this.originalElement.css("marginTop"), marginRight: this.originalElement.css("marginRight"), marginBottom: this.originalElement.css("marginBottom") });
+ this.originalElement.css({ marginLeft: 0, marginTop: 0, marginRight: 0, marginBottom: 0});
+
+ //Prevent Safari textarea resize
+ this.originalResizeStyle = this.originalElement.css('resize');
+ this.originalElement.css('resize', 'none');
+
+ //Push the actual element to our proportionallyResize internal array
+ this._proportionallyResizeElements.push(this.originalElement.css({ position: 'static', zoom: 1, display: 'block' }));
+
+ // avoid IE jump (hard set the margin)
+ this.originalElement.css({ margin: this.originalElement.css('margin') });
+
+ // fix handlers offset
+ this._proportionallyResize();
+
+ }
+
+ this.handles = o.handles || (!$('.ui-resizable-handle', this.element).length ? "e,s,se" : { n: '.ui-resizable-n', e: '.ui-resizable-e', s: '.ui-resizable-s', w: '.ui-resizable-w', se: '.ui-resizable-se', sw: '.ui-resizable-sw', ne: '.ui-resizable-ne', nw: '.ui-resizable-nw' });
+ if(this.handles.constructor == String) {
+
+ if(this.handles == 'all') this.handles = 'n,e,s,w,se,sw,ne,nw';
+ var n = this.handles.split(","); this.handles = {};
+
+ for(var i = 0; i < n.length; i++) {
+
+ var handle = $.trim(n[i]), hname = 'ui-resizable-'+handle;
+ var axis = $('<div class="ui-resizable-handle ' + hname + '"></div>');
+
+ // increase zIndex of sw, se, ne, nw axis
+ //TODO : this modifies original option
+ if(/sw|se|ne|nw/.test(handle)) axis.css({ zIndex: ++o.zIndex });
+
+ //TODO : What's going on here?
+ if ('se' == handle) {
+ axis.addClass('ui-icon ui-icon-gripsmall-diagonal-se');
+ };
+
+ //Insert into internal handles object and append to element
+ this.handles[handle] = '.ui-resizable-'+handle;
+ this.element.append(axis);
+ }
+
+ }
+
+ this._renderAxis = function(target) {
+
+ target = target || this.element;
+
+ for(var i in this.handles) {
+
+ if(this.handles[i].constructor == String)
+ this.handles[i] = $(this.handles[i], this.element).show();
+
+ //Apply pad to wrapper element, needed to fix axis position (textarea, inputs, scrolls)
+ if (this.elementIsWrapper && this.originalElement[0].nodeName.match(/textarea|input|select|button/i)) {
+
+ var axis = $(this.handles[i], this.element), padWrapper = 0;
+
+ //Checking the correct pad and border
+ padWrapper = /sw|ne|nw|se|n|s/.test(i) ? axis.outerHeight() : axis.outerWidth();
+
+ //The padding type i have to apply...
+ var padPos = [ 'padding',
+ /ne|nw|n/.test(i) ? 'Top' :
+ /se|sw|s/.test(i) ? 'Bottom' :
+ /^e$/.test(i) ? 'Right' : 'Left' ].join("");
+
+ target.css(padPos, padWrapper);
+
+ this._proportionallyResize();
+
+ }
+
+ //TODO: What's that good for? There's not anything to be executed left
+ if(!$(this.handles[i]).length)
+ continue;
+
+ }
+ };
+
+ //TODO: make renderAxis a prototype function
+ this._renderAxis(this.element);
+
+ this._handles = $('.ui-resizable-handle', this.element)
+ .disableSelection();
+
+ //Matching axis name
+ this._handles.mouseover(function() {
+ if (!self.resizing) {
+ if (this.className)
+ var axis = this.className.match(/ui-resizable-(se|sw|ne|nw|n|e|s|w)/i);
+ //Axis, default = se
+ self.axis = axis && axis[1] ? axis[1] : 'se';
+ }
+ });
+
+ //If we want to auto hide the elements
+ if (o.autoHide) {
+ this._handles.hide();
+ $(this.element)
+ .addClass("ui-resizable-autohide")
+ .hover(function() {
+ if (o.disabled) return;
+ $(this).removeClass("ui-resizable-autohide");
+ self._handles.show();
+ },
+ function(){
+ if (o.disabled) return;
+ if (!self.resizing) {
+ $(this).addClass("ui-resizable-autohide");
+ self._handles.hide();
+ }
+ });
+ }
+
+ //Initialize the mouse interaction
+ this._mouseInit();
+
+ },
+
+ destroy: function() {
+
+ this._mouseDestroy();
+
+ var _destroy = function(exp) {
+ $(exp).removeClass("ui-resizable ui-resizable-disabled ui-resizable-resizing")
+ .removeData("resizable").unbind(".resizable").find('.ui-resizable-handle').remove();
+ };
+
+ //TODO: Unwrap at same DOM position
+ if (this.elementIsWrapper) {
+ _destroy(this.element);
+ var wrapper = this.element;
+ wrapper.after(
+ this.originalElement.css({
+ position: wrapper.css('position'),
+ width: wrapper.outerWidth(),
+ height: wrapper.outerHeight(),
+ top: wrapper.css('top'),
+ left: wrapper.css('left')
+ })
+ ).remove();
+ }
+
+ this.originalElement.css('resize', this.originalResizeStyle);
+ _destroy(this.originalElement);
+
+ return this;
+ },
+
+ _mouseCapture: function(event) {
+ var handle = false;
+ for (var i in this.handles) {
+ if ($(this.handles[i])[0] == event.target) {
+ handle = true;
+ }
+ }
+
+ return !this.options.disabled && handle;
+ },
+
+ _mouseStart: function(event) {
+
+ var o = this.options, iniPos = this.element.position(), el = this.element;
+
+ this.resizing = true;
+ this.documentScroll = { top: $(document).scrollTop(), left: $(document).scrollLeft() };
+
+ // bugfix for http://dev.jquery.com/ticket/1749
+ if (el.is('.ui-draggable') || (/absolute/).test(el.css('position'))) {
+ el.css({ position: 'absolute', top: iniPos.top, left: iniPos.left });
+ }
+
+ //Opera fixing relative position
+ if ($.browser.opera && (/relative/).test(el.css('position')))
+ el.css({ position: 'relative', top: 'auto', left: 'auto' });
+
+ this._renderProxy();
+
+ var curleft = num(this.helper.css('left')), curtop = num(this.helper.css('top'));
+
+ if (o.containment) {
+ curleft += $(o.containment).scrollLeft() || 0;
+ curtop += $(o.containment).scrollTop() || 0;
+ }
+
+ //Store needed variables
+ this.offset = this.helper.offset();
+ this.position = { left: curleft, top: curtop };
+ this.size = this._helper ? { width: el.outerWidth(), height: el.outerHeight() } : { width: el.width(), height: el.height() };
+ this.originalSize = this._helper ? { width: el.outerWidth(), height: el.outerHeight() } : { width: el.width(), height: el.height() };
+ this.originalPosition = { left: curleft, top: curtop };
+ this.sizeDiff = { width: el.outerWidth() - el.width(), height: el.outerHeight() - el.height() };
+ this.originalMousePosition = { left: event.pageX, top: event.pageY };
+
+ //Aspect Ratio
+ this.aspectRatio = (typeof o.aspectRatio == 'number') ? o.aspectRatio : ((this.originalSize.width / this.originalSize.height) || 1);
+
+ var cursor = $('.ui-resizable-' + this.axis).css('cursor');
+ $('body').css('cursor', cursor == 'auto' ? this.axis + '-resize' : cursor);
+
+ el.addClass("ui-resizable-resizing");
+ this._propagate("start", event);
+ return true;
+ },
+
+ _mouseDrag: function(event) {
+
+ //Increase performance, avoid regex
+ var el = this.helper, o = this.options, props = {},
+ self = this, smp = this.originalMousePosition, a = this.axis;
+
+ var dx = (event.pageX-smp.left)||0, dy = (event.pageY-smp.top)||0;
+ var trigger = this._change[a];
+ if (!trigger) return false;
+
+ // Calculate the attrs that will be change
+ var data = trigger.apply(this, [event, dx, dy]), ie6 = $.browser.msie && $.browser.version < 7, csdif = this.sizeDiff;
+
+ // Put this in the mouseDrag handler since the user can start pressing shift while resizing
+ this._updateVirtualBoundaries(event.shiftKey);
+ if (this._aspectRatio || event.shiftKey)
+ data = this._updateRatio(data, event);
+
+ data = this._respectSize(data, event);
+
+ // plugins callbacks need to be called first
+ this._propagate("resize", event);
+
+ el.css({
+ top: this.position.top + "px", left: this.position.left + "px",
+ width: this.size.width + "px", height: this.size.height + "px"
+ });
+
+ if (!this._helper && this._proportionallyResizeElements.length)
+ this._proportionallyResize();
+
+ this._updateCache(data);
+
+ // calling the user callback at the end
+ this._trigger('resize', event, this.ui());
+
+ return false;
+ },
+
+ _mouseStop: function(event) {
+
+ this.resizing = false;
+ var o = this.options, self = this;
+
+ if(this._helper) {
+ var pr = this._proportionallyResizeElements, ista = pr.length && (/textarea/i).test(pr[0].nodeName),
+ soffseth = ista && $.ui.hasScroll(pr[0], 'left') /* TODO - jump height */ ? 0 : self.sizeDiff.height,
+ soffsetw = ista ? 0 : self.sizeDiff.width;
+
+ var s = { width: (self.helper.width() - soffsetw), height: (self.helper.height() - soffseth) },
+ left = (parseInt(self.element.css('left'), 10) + (self.position.left - self.originalPosition.left)) || null,
+ top = (parseInt(self.element.css('top'), 10) + (self.position.top - self.originalPosition.top)) || null;
+
+ if (!o.animate)
+ this.element.css($.extend(s, { top: top, left: left }));
+
+ self.helper.height(self.size.height);
+ self.helper.width(self.size.width);
+
+ if (this._helper && !o.animate) this._proportionallyResize();
+ }
+
+ $('body').css('cursor', 'auto');
+
+ this.element.removeClass("ui-resizable-resizing");
+
+ this._propagate("stop", event);
+
+ if (this._helper) this.helper.remove();
+ return false;
+
+ },
+
+ _updateVirtualBoundaries: function(forceAspectRatio) {
+ var o = this.options, pMinWidth, pMaxWidth, pMinHeight, pMaxHeight, b;
+
+ b = {
+ minWidth: isNumber(o.minWidth) ? o.minWidth : 0,
+ maxWidth: isNumber(o.maxWidth) ? o.maxWidth : Infinity,
+ minHeight: isNumber(o.minHeight) ? o.minHeight : 0,
+ maxHeight: isNumber(o.maxHeight) ? o.maxHeight : Infinity
+ };
+
+ if(this._aspectRatio || forceAspectRatio) {
+ // We want to create an enclosing box whose aspect ration is the requested one
+ // First, compute the "projected" size for each dimension based on the aspect ratio and other dimension
+ pMinWidth = b.minHeight * this.aspectRatio;
+ pMinHeight = b.minWidth / this.aspectRatio;
+ pMaxWidth = b.maxHeight * this.aspectRatio;
+ pMaxHeight = b.maxWidth / this.aspectRatio;
+
+ if(pMinWidth > b.minWidth) b.minWidth = pMinWidth;
+ if(pMinHeight > b.minHeight) b.minHeight = pMinHeight;
+ if(pMaxWidth < b.maxWidth) b.maxWidth = pMaxWidth;
+ if(pMaxHeight < b.maxHeight) b.maxHeight = pMaxHeight;
+ }
+ this._vBoundaries = b;
+ },
+
+ _updateCache: function(data) {
+ var o = this.options;
+ this.offset = this.helper.offset();
+ if (isNumber(data.left)) this.position.left = data.left;
+ if (isNumber(data.top)) this.position.top = data.top;
+ if (isNumber(data.height)) this.size.height = data.height;
+ if (isNumber(data.width)) this.size.width = data.width;
+ },
+
+ _updateRatio: function(data, event) {
+
+ var o = this.options, cpos = this.position, csize = this.size, a = this.axis;
+
+ if (isNumber(data.height)) data.width = (data.height * this.aspectRatio);
+ else if (isNumber(data.width)) data.height = (data.width / this.aspectRatio);
+
+ if (a == 'sw') {
+ data.left = cpos.left + (csize.width - data.width);
+ data.top = null;
+ }
+ if (a == 'nw') {
+ data.top = cpos.top + (csize.height - data.height);
+ data.left = cpos.left + (csize.width - data.width);
+ }
+
+ return data;
+ },
+
+ _respectSize: function(data, event) {
+
+ var el = this.helper, o = this._vBoundaries, pRatio = this._aspectRatio || event.shiftKey, a = this.axis,
+ ismaxw = isNumber(data.width) && o.maxWidth && (o.maxWidth < data.width), ismaxh = isNumber(data.height) && o.maxHeight && (o.maxHeight < data.height),
+ isminw = isNumber(data.width) && o.minWidth && (o.minWidth > data.width), isminh = isNumber(data.height) && o.minHeight && (o.minHeight > data.height);
+
+ if (isminw) data.width = o.minWidth;
+ if (isminh) data.height = o.minHeight;
+ if (ismaxw) data.width = o.maxWidth;
+ if (ismaxh) data.height = o.maxHeight;
+
+ var dw = this.originalPosition.left + this.originalSize.width, dh = this.position.top + this.size.height;
+ var cw = /sw|nw|w/.test(a), ch = /nw|ne|n/.test(a);
+
+ if (isminw && cw) data.left = dw - o.minWidth;
+ if (ismaxw && cw) data.left = dw - o.maxWidth;
+ if (isminh && ch) data.top = dh - o.minHeight;
+ if (ismaxh && ch) data.top = dh - o.maxHeight;
+
+ // fixing jump error on top/left - bug #2330
+ var isNotwh = !data.width && !data.height;
+ if (isNotwh && !data.left && data.top) data.top = null;
+ else if (isNotwh && !data.top && data.left) data.left = null;
+
+ return data;
+ },
+
+ _proportionallyResize: function() {
+
+ var o = this.options;
+ if (!this._proportionallyResizeElements.length) return;
+ var element = this.helper || this.element;
+
+ for (var i=0; i < this._proportionallyResizeElements.length; i++) {
+
+ var prel = this._proportionallyResizeElements[i];
+
+ if (!this.borderDif) {
+ var b = [prel.css('borderTopWidth'), prel.css('borderRightWidth'), prel.css('borderBottomWidth'), prel.css('borderLeftWidth')],
+ p = [prel.css('paddingTop'), prel.css('paddingRight'), prel.css('paddingBottom'), prel.css('paddingLeft')];
+
+ this.borderDif = $.map(b, function(v, i) {
+ var border = parseInt(v,10)||0, padding = parseInt(p[i],10)||0;
+ return border + padding;
+ });
+ }
+
+ if ($.browser.msie && !(!($(element).is(':hidden') || $(element).parents(':hidden').length)))
+ continue;
+
+ prel.css({
+ height: (element.height() - this.borderDif[0] - this.borderDif[2]) || 0,
+ width: (element.width() - this.borderDif[1] - this.borderDif[3]) || 0
+ });
+
+ };
+
+ },
+
+ _renderProxy: function() {
+
+ var el = this.element, o = this.options;
+ this.elementOffset = el.offset();
+
+ if(this._helper) {
+
+ this.helper = this.helper || $('<div style="overflow:hidden;"></div>');
+
+ // fix ie6 offset TODO: This seems broken
+ var ie6 = $.browser.msie && $.browser.version < 7, ie6offset = (ie6 ? 1 : 0),
+ pxyoffset = ( ie6 ? 2 : -1 );
+
+ this.helper.addClass(this._helper).css({
+ width: this.element.outerWidth() + pxyoffset,
+ height: this.element.outerHeight() + pxyoffset,
+ position: 'absolute',
+ left: this.elementOffset.left - ie6offset +'px',
+ top: this.elementOffset.top - ie6offset +'px',
+ zIndex: ++o.zIndex //TODO: Don't modify option
+ });
+
+ this.helper
+ .appendTo("body")
+ .disableSelection();
+
+ } else {
+ this.helper = this.element;
+ }
+
+ },
+
+ _change: {
+ e: function(event, dx, dy) {
+ return { width: this.originalSize.width + dx };
+ },
+ w: function(event, dx, dy) {
+ var o = this.options, cs = this.originalSize, sp = this.originalPosition;
+ return { left: sp.left + dx, width: cs.width - dx };
+ },
+ n: function(event, dx, dy) {
+ var o = this.options, cs = this.originalSize, sp = this.originalPosition;
+ return { top: sp.top + dy, height: cs.height - dy };
+ },
+ s: function(event, dx, dy) {
+ return { height: this.originalSize.height + dy };
+ },
+ se: function(event, dx, dy) {
+ return $.extend(this._change.s.apply(this, arguments), this._change.e.apply(this, [event, dx, dy]));
+ },
+ sw: function(event, dx, dy) {
+ return $.extend(this._change.s.apply(this, arguments), this._change.w.apply(this, [event, dx, dy]));
+ },
+ ne: function(event, dx, dy) {
+ return $.extend(this._change.n.apply(this, arguments), this._change.e.apply(this, [event, dx, dy]));
+ },
+ nw: function(event, dx, dy) {
+ return $.extend(this._change.n.apply(this, arguments), this._change.w.apply(this, [event, dx, dy]));
+ }
+ },
+
+ _propagate: function(n, event) {
+ $.ui.plugin.call(this, n, [event, this.ui()]);
+ (n != "resize" && this._trigger(n, event, this.ui()));
+ },
+
+ plugins: {},
+
+ ui: function() {
+ return {
+ originalElement: this.originalElement,
+ element: this.element,
+ helper: this.helper,
+ position: this.position,
+ size: this.size,
+ originalSize: this.originalSize,
+ originalPosition: this.originalPosition
+ };
+ }
+
+});
+
+$.extend($.ui.resizable, {
+ version: "1.8.16"
+});
+
+/*
+ * Resizable Extensions
+ */
+
+$.ui.plugin.add("resizable", "alsoResize", {
+
+ start: function (event, ui) {
+ var self = $(this).data("resizable"), o = self.options;
+
+ var _store = function (exp) {
+ $(exp).each(function() {
+ var el = $(this);
+ el.data("resizable-alsoresize", {
+ width: parseInt(el.width(), 10), height: parseInt(el.height(), 10),
+ left: parseInt(el.css('left'), 10), top: parseInt(el.css('top'), 10),
+ position: el.css('position') // to reset Opera on stop()
+ });
+ });
+ };
+
+ if (typeof(o.alsoResize) == 'object' && !o.alsoResize.parentNode) {
+ if (o.alsoResize.length) { o.alsoResize = o.alsoResize[0]; _store(o.alsoResize); }
+ else { $.each(o.alsoResize, function (exp) { _store(exp); }); }
+ }else{
+ _store(o.alsoResize);
+ }
+ },
+
+ resize: function (event, ui) {
+ var self = $(this).data("resizable"), o = self.options, os = self.originalSize, op = self.originalPosition;
+
+ var delta = {
+ height: (self.size.height - os.height) || 0, width: (self.size.width - os.width) || 0,
+ top: (self.position.top - op.top) || 0, left: (self.position.left - op.left) || 0
+ },
+
+ _alsoResize = function (exp, c) {
+ $(exp).each(function() {
+ var el = $(this), start = $(this).data("resizable-alsoresize"), style = {},
+ css = c && c.length ? c : el.parents(ui.originalElement[0]).length ? ['width', 'height'] : ['width', 'height', 'top', 'left'];
+
+ $.each(css, function (i, prop) {
+ var sum = (start[prop]||0) + (delta[prop]||0);
+ if (sum && sum >= 0)
+ style[prop] = sum || null;
+ });
+
+ // Opera fixing relative position
+ if ($.browser.opera && /relative/.test(el.css('position'))) {
+ self._revertToRelativePosition = true;
+ el.css({ position: 'absolute', top: 'auto', left: 'auto' });
+ }
+
+ el.css(style);
+ });
+ };
+
+ if (typeof(o.alsoResize) == 'object' && !o.alsoResize.nodeType) {
+ $.each(o.alsoResize, function (exp, c) { _alsoResize(exp, c); });
+ }else{
+ _alsoResize(o.alsoResize);
+ }
+ },
+
+ stop: function (event, ui) {
+ var self = $(this).data("resizable"), o = self.options;
+
+ var _reset = function (exp) {
+ $(exp).each(function() {
+ var el = $(this);
+ // reset position for Opera - no need to verify it was changed
+ el.css({ position: el.data("resizable-alsoresize").position });
+ });
+ };
+
+ if (self._revertToRelativePosition) {
+ self._revertToRelativePosition = false;
+ if (typeof(o.alsoResize) == 'object' && !o.alsoResize.nodeType) {
+ $.each(o.alsoResize, function (exp) { _reset(exp); });
+ }else{
+ _reset(o.alsoResize);
+ }
+ }
+
+ $(this).removeData("resizable-alsoresize");
+ }
+});
+
+$.ui.plugin.add("resizable", "animate", {
+
+ stop: function(event, ui) {
+ var self = $(this).data("resizable"), o = self.options;
+
+ var pr = self._proportionallyResizeElements, ista = pr.length && (/textarea/i).test(pr[0].nodeName),
+ soffseth = ista && $.ui.hasScroll(pr[0], 'left') /* TODO - jump height */ ? 0 : self.sizeDiff.height,
+ soffsetw = ista ? 0 : self.sizeDiff.width;
+
+ var style = { width: (self.size.width - soffsetw), height: (self.size.height - soffseth) },
+ left = (parseInt(self.element.css('left'), 10) + (self.position.left - self.originalPosition.left)) || null,
+ top = (parseInt(self.element.css('top'), 10) + (self.position.top - self.originalPosition.top)) || null;
+
+ self.element.animate(
+ $.extend(style, top && left ? { top: top, left: left } : {}), {
+ duration: o.animateDuration,
+ easing: o.animateEasing,
+ step: function() {
+
+ var data = {
+ width: parseInt(self.element.css('width'), 10),
+ height: parseInt(self.element.css('height'), 10),
+ top: parseInt(self.element.css('top'), 10),
+ left: parseInt(self.element.css('left'), 10)
+ };
+
+ if (pr && pr.length) $(pr[0]).css({ width: data.width, height: data.height });
+
+ // propagating resize, and updating values for each animation step
+ self._updateCache(data);
+ self._propagate("resize", event);
+
+ }
+ }
+ );
+ }
+
+});
+
+$.ui.plugin.add("resizable", "containment", {
+
+ start: function(event, ui) {
+ var self = $(this).data("resizable"), o = self.options, el = self.element;
+ var oc = o.containment, ce = (oc instanceof $) ? oc.get(0) : (/parent/.test(oc)) ? el.parent().get(0) : oc;
+ if (!ce) return;
+
+ self.containerElement = $(ce);
+
+ if (/document/.test(oc) || oc == document) {
+ self.containerOffset = { left: 0, top: 0 };
+ self.containerPosition = { left: 0, top: 0 };
+
+ self.parentData = {
+ element: $(document), left: 0, top: 0,
+ width: $(document).width(), height: $(document).height() || document.body.parentNode.scrollHeight
+ };
+ }
+
+ // i'm a node, so compute top, left, right, bottom
+ else {
+ var element = $(ce), p = [];
+ $([ "Top", "Right", "Left", "Bottom" ]).each(function(i, name) { p[i] = num(element.css("padding" + name)); });
+
+ self.containerOffset = element.offset();
+ self.containerPosition = element.position();
+ self.containerSize = { height: (element.innerHeight() - p[3]), width: (element.innerWidth() - p[1]) };
+
+ var co = self.containerOffset, ch = self.containerSize.height, cw = self.containerSize.width,
+ width = ($.ui.hasScroll(ce, "left") ? ce.scrollWidth : cw ), height = ($.ui.hasScroll(ce) ? ce.scrollHeight : ch);
+
+ self.parentData = {
+ element: ce, left: co.left, top: co.top, width: width, height: height
+ };
+ }
+ },
+
+ resize: function(event, ui) {
+ var self = $(this).data("resizable"), o = self.options,
+ ps = self.containerSize, co = self.containerOffset, cs = self.size, cp = self.position,
+ pRatio = self._aspectRatio || event.shiftKey, cop = { top:0, left:0 }, ce = self.containerElement;
+
+ if (ce[0] != document && (/static/).test(ce.css('position'))) cop = co;
+
+ if (cp.left < (self._helper ? co.left : 0)) {
+ self.size.width = self.size.width + (self._helper ? (self.position.left - co.left) : (self.position.left - cop.left));
+ if (pRatio) self.size.height = self.size.width / o.aspectRatio;
+ self.position.left = o.helper ? co.left : 0;
+ }
+
+ if (cp.top < (self._helper ? co.top : 0)) {
+ self.size.height = self.size.height + (self._helper ? (self.position.top - co.top) : self.position.top);
+ if (pRatio) self.size.width = self.size.height * o.aspectRatio;
+ self.position.top = self._helper ? co.top : 0;
+ }
+
+ self.offset.left = self.parentData.left+self.position.left;
+ self.offset.top = self.parentData.top+self.position.top;
+
+ var woset = Math.abs( (self._helper ? self.offset.left - cop.left : (self.offset.left - cop.left)) + self.sizeDiff.width ),
+ hoset = Math.abs( (self._helper ? self.offset.top - cop.top : (self.offset.top - co.top)) + self.sizeDiff.height );
+
+ var isParent = self.containerElement.get(0) == self.element.parent().get(0),
+ isOffsetRelative = /relative|absolute/.test(self.containerElement.css('position'));
+
+ if(isParent && isOffsetRelative) woset -= self.parentData.left;
+
+ if (woset + self.size.width >= self.parentData.width) {
+ self.size.width = self.parentData.width - woset;
+ if (pRatio) self.size.height = self.size.width / self.aspectRatio;
+ }
+
+ if (hoset + self.size.height >= self.parentData.height) {
+ self.size.height = self.parentData.height - hoset;
+ if (pRatio) self.size.width = self.size.height * self.aspectRatio;
+ }
+ },
+
+ stop: function(event, ui){
+ var self = $(this).data("resizable"), o = self.options, cp = self.position,
+ co = self.containerOffset, cop = self.containerPosition, ce = self.containerElement;
+
+ var helper = $(self.helper), ho = helper.offset(), w = helper.outerWidth() - self.sizeDiff.width, h = helper.outerHeight() - self.sizeDiff.height;
+
+ if (self._helper && !o.animate && (/relative/).test(ce.css('position')))
+ $(this).css({ left: ho.left - cop.left - co.left, width: w, height: h });
+
+ if (self._helper && !o.animate && (/static/).test(ce.css('position')))
+ $(this).css({ left: ho.left - cop.left - co.left, width: w, height: h });
+
+ }
+});
+
+$.ui.plugin.add("resizable", "ghost", {
+
+ start: function(event, ui) {
+
+ var self = $(this).data("resizable"), o = self.options, cs = self.size;
+
+ self.ghost = self.originalElement.clone();
+ self.ghost
+ .css({ opacity: .25, display: 'block', position: 'relative', height: cs.height, width: cs.width, margin: 0, left: 0, top: 0 })
+ .addClass('ui-resizable-ghost')
+ .addClass(typeof o.ghost == 'string' ? o.ghost : '');
+
+ self.ghost.appendTo(self.helper);
+
+ },
+
+ resize: function(event, ui){
+ var self = $(this).data("resizable"), o = self.options;
+ if (self.ghost) self.ghost.css({ position: 'relative', height: self.size.height, width: self.size.width });
+ },
+
+ stop: function(event, ui){
+ var self = $(this).data("resizable"), o = self.options;
+ if (self.ghost && self.helper) self.helper.get(0).removeChild(self.ghost.get(0));
+ }
+
+});
+
+$.ui.plugin.add("resizable", "grid", {
+
+ resize: function(event, ui) {
+ var self = $(this).data("resizable"), o = self.options, cs = self.size, os = self.originalSize, op = self.originalPosition, a = self.axis, ratio = o._aspectRatio || event.shiftKey;
+ o.grid = typeof o.grid == "number" ? [o.grid, o.grid] : o.grid;
+ var ox = Math.round((cs.width - os.width) / (o.grid[0]||1)) * (o.grid[0]||1), oy = Math.round((cs.height - os.height) / (o.grid[1]||1)) * (o.grid[1]||1);
+
+ if (/^(se|s|e)$/.test(a)) {
+ self.size.width = os.width + ox;
+ self.size.height = os.height + oy;
+ }
+ else if (/^(ne)$/.test(a)) {
+ self.size.width = os.width + ox;
+ self.size.height = os.height + oy;
+ self.position.top = op.top - oy;
+ }
+ else if (/^(sw)$/.test(a)) {
+ self.size.width = os.width + ox;
+ self.size.height = os.height + oy;
+ self.position.left = op.left - ox;
+ }
+ else {
+ self.size.width = os.width + ox;
+ self.size.height = os.height + oy;
+ self.position.top = op.top - oy;
+ self.position.left = op.left - ox;
+ }
+ }
+
+});
+
+var num = function(v) {
+ return parseInt(v, 10) || 0;
+};
+
+var isNumber = function(value) {
+ return !isNaN(parseInt(value, 10));
+};
+
+})(jQuery);
+/*
+ * jQuery UI Selectable 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Selectables
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.mouse.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+$.widget("ui.selectable", $.ui.mouse, {
+ options: {
+ appendTo: 'body',
+ autoRefresh: true,
+ distance: 0,
+ filter: '*',
+ tolerance: 'touch'
+ },
+ _create: function() {
+ var self = this;
+
+ this.element.addClass("ui-selectable");
+
+ this.dragged = false;
+
+ // cache selectee children based on filter
+ var selectees;
+ this.refresh = function() {
+ selectees = $(self.options.filter, self.element[0]);
+ selectees.each(function() {
+ var $this = $(this);
+ var pos = $this.offset();
+ $.data(this, "selectable-item", {
+ element: this,
+ $element: $this,
+ left: pos.left,
+ top: pos.top,
+ right: pos.left + $this.outerWidth(),
+ bottom: pos.top + $this.outerHeight(),
+ startselected: false,
+ selected: $this.hasClass('ui-selected'),
+ selecting: $this.hasClass('ui-selecting'),
+ unselecting: $this.hasClass('ui-unselecting')
+ });
+ });
+ };
+ this.refresh();
+
+ this.selectees = selectees.addClass("ui-selectee");
+
+ this._mouseInit();
+
+ this.helper = $("<div class='ui-selectable-helper'></div>");
+ },
+
+ destroy: function() {
+ this.selectees
+ .removeClass("ui-selectee")
+ .removeData("selectable-item");
+ this.element
+ .removeClass("ui-selectable ui-selectable-disabled")
+ .removeData("selectable")
+ .unbind(".selectable");
+ this._mouseDestroy();
+
+ return this;
+ },
+
+ _mouseStart: function(event) {
+ var self = this;
+
+ this.opos = [event.pageX, event.pageY];
+
+ if (this.options.disabled)
+ return;
+
+ var options = this.options;
+
+ this.selectees = $(options.filter, this.element[0]);
+
+ this._trigger("start", event);
+
+ $(options.appendTo).append(this.helper);
+ // position helper (lasso)
+ this.helper.css({
+ "left": event.clientX,
+ "top": event.clientY,
+ "width": 0,
+ "height": 0
+ });
+
+ if (options.autoRefresh) {
+ this.refresh();
+ }
+
+ this.selectees.filter('.ui-selected').each(function() {
+ var selectee = $.data(this, "selectable-item");
+ selectee.startselected = true;
+ if (!event.metaKey) {
+ selectee.$element.removeClass('ui-selected');
+ selectee.selected = false;
+ selectee.$element.addClass('ui-unselecting');
+ selectee.unselecting = true;
+ // selectable UNSELECTING callback
+ self._trigger("unselecting", event, {
+ unselecting: selectee.element
+ });
+ }
+ });
+
+ $(event.target).parents().andSelf().each(function() {
+ var selectee = $.data(this, "selectable-item");
+ if (selectee) {
+ var doSelect = !event.metaKey || !selectee.$element.hasClass('ui-selected');
+ selectee.$element
+ .removeClass(doSelect ? "ui-unselecting" : "ui-selected")
+ .addClass(doSelect ? "ui-selecting" : "ui-unselecting");
+ selectee.unselecting = !doSelect;
+ selectee.selecting = doSelect;
+ selectee.selected = doSelect;
+ // selectable (UN)SELECTING callback
+ if (doSelect) {
+ self._trigger("selecting", event, {
+ selecting: selectee.element
+ });
+ } else {
+ self._trigger("unselecting", event, {
+ unselecting: selectee.element
+ });
+ }
+ return false;
+ }
+ });
+
+ },
+
+ _mouseDrag: function(event) {
+ var self = this;
+ this.dragged = true;
+
+ if (this.options.disabled)
+ return;
+
+ var options = this.options;
+
+ var x1 = this.opos[0], y1 = this.opos[1], x2 = event.pageX, y2 = event.pageY;
+ if (x1 > x2) { var tmp = x2; x2 = x1; x1 = tmp; }
+ if (y1 > y2) { var tmp = y2; y2 = y1; y1 = tmp; }
+ this.helper.css({left: x1, top: y1, width: x2-x1, height: y2-y1});
+
+ this.selectees.each(function() {
+ var selectee = $.data(this, "selectable-item");
+ //prevent helper from being selected if appendTo: selectable
+ if (!selectee || selectee.element == self.element[0])
+ return;
+ var hit = false;
+ if (options.tolerance == 'touch') {
+ hit = ( !(selectee.left > x2 || selectee.right < x1 || selectee.top > y2 || selectee.bottom < y1) );
+ } else if (options.tolerance == 'fit') {
+ hit = (selectee.left > x1 && selectee.right < x2 && selectee.top > y1 && selectee.bottom < y2);
+ }
+
+ if (hit) {
+ // SELECT
+ if (selectee.selected) {
+ selectee.$element.removeClass('ui-selected');
+ selectee.selected = false;
+ }
+ if (selectee.unselecting) {
+ selectee.$element.removeClass('ui-unselecting');
+ selectee.unselecting = false;
+ }
+ if (!selectee.selecting) {
+ selectee.$element.addClass('ui-selecting');
+ selectee.selecting = true;
+ // selectable SELECTING callback
+ self._trigger("selecting", event, {
+ selecting: selectee.element
+ });
+ }
+ } else {
+ // UNSELECT
+ if (selectee.selecting) {
+ if (event.metaKey && selectee.startselected) {
+ selectee.$element.removeClass('ui-selecting');
+ selectee.selecting = false;
+ selectee.$element.addClass('ui-selected');
+ selectee.selected = true;
+ } else {
+ selectee.$element.removeClass('ui-selecting');
+ selectee.selecting = false;
+ if (selectee.startselected) {
+ selectee.$element.addClass('ui-unselecting');
+ selectee.unselecting = true;
+ }
+ // selectable UNSELECTING callback
+ self._trigger("unselecting", event, {
+ unselecting: selectee.element
+ });
+ }
+ }
+ if (selectee.selected) {
+ if (!event.metaKey && !selectee.startselected) {
+ selectee.$element.removeClass('ui-selected');
+ selectee.selected = false;
+
+ selectee.$element.addClass('ui-unselecting');
+ selectee.unselecting = true;
+ // selectable UNSELECTING callback
+ self._trigger("unselecting", event, {
+ unselecting: selectee.element
+ });
+ }
+ }
+ }
+ });
+
+ return false;
+ },
+
+ _mouseStop: function(event) {
+ var self = this;
+
+ this.dragged = false;
+
+ var options = this.options;
+
+ $('.ui-unselecting', this.element[0]).each(function() {
+ var selectee = $.data(this, "selectable-item");
+ selectee.$element.removeClass('ui-unselecting');
+ selectee.unselecting = false;
+ selectee.startselected = false;
+ self._trigger("unselected", event, {
+ unselected: selectee.element
+ });
+ });
+ $('.ui-selecting', this.element[0]).each(function() {
+ var selectee = $.data(this, "selectable-item");
+ selectee.$element.removeClass('ui-selecting').addClass('ui-selected');
+ selectee.selecting = false;
+ selectee.selected = true;
+ selectee.startselected = true;
+ self._trigger("selected", event, {
+ selected: selectee.element
+ });
+ });
+ this._trigger("stop", event);
+
+ this.helper.remove();
+
+ return false;
+ }
+
+});
+
+$.extend($.ui.selectable, {
+ version: "1.8.16"
+});
+
+})(jQuery);
+/*
+ * jQuery UI Sortable 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Sortables
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.mouse.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+$.widget("ui.sortable", $.ui.mouse, {
+ widgetEventPrefix: "sort",
+ options: {
+ appendTo: "parent",
+ axis: false,
+ connectWith: false,
+ containment: false,
+ cursor: 'auto',
+ cursorAt: false,
+ dropOnEmpty: true,
+ forcePlaceholderSize: false,
+ forceHelperSize: false,
+ grid: false,
+ handle: false,
+ helper: "original",
+ items: '> *',
+ opacity: false,
+ placeholder: false,
+ revert: false,
+ scroll: true,
+ scrollSensitivity: 20,
+ scrollSpeed: 20,
+ scope: "default",
+ tolerance: "intersect",
+ zIndex: 1000
+ },
+ _create: function() {
+
+ var o = this.options;
+ this.containerCache = {};
+ this.element.addClass("ui-sortable");
+
+ //Get the items
+ this.refresh();
+
+ //Let's determine if the items are being displayed horizontally
+ this.floating = this.items.length ? o.axis === 'x' || (/left|right/).test(this.items[0].item.css('float')) || (/inline|table-cell/).test(this.items[0].item.css('display')) : false;
+
+ //Let's determine the parent's offset
+ this.offset = this.element.offset();
+
+ //Initialize mouse events for interaction
+ this._mouseInit();
+
+ },
+
+ destroy: function() {
+ this.element
+ .removeClass("ui-sortable ui-sortable-disabled")
+ .removeData("sortable")
+ .unbind(".sortable");
+ this._mouseDestroy();
+
+ for ( var i = this.items.length - 1; i >= 0; i-- )
+ this.items[i].item.removeData("sortable-item");
+
+ return this;
+ },
+
+ _setOption: function(key, value){
+ if ( key === "disabled" ) {
+ this.options[ key ] = value;
+
+ this.widget()
+ [ value ? "addClass" : "removeClass"]( "ui-sortable-disabled" );
+ } else {
+ // Don't call widget base _setOption for disable as it adds ui-state-disabled class
+ $.Widget.prototype._setOption.apply(this, arguments);
+ }
+ },
+
+ _mouseCapture: function(event, overrideHandle) {
+
+ if (this.reverting) {
+ return false;
+ }
+
+ if(this.options.disabled || this.options.type == 'static') return false;
+
+ //We have to refresh the items data once first
+ this._refreshItems(event);
+
+ //Find out if the clicked node (or one of its parents) is a actual item in this.items
+ var currentItem = null, self = this, nodes = $(event.target).parents().each(function() {
+ if($.data(this, 'sortable-item') == self) {
+ currentItem = $(this);
+ return false;
+ }
+ });
+ if($.data(event.target, 'sortable-item') == self) currentItem = $(event.target);
+
+ if(!currentItem) return false;
+ if(this.options.handle && !overrideHandle) {
+ var validHandle = false;
+
+ $(this.options.handle, currentItem).find("*").andSelf().each(function() { if(this == event.target) validHandle = true; });
+ if(!validHandle) return false;
+ }
+
+ this.currentItem = currentItem;
+ this._removeCurrentsFromItems();
+ return true;
+
+ },
+
+ _mouseStart: function(event, overrideHandle, noActivation) {
+
+ var o = this.options, self = this;
+ this.currentContainer = this;
+
+ //We only need to call refreshPositions, because the refreshItems call has been moved to mouseCapture
+ this.refreshPositions();
+
+ //Create and append the visible helper
+ this.helper = this._createHelper(event);
+
+ //Cache the helper size
+ this._cacheHelperProportions();
+
+ /*
+ * - Position generation -
+ * This block generates everything position related - it's the core of draggables.
+ */
+
+ //Cache the margins of the original element
+ this._cacheMargins();
+
+ //Get the next scrolling parent
+ this.scrollParent = this.helper.scrollParent();
+
+ //The element's absolute position on the page minus margins
+ this.offset = this.currentItem.offset();
+ this.offset = {
+ top: this.offset.top - this.margins.top,
+ left: this.offset.left - this.margins.left
+ };
+
+ // Only after we got the offset, we can change the helper's position to absolute
+ // TODO: Still need to figure out a way to make relative sorting possible
+ this.helper.css("position", "absolute");
+ this.cssPosition = this.helper.css("position");
+
+ $.extend(this.offset, {
+ click: { //Where the click happened, relative to the element
+ left: event.pageX - this.offset.left,
+ top: event.pageY - this.offset.top
+ },
+ parent: this._getParentOffset(),
+ relative: this._getRelativeOffset() //This is a relative to absolute position minus the actual position calculation - only used for relative positioned helper
+ });
+
+ //Generate the original position
+ this.originalPosition = this._generatePosition(event);
+ this.originalPageX = event.pageX;
+ this.originalPageY = event.pageY;
+
+ //Adjust the mouse offset relative to the helper if 'cursorAt' is supplied
+ (o.cursorAt && this._adjustOffsetFromHelper(o.cursorAt));
+
+ //Cache the former DOM position
+ this.domPosition = { prev: this.currentItem.prev()[0], parent: this.currentItem.parent()[0] };
+
+ //If the helper is not the original, hide the original so it's not playing any role during the drag, won't cause anything bad this way
+ if(this.helper[0] != this.currentItem[0]) {
+ this.currentItem.hide();
+ }
+
+ //Create the placeholder
+ this._createPlaceholder();
+
+ //Set a containment if given in the options
+ if(o.containment)
+ this._setContainment();
+
+ if(o.cursor) { // cursor option
+ if ($('body').css("cursor")) this._storedCursor = $('body').css("cursor");
+ $('body').css("cursor", o.cursor);
+ }
+
+ if(o.opacity) { // opacity option
+ if (this.helper.css("opacity")) this._storedOpacity = this.helper.css("opacity");
+ this.helper.css("opacity", o.opacity);
+ }
+
+ if(o.zIndex) { // zIndex option
+ if (this.helper.css("zIndex")) this._storedZIndex = this.helper.css("zIndex");
+ this.helper.css("zIndex", o.zIndex);
+ }
+
+ //Prepare scrolling
+ if(this.scrollParent[0] != document && this.scrollParent[0].tagName != 'HTML')
+ this.overflowOffset = this.scrollParent.offset();
+
+ //Call callbacks
+ this._trigger("start", event, this._uiHash());
+
+ //Recache the helper size
+ if(!this._preserveHelperProportions)
+ this._cacheHelperProportions();
+
+
+ //Post 'activate' events to possible containers
+ if(!noActivation) {
+ for (var i = this.containers.length - 1; i >= 0; i--) { this.containers[i]._trigger("activate", event, self._uiHash(this)); }
+ }
+
+ //Prepare possible droppables
+ if($.ui.ddmanager)
+ $.ui.ddmanager.current = this;
+
+ if ($.ui.ddmanager && !o.dropBehaviour)
+ $.ui.ddmanager.prepareOffsets(this, event);
+
+ this.dragging = true;
+
+ this.helper.addClass("ui-sortable-helper");
+ this._mouseDrag(event); //Execute the drag once - this causes the helper not to be visible before getting its correct position
+ return true;
+
+ },
+
+ _mouseDrag: function(event) {
+
+ //Compute the helpers position
+ this.position = this._generatePosition(event);
+ this.positionAbs = this._convertPositionTo("absolute");
+
+ if (!this.lastPositionAbs) {
+ this.lastPositionAbs = this.positionAbs;
+ }
+
+ //Do scrolling
+ if(this.options.scroll) {
+ var o = this.options, scrolled = false;
+ if(this.scrollParent[0] != document && this.scrollParent[0].tagName != 'HTML') {
+
+ if((this.overflowOffset.top + this.scrollParent[0].offsetHeight) - event.pageY < o.scrollSensitivity)
+ this.scrollParent[0].scrollTop = scrolled = this.scrollParent[0].scrollTop + o.scrollSpeed;
+ else if(event.pageY - this.overflowOffset.top < o.scrollSensitivity)
+ this.scrollParent[0].scrollTop = scrolled = this.scrollParent[0].scrollTop - o.scrollSpeed;
+
+ if((this.overflowOffset.left + this.scrollParent[0].offsetWidth) - event.pageX < o.scrollSensitivity)
+ this.scrollParent[0].scrollLeft = scrolled = this.scrollParent[0].scrollLeft + o.scrollSpeed;
+ else if(event.pageX - this.overflowOffset.left < o.scrollSensitivity)
+ this.scrollParent[0].scrollLeft = scrolled = this.scrollParent[0].scrollLeft - o.scrollSpeed;
+
+ } else {
+
+ if(event.pageY - $(document).scrollTop() < o.scrollSensitivity)
+ scrolled = $(document).scrollTop($(document).scrollTop() - o.scrollSpeed);
+ else if($(window).height() - (event.pageY - $(document).scrollTop()) < o.scrollSensitivity)
+ scrolled = $(document).scrollTop($(document).scrollTop() + o.scrollSpeed);
+
+ if(event.pageX - $(document).scrollLeft() < o.scrollSensitivity)
+ scrolled = $(document).scrollLeft($(document).scrollLeft() - o.scrollSpeed);
+ else if($(window).width() - (event.pageX - $(document).scrollLeft()) < o.scrollSensitivity)
+ scrolled = $(document).scrollLeft($(document).scrollLeft() + o.scrollSpeed);
+
+ }
+
+ if(scrolled !== false && $.ui.ddmanager && !o.dropBehaviour)
+ $.ui.ddmanager.prepareOffsets(this, event);
+ }
+
+ //Regenerate the absolute position used for position checks
+ this.positionAbs = this._convertPositionTo("absolute");
+
+ //Set the helper position
+ if(!this.options.axis || this.options.axis != "y") this.helper[0].style.left = this.position.left+'px';
+ if(!this.options.axis || this.options.axis != "x") this.helper[0].style.top = this.position.top+'px';
+
+ //Rearrange
+ for (var i = this.items.length - 1; i >= 0; i--) {
+
+ //Cache variables and intersection, continue if no intersection
+ var item = this.items[i], itemElement = item.item[0], intersection = this._intersectsWithPointer(item);
+ if (!intersection) continue;
+
+ if(itemElement != this.currentItem[0] //cannot intersect with itself
+ && this.placeholder[intersection == 1 ? "next" : "prev"]()[0] != itemElement //no useless actions that have been done before
+ && !$.ui.contains(this.placeholder[0], itemElement) //no action if the item moved is the parent of the item checked
+ && (this.options.type == 'semi-dynamic' ? !$.ui.contains(this.element[0], itemElement) : true)
+ //&& itemElement.parentNode == this.placeholder[0].parentNode // only rearrange items within the same container
+ ) {
+
+ this.direction = intersection == 1 ? "down" : "up";
+
+ if (this.options.tolerance == "pointer" || this._intersectsWithSides(item)) {
+ this._rearrange(event, item);
+ } else {
+ break;
+ }
+
+ this._trigger("change", event, this._uiHash());
+ break;
+ }
+ }
+
+ //Post events to containers
+ this._contactContainers(event);
+
+ //Interconnect with droppables
+ if($.ui.ddmanager) $.ui.ddmanager.drag(this, event);
+
+ //Call callbacks
+ this._trigger('sort', event, this._uiHash());
+
+ this.lastPositionAbs = this.positionAbs;
+ return false;
+
+ },
+
+ _mouseStop: function(event, noPropagation) {
+
+ if(!event) return;
+
+ //If we are using droppables, inform the manager about the drop
+ if ($.ui.ddmanager && !this.options.dropBehaviour)
+ $.ui.ddmanager.drop(this, event);
+
+ if(this.options.revert) {
+ var self = this;
+ var cur = self.placeholder.offset();
+
+ self.reverting = true;
+
+ $(this.helper).animate({
+ left: cur.left - this.offset.parent.left - self.margins.left + (this.offsetParent[0] == document.body ? 0 : this.offsetParent[0].scrollLeft),
+ top: cur.top - this.offset.parent.top - self.margins.top + (this.offsetParent[0] == document.body ? 0 : this.offsetParent[0].scrollTop)
+ }, parseInt(this.options.revert, 10) || 500, function() {
+ self._clear(event);
+ });
+ } else {
+ this._clear(event, noPropagation);
+ }
+
+ return false;
+
+ },
+
+ cancel: function() {
+
+ var self = this;
+
+ if(this.dragging) {
+
+ this._mouseUp({ target: null });
+
+ if(this.options.helper == "original")
+ this.currentItem.css(this._storedCSS).removeClass("ui-sortable-helper");
+ else
+ this.currentItem.show();
+
+ //Post deactivating events to containers
+ for (var i = this.containers.length - 1; i >= 0; i--){
+ this.containers[i]._trigger("deactivate", null, self._uiHash(this));
+ if(this.containers[i].containerCache.over) {
+ this.containers[i]._trigger("out", null, self._uiHash(this));
+ this.containers[i].containerCache.over = 0;
+ }
+ }
+
+ }
+
+ if (this.placeholder) {
+ //$(this.placeholder[0]).remove(); would have been the jQuery way - unfortunately, it unbinds ALL events from the original node!
+ if(this.placeholder[0].parentNode) this.placeholder[0].parentNode.removeChild(this.placeholder[0]);
+ if(this.options.helper != "original" && this.helper && this.helper[0].parentNode) this.helper.remove();
+
+ $.extend(this, {
+ helper: null,
+ dragging: false,
+ reverting: false,
+ _noFinalSort: null
+ });
+
+ if(this.domPosition.prev) {
+ $(this.domPosition.prev).after(this.currentItem);
+ } else {
+ $(this.domPosition.parent).prepend(this.currentItem);
+ }
+ }
+
+ return this;
+
+ },
+
+ serialize: function(o) {
+
+ var items = this._getItemsAsjQuery(o && o.connected);
+ var str = []; o = o || {};
+
+ $(items).each(function() {
+ var res = ($(o.item || this).attr(o.attribute || 'id') || '').match(o.expression || (/(.+)[-=_](.+)/));
+ if(res) str.push((o.key || res[1]+'[]')+'='+(o.key && o.expression ? res[1] : res[2]));
+ });
+
+ if(!str.length && o.key) {
+ str.push(o.key + '=');
+ }
+
+ return str.join('&');
+
+ },
+
+ toArray: function(o) {
+
+ var items = this._getItemsAsjQuery(o && o.connected);
+ var ret = []; o = o || {};
+
+ items.each(function() { ret.push($(o.item || this).attr(o.attribute || 'id') || ''); });
+ return ret;
+
+ },
+
+ /* Be careful with the following core functions */
+ _intersectsWith: function(item) {
+
+ var x1 = this.positionAbs.left,
+ x2 = x1 + this.helperProportions.width,
+ y1 = this.positionAbs.top,
+ y2 = y1 + this.helperProportions.height;
+
+ var l = item.left,
+ r = l + item.width,
+ t = item.top,
+ b = t + item.height;
+
+ var dyClick = this.offset.click.top,
+ dxClick = this.offset.click.left;
+
+ var isOverElement = (y1 + dyClick) > t && (y1 + dyClick) < b && (x1 + dxClick) > l && (x1 + dxClick) < r;
+
+ if( this.options.tolerance == "pointer"
+ || this.options.forcePointerForContainers
+ || (this.options.tolerance != "pointer" && this.helperProportions[this.floating ? 'width' : 'height'] > item[this.floating ? 'width' : 'height'])
+ ) {
+ return isOverElement;
+ } else {
+
+ return (l < x1 + (this.helperProportions.width / 2) // Right Half
+ && x2 - (this.helperProportions.width / 2) < r // Left Half
+ && t < y1 + (this.helperProportions.height / 2) // Bottom Half
+ && y2 - (this.helperProportions.height / 2) < b ); // Top Half
+
+ }
+ },
+
+ _intersectsWithPointer: function(item) {
+
+ var isOverElementHeight = $.ui.isOverAxis(this.positionAbs.top + this.offset.click.top, item.top, item.height),
+ isOverElementWidth = $.ui.isOverAxis(this.positionAbs.left + this.offset.click.left, item.left, item.width),
+ isOverElement = isOverElementHeight && isOverElementWidth,
+ verticalDirection = this._getDragVerticalDirection(),
+ horizontalDirection = this._getDragHorizontalDirection();
+
+ if (!isOverElement)
+ return false;
+
+ return this.floating ?
+ ( ((horizontalDirection && horizontalDirection == "right") || verticalDirection == "down") ? 2 : 1 )
+ : ( verticalDirection && (verticalDirection == "down" ? 2 : 1) );
+
+ },
+
+ _intersectsWithSides: function(item) {
+
+ var isOverBottomHalf = $.ui.isOverAxis(this.positionAbs.top + this.offset.click.top, item.top + (item.height/2), item.height),
+ isOverRightHalf = $.ui.isOverAxis(this.positionAbs.left + this.offset.click.left, item.left + (item.width/2), item.width),
+ verticalDirection = this._getDragVerticalDirection(),
+ horizontalDirection = this._getDragHorizontalDirection();
+
+ if (this.floating && horizontalDirection) {
+ return ((horizontalDirection == "right" && isOverRightHalf) || (horizontalDirection == "left" && !isOverRightHalf));
+ } else {
+ return verticalDirection && ((verticalDirection == "down" && isOverBottomHalf) || (verticalDirection == "up" && !isOverBottomHalf));
+ }
+
+ },
+
+ _getDragVerticalDirection: function() {
+ var delta = this.positionAbs.top - this.lastPositionAbs.top;
+ return delta != 0 && (delta > 0 ? "down" : "up");
+ },
+
+ _getDragHorizontalDirection: function() {
+ var delta = this.positionAbs.left - this.lastPositionAbs.left;
+ return delta != 0 && (delta > 0 ? "right" : "left");
+ },
+
+ refresh: function(event) {
+ this._refreshItems(event);
+ this.refreshPositions();
+ return this;
+ },
+
+ _connectWith: function() {
+ var options = this.options;
+ return options.connectWith.constructor == String
+ ? [options.connectWith]
+ : options.connectWith;
+ },
+
+ _getItemsAsjQuery: function(connected) {
+
+ var self = this;
+ var items = [];
+ var queries = [];
+ var connectWith = this._connectWith();
+
+ if(connectWith && connected) {
+ for (var i = connectWith.length - 1; i >= 0; i--){
+ var cur = $(connectWith[i]);
+ for (var j = cur.length - 1; j >= 0; j--){
+ var inst = $.data(cur[j], 'sortable');
+ if(inst && inst != this && !inst.options.disabled) {
+ queries.push([$.isFunction(inst.options.items) ? inst.options.items.call(inst.element) : $(inst.options.items, inst.element).not(".ui-sortable-helper").not('.ui-sortable-placeholder'), inst]);
+ }
+ };
+ };
+ }
+
+ queries.push([$.isFunction(this.options.items) ? this.options.items.call(this.element, null, { options: this.options, item: this.currentItem }) : $(this.options.items, this.element).not(".ui-sortable-helper").not('.ui-sortable-placeholder'), this]);
+
+ for (var i = queries.length - 1; i >= 0; i--){
+ queries[i][0].each(function() {
+ items.push(this);
+ });
+ };
+
+ return $(items);
+
+ },
+
+ _removeCurrentsFromItems: function() {
+
+ var list = this.currentItem.find(":data(sortable-item)");
+
+ for (var i=0; i < this.items.length; i++) {
+
+ for (var j=0; j < list.length; j++) {
+ if(list[j] == this.items[i].item[0])
+ this.items.splice(i,1);
+ };
+
+ };
+
+ },
+
+ _refreshItems: function(event) {
+
+ this.items = [];
+ this.containers = [this];
+ var items = this.items;
+ var self = this;
+ var queries = [[$.isFunction(this.options.items) ? this.options.items.call(this.element[0], event, { item: this.currentItem }) : $(this.options.items, this.element), this]];
+ var connectWith = this._connectWith();
+
+ if(connectWith) {
+ for (var i = connectWith.length - 1; i >= 0; i--){
+ var cur = $(connectWith[i]);
+ for (var j = cur.length - 1; j >= 0; j--){
+ var inst = $.data(cur[j], 'sortable');
+ if(inst && inst != this && !inst.options.disabled) {
+ queries.push([$.isFunction(inst.options.items) ? inst.options.items.call(inst.element[0], event, { item: this.currentItem }) : $(inst.options.items, inst.element), inst]);
+ this.containers.push(inst);
+ }
+ };
+ };
+ }
+
+ for (var i = queries.length - 1; i >= 0; i--) {
+ var targetData = queries[i][1];
+ var _queries = queries[i][0];
+
+ for (var j=0, queriesLength = _queries.length; j < queriesLength; j++) {
+ var item = $(_queries[j]);
+
+ item.data('sortable-item', targetData); // Data for target checking (mouse manager)
+
+ items.push({
+ item: item,
+ instance: targetData,
+ width: 0, height: 0,
+ left: 0, top: 0
+ });
+ };
+ };
+
+ },
+
+ refreshPositions: function(fast) {
+
+ //This has to be redone because due to the item being moved out/into the offsetParent, the offsetParent's position will change
+ if(this.offsetParent && this.helper) {
+ this.offset.parent = this._getParentOffset();
+ }
+
+ for (var i = this.items.length - 1; i >= 0; i--){
+ var item = this.items[i];
+
+ //We ignore calculating positions of all connected containers when we're not over them
+ if(item.instance != this.currentContainer && this.currentContainer && item.item[0] != this.currentItem[0])
+ continue;
+
+ var t = this.options.toleranceElement ? $(this.options.toleranceElement, item.item) : item.item;
+
+ if (!fast) {
+ item.width = t.outerWidth();
+ item.height = t.outerHeight();
+ }
+
+ var p = t.offset();
+ item.left = p.left;
+ item.top = p.top;
+ };
+
+ if(this.options.custom && this.options.custom.refreshContainers) {
+ this.options.custom.refreshContainers.call(this);
+ } else {
+ for (var i = this.containers.length - 1; i >= 0; i--){
+ var p = this.containers[i].element.offset();
+ this.containers[i].containerCache.left = p.left;
+ this.containers[i].containerCache.top = p.top;
+ this.containers[i].containerCache.width = this.containers[i].element.outerWidth();
+ this.containers[i].containerCache.height = this.containers[i].element.outerHeight();
+ };
+ }
+
+ return this;
+ },
+
+ _createPlaceholder: function(that) {
+
+ var self = that || this, o = self.options;
+
+ if(!o.placeholder || o.placeholder.constructor == String) {
+ var className = o.placeholder;
+ o.placeholder = {
+ element: function() {
+
+ var el = $(document.createElement(self.currentItem[0].nodeName))
+ .addClass(className || self.currentItem[0].className+" ui-sortable-placeholder")
+ .removeClass("ui-sortable-helper")[0];
+
+ if(!className)
+ el.style.visibility = "hidden";
+
+ return el;
+ },
+ update: function(container, p) {
+
+ // 1. If a className is set as 'placeholder option, we don't force sizes - the class is responsible for that
+ // 2. The option 'forcePlaceholderSize can be enabled to force it even if a class name is specified
+ if(className && !o.forcePlaceholderSize) return;
+
+ //If the element doesn't have a actual height by itself (without styles coming from a stylesheet), it receives the inline height from the dragged item
+ if(!p.height()) { p.height(self.currentItem.innerHeight() - parseInt(self.currentItem.css('paddingTop')||0, 10) - parseInt(self.currentItem.css('paddingBottom')||0, 10)); };
+ if(!p.width()) { p.width(self.currentItem.innerWidth() - parseInt(self.currentItem.css('paddingLeft')||0, 10) - parseInt(self.currentItem.css('paddingRight')||0, 10)); };
+ }
+ };
+ }
+
+ //Create the placeholder
+ self.placeholder = $(o.placeholder.element.call(self.element, self.currentItem));
+
+ //Append it after the actual current item
+ self.currentItem.after(self.placeholder);
+
+ //Update the size of the placeholder (TODO: Logic to fuzzy, see line 316/317)
+ o.placeholder.update(self, self.placeholder);
+
+ },
+
+ _contactContainers: function(event) {
+
+ // get innermost container that intersects with item
+ var innermostContainer = null, innermostIndex = null;
+
+
+ for (var i = this.containers.length - 1; i >= 0; i--){
+
+ // never consider a container that's located within the item itself
+ if($.ui.contains(this.currentItem[0], this.containers[i].element[0]))
+ continue;
+
+ if(this._intersectsWith(this.containers[i].containerCache)) {
+
+ // if we've already found a container and it's more "inner" than this, then continue
+ if(innermostContainer && $.ui.contains(this.containers[i].element[0], innermostContainer.element[0]))
+ continue;
+
+ innermostContainer = this.containers[i];
+ innermostIndex = i;
+
+ } else {
+ // container doesn't intersect. trigger "out" event if necessary
+ if(this.containers[i].containerCache.over) {
+ this.containers[i]._trigger("out", event, this._uiHash(this));
+ this.containers[i].containerCache.over = 0;
+ }
+ }
+
+ }
+
+ // if no intersecting containers found, return
+ if(!innermostContainer) return;
+
+ // move the item into the container if it's not there already
+ if(this.containers.length === 1) {
+ this.containers[innermostIndex]._trigger("over", event, this._uiHash(this));
+ this.containers[innermostIndex].containerCache.over = 1;
+ } else if(this.currentContainer != this.containers[innermostIndex]) {
+
+ //When entering a new container, we will find the item with the least distance and append our item near it
+ var dist = 10000; var itemWithLeastDistance = null; var base = this.positionAbs[this.containers[innermostIndex].floating ? 'left' : 'top'];
+ for (var j = this.items.length - 1; j >= 0; j--) {
+ if(!$.ui.contains(this.containers[innermostIndex].element[0], this.items[j].item[0])) continue;
+ var cur = this.items[j][this.containers[innermostIndex].floating ? 'left' : 'top'];
+ if(Math.abs(cur - base) < dist) {
+ dist = Math.abs(cur - base); itemWithLeastDistance = this.items[j];
+ }
+ }
+
+ if(!itemWithLeastDistance && !this.options.dropOnEmpty) //Check if dropOnEmpty is enabled
+ return;
+
+ this.currentContainer = this.containers[innermostIndex];
+ itemWithLeastDistance ? this._rearrange(event, itemWithLeastDistance, null, true) : this._rearrange(event, null, this.containers[innermostIndex].element, true);
+ this._trigger("change", event, this._uiHash());
+ this.containers[innermostIndex]._trigger("change", event, this._uiHash(this));
+
+ //Update the placeholder
+ this.options.placeholder.update(this.currentContainer, this.placeholder);
+
+ this.containers[innermostIndex]._trigger("over", event, this._uiHash(this));
+ this.containers[innermostIndex].containerCache.over = 1;
+ }
+
+
+ },
+
+ _createHelper: function(event) {
+
+ var o = this.options;
+ var helper = $.isFunction(o.helper) ? $(o.helper.apply(this.element[0], [event, this.currentItem])) : (o.helper == 'clone' ? this.currentItem.clone() : this.currentItem);
+
+ if(!helper.parents('body').length) //Add the helper to the DOM if that didn't happen already
+ $(o.appendTo != 'parent' ? o.appendTo : this.currentItem[0].parentNode)[0].appendChild(helper[0]);
+
+ if(helper[0] == this.currentItem[0])
+ this._storedCSS = { width: this.currentItem[0].style.width, height: this.currentItem[0].style.height, position: this.currentItem.css("position"), top: this.currentItem.css("top"), left: this.currentItem.css("left") };
+
+ if(helper[0].style.width == '' || o.forceHelperSize) helper.width(this.currentItem.width());
+ if(helper[0].style.height == '' || o.forceHelperSize) helper.height(this.currentItem.height());
+
+ return helper;
+
+ },
+
+ _adjustOffsetFromHelper: function(obj) {
+ if (typeof obj == 'string') {
+ obj = obj.split(' ');
+ }
+ if ($.isArray(obj)) {
+ obj = {left: +obj[0], top: +obj[1] || 0};
+ }
+ if ('left' in obj) {
+ this.offset.click.left = obj.left + this.margins.left;
+ }
+ if ('right' in obj) {
+ this.offset.click.left = this.helperProportions.width - obj.right + this.margins.left;
+ }
+ if ('top' in obj) {
+ this.offset.click.top = obj.top + this.margins.top;
+ }
+ if ('bottom' in obj) {
+ this.offset.click.top = this.helperProportions.height - obj.bottom + this.margins.top;
+ }
+ },
+
+ _getParentOffset: function() {
+
+
+ //Get the offsetParent and cache its position
+ this.offsetParent = this.helper.offsetParent();
+ var po = this.offsetParent.offset();
+
+ // This is a special case where we need to modify a offset calculated on start, since the following happened:
+ // 1. The position of the helper is absolute, so it's position is calculated based on the next positioned parent
+ // 2. The actual offset parent is a child of the scroll parent, and the scroll parent isn't the document, which means that
+ // the scroll is included in the initial calculation of the offset of the parent, and never recalculated upon drag
+ if(this.cssPosition == 'absolute' && this.scrollParent[0] != document && $.ui.contains(this.scrollParent[0], this.offsetParent[0])) {
+ po.left += this.scrollParent.scrollLeft();
+ po.top += this.scrollParent.scrollTop();
+ }
+
+ if((this.offsetParent[0] == document.body) //This needs to be actually done for all browsers, since pageX/pageY includes this information
+ || (this.offsetParent[0].tagName && this.offsetParent[0].tagName.toLowerCase() == 'html' && $.browser.msie)) //Ugly IE fix
+ po = { top: 0, left: 0 };
+
+ return {
+ top: po.top + (parseInt(this.offsetParent.css("borderTopWidth"),10) || 0),
+ left: po.left + (parseInt(this.offsetParent.css("borderLeftWidth"),10) || 0)
+ };
+
+ },
+
+ _getRelativeOffset: function() {
+
+ if(this.cssPosition == "relative") {
+ var p = this.currentItem.position();
+ return {
+ top: p.top - (parseInt(this.helper.css("top"),10) || 0) + this.scrollParent.scrollTop(),
+ left: p.left - (parseInt(this.helper.css("left"),10) || 0) + this.scrollParent.scrollLeft()
+ };
+ } else {
+ return { top: 0, left: 0 };
+ }
+
+ },
+
+ _cacheMargins: function() {
+ this.margins = {
+ left: (parseInt(this.currentItem.css("marginLeft"),10) || 0),
+ top: (parseInt(this.currentItem.css("marginTop"),10) || 0)
+ };
+ },
+
+ _cacheHelperProportions: function() {
+ this.helperProportions = {
+ width: this.helper.outerWidth(),
+ height: this.helper.outerHeight()
+ };
+ },
+
+ _setContainment: function() {
+
+ var o = this.options;
+ if(o.containment == 'parent') o.containment = this.helper[0].parentNode;
+ if(o.containment == 'document' || o.containment == 'window') this.containment = [
+ 0 - this.offset.relative.left - this.offset.parent.left,
+ 0 - this.offset.relative.top - this.offset.parent.top,
+ $(o.containment == 'document' ? document : window).width() - this.helperProportions.width - this.margins.left,
+ ($(o.containment == 'document' ? document : window).height() || document.body.parentNode.scrollHeight) - this.helperProportions.height - this.margins.top
+ ];
+
+ if(!(/^(document|window|parent)$/).test(o.containment)) {
+ var ce = $(o.containment)[0];
+ var co = $(o.containment).offset();
+ var over = ($(ce).css("overflow") != 'hidden');
+
+ this.containment = [
+ co.left + (parseInt($(ce).css("borderLeftWidth"),10) || 0) + (parseInt($(ce).css("paddingLeft"),10) || 0) - this.margins.left,
+ co.top + (parseInt($(ce).css("borderTopWidth"),10) || 0) + (parseInt($(ce).css("paddingTop"),10) || 0) - this.margins.top,
+ co.left+(over ? Math.max(ce.scrollWidth,ce.offsetWidth) : ce.offsetWidth) - (parseInt($(ce).css("borderLeftWidth"),10) || 0) - (parseInt($(ce).css("paddingRight"),10) || 0) - this.helperProportions.width - this.margins.left,
+ co.top+(over ? Math.max(ce.scrollHeight,ce.offsetHeight) : ce.offsetHeight) - (parseInt($(ce).css("borderTopWidth"),10) || 0) - (parseInt($(ce).css("paddingBottom"),10) || 0) - this.helperProportions.height - this.margins.top
+ ];
+ }
+
+ },
+
+ _convertPositionTo: function(d, pos) {
+
+ if(!pos) pos = this.position;
+ var mod = d == "absolute" ? 1 : -1;
+ var o = this.options, scroll = this.cssPosition == 'absolute' && !(this.scrollParent[0] != document && $.ui.contains(this.scrollParent[0], this.offsetParent[0])) ? this.offsetParent : this.scrollParent, scrollIsRootNode = (/(html|body)/i).test(scroll[0].tagName);
+
+ return {
+ top: (
+ pos.top // The absolute mouse position
+ + this.offset.relative.top * mod // Only for relative positioned nodes: Relative offset from element to offset parent
+ + this.offset.parent.top * mod // The offsetParent's offset without borders (offset + border)
+ - ($.browser.safari && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollTop() : ( scrollIsRootNode ? 0 : scroll.scrollTop() ) ) * mod)
+ ),
+ left: (
+ pos.left // The absolute mouse position
+ + this.offset.relative.left * mod // Only for relative positioned nodes: Relative offset from element to offset parent
+ + this.offset.parent.left * mod // The offsetParent's offset without borders (offset + border)
+ - ($.browser.safari && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollLeft() : scrollIsRootNode ? 0 : scroll.scrollLeft() ) * mod)
+ )
+ };
+
+ },
+
+ _generatePosition: function(event) {
+
+ var o = this.options, scroll = this.cssPosition == 'absolute' && !(this.scrollParent[0] != document && $.ui.contains(this.scrollParent[0], this.offsetParent[0])) ? this.offsetParent : this.scrollParent, scrollIsRootNode = (/(html|body)/i).test(scroll[0].tagName);
+
+ // This is another very weird special case that only happens for relative elements:
+ // 1. If the css position is relative
+ // 2. and the scroll parent is the document or similar to the offset parent
+ // we have to refresh the relative offset during the scroll so there are no jumps
+ if(this.cssPosition == 'relative' && !(this.scrollParent[0] != document && this.scrollParent[0] != this.offsetParent[0])) {
+ this.offset.relative = this._getRelativeOffset();
+ }
+
+ var pageX = event.pageX;
+ var pageY = event.pageY;
+
+ /*
+ * - Position constraining -
+ * Constrain the position to a mix of grid, containment.
+ */
+
+ if(this.originalPosition) { //If we are not dragging yet, we won't check for options
+
+ if(this.containment) {
+ if(event.pageX - this.offset.click.left < this.containment[0]) pageX = this.containment[0] + this.offset.click.left;
+ if(event.pageY - this.offset.click.top < this.containment[1]) pageY = this.containment[1] + this.offset.click.top;
+ if(event.pageX - this.offset.click.left > this.containment[2]) pageX = this.containment[2] + this.offset.click.left;
+ if(event.pageY - this.offset.click.top > this.containment[3]) pageY = this.containment[3] + this.offset.click.top;
+ }
+
+ if(o.grid) {
+ var top = this.originalPageY + Math.round((pageY - this.originalPageY) / o.grid[1]) * o.grid[1];
+ pageY = this.containment ? (!(top - this.offset.click.top < this.containment[1] || top - this.offset.click.top > this.containment[3]) ? top : (!(top - this.offset.click.top < this.containment[1]) ? top - o.grid[1] : top + o.grid[1])) : top;
+
+ var left = this.originalPageX + Math.round((pageX - this.originalPageX) / o.grid[0]) * o.grid[0];
+ pageX = this.containment ? (!(left - this.offset.click.left < this.containment[0] || left - this.offset.click.left > this.containment[2]) ? left : (!(left - this.offset.click.left < this.containment[0]) ? left - o.grid[0] : left + o.grid[0])) : left;
+ }
+
+ }
+
+ return {
+ top: (
+ pageY // The absolute mouse position
+ - this.offset.click.top // Click offset (relative to the element)
+ - this.offset.relative.top // Only for relative positioned nodes: Relative offset from element to offset parent
+ - this.offset.parent.top // The offsetParent's offset without borders (offset + border)
+ + ($.browser.safari && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollTop() : ( scrollIsRootNode ? 0 : scroll.scrollTop() ) ))
+ ),
+ left: (
+ pageX // The absolute mouse position
+ - this.offset.click.left // Click offset (relative to the element)
+ - this.offset.relative.left // Only for relative positioned nodes: Relative offset from element to offset parent
+ - this.offset.parent.left // The offsetParent's offset without borders (offset + border)
+ + ($.browser.safari && this.cssPosition == 'fixed' ? 0 : ( this.cssPosition == 'fixed' ? -this.scrollParent.scrollLeft() : scrollIsRootNode ? 0 : scroll.scrollLeft() ))
+ )
+ };
+
+ },
+
+ _rearrange: function(event, i, a, hardRefresh) {
+
+ a ? a[0].appendChild(this.placeholder[0]) : i.item[0].parentNode.insertBefore(this.placeholder[0], (this.direction == 'down' ? i.item[0] : i.item[0].nextSibling));
+
+ //Various things done here to improve the performance:
+ // 1. we create a setTimeout, that calls refreshPositions
+ // 2. on the instance, we have a counter variable, that get's higher after every append
+ // 3. on the local scope, we copy the counter variable, and check in the timeout, if it's still the same
+ // 4. this lets only the last addition to the timeout stack through
+ this.counter = this.counter ? ++this.counter : 1;
+ var self = this, counter = this.counter;
+
+ window.setTimeout(function() {
+ if(counter == self.counter) self.refreshPositions(!hardRefresh); //Precompute after each DOM insertion, NOT on mousemove
+ },0);
+
+ },
+
+ _clear: function(event, noPropagation) {
+
+ this.reverting = false;
+ // We delay all events that have to be triggered to after the point where the placeholder has been removed and
+ // everything else normalized again
+ var delayedTriggers = [], self = this;
+
+ // We first have to update the dom position of the actual currentItem
+ // Note: don't do it if the current item is already removed (by a user), or it gets reappended (see #4088)
+ if(!this._noFinalSort && this.currentItem.parent().length) this.placeholder.before(this.currentItem);
+ this._noFinalSort = null;
+
+ if(this.helper[0] == this.currentItem[0]) {
+ for(var i in this._storedCSS) {
+ if(this._storedCSS[i] == 'auto' || this._storedCSS[i] == 'static') this._storedCSS[i] = '';
+ }
+ this.currentItem.css(this._storedCSS).removeClass("ui-sortable-helper");
+ } else {
+ this.currentItem.show();
+ }
+
+ if(this.fromOutside && !noPropagation) delayedTriggers.push(function(event) { this._trigger("receive", event, this._uiHash(this.fromOutside)); });
+ if((this.fromOutside || this.domPosition.prev != this.currentItem.prev().not(".ui-sortable-helper")[0] || this.domPosition.parent != this.currentItem.parent()[0]) && !noPropagation) delayedTriggers.push(function(event) { this._trigger("update", event, this._uiHash()); }); //Trigger update callback if the DOM position has changed
+ if(!$.ui.contains(this.element[0], this.currentItem[0])) { //Node was moved out of the current element
+ if(!noPropagation) delayedTriggers.push(function(event) { this._trigger("remove", event, this._uiHash()); });
+ for (var i = this.containers.length - 1; i >= 0; i--){
+ if($.ui.contains(this.containers[i].element[0], this.currentItem[0]) && !noPropagation) {
+ delayedTriggers.push((function(c) { return function(event) { c._trigger("receive", event, this._uiHash(this)); }; }).call(this, this.containers[i]));
+ delayedTriggers.push((function(c) { return function(event) { c._trigger("update", event, this._uiHash(this)); }; }).call(this, this.containers[i]));
+ }
+ };
+ };
+
+ //Post events to containers
+ for (var i = this.containers.length - 1; i >= 0; i--){
+ if(!noPropagation) delayedTriggers.push((function(c) { return function(event) { c._trigger("deactivate", event, this._uiHash(this)); }; }).call(this, this.containers[i]));
+ if(this.containers[i].containerCache.over) {
+ delayedTriggers.push((function(c) { return function(event) { c._trigger("out", event, this._uiHash(this)); }; }).call(this, this.containers[i]));
+ this.containers[i].containerCache.over = 0;
+ }
+ }
+
+ //Do what was originally in plugins
+ if(this._storedCursor) $('body').css("cursor", this._storedCursor); //Reset cursor
+ if(this._storedOpacity) this.helper.css("opacity", this._storedOpacity); //Reset opacity
+ if(this._storedZIndex) this.helper.css("zIndex", this._storedZIndex == 'auto' ? '' : this._storedZIndex); //Reset z-index
+
+ this.dragging = false;
+ if(this.cancelHelperRemoval) {
+ if(!noPropagation) {
+ this._trigger("beforeStop", event, this._uiHash());
+ for (var i=0; i < delayedTriggers.length; i++) { delayedTriggers[i].call(this, event); }; //Trigger all delayed events
+ this._trigger("stop", event, this._uiHash());
+ }
+ return false;
+ }
+
+ if(!noPropagation) this._trigger("beforeStop", event, this._uiHash());
+
+ //$(this.placeholder[0]).remove(); would have been the jQuery way - unfortunately, it unbinds ALL events from the original node!
+ this.placeholder[0].parentNode.removeChild(this.placeholder[0]);
+
+ if(this.helper[0] != this.currentItem[0]) this.helper.remove(); this.helper = null;
+
+ if(!noPropagation) {
+ for (var i=0; i < delayedTriggers.length; i++) { delayedTriggers[i].call(this, event); }; //Trigger all delayed events
+ this._trigger("stop", event, this._uiHash());
+ }
+
+ this.fromOutside = false;
+ return true;
+
+ },
+
+ _trigger: function() {
+ if ($.Widget.prototype._trigger.apply(this, arguments) === false) {
+ this.cancel();
+ }
+ },
+
+ _uiHash: function(inst) {
+ var self = inst || this;
+ return {
+ helper: self.helper,
+ placeholder: self.placeholder || $([]),
+ position: self.position,
+ originalPosition: self.originalPosition,
+ offset: self.positionAbs,
+ item: self.currentItem,
+ sender: inst ? inst.element : null
+ };
+ }
+
+});
+
+$.extend($.ui.sortable, {
+ version: "1.8.16"
+});
+
+})(jQuery);
+/*
+ * jQuery UI Accordion 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Accordion
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+$.widget( "ui.accordion", {
+ options: {
+ active: 0,
+ animated: "slide",
+ autoHeight: true,
+ clearStyle: false,
+ collapsible: false,
+ event: "click",
+ fillSpace: false,
+ header: "> li > :first-child,> :not(li):even",
+ icons: {
+ header: "ui-icon-triangle-1-e",
+ headerSelected: "ui-icon-triangle-1-s"
+ },
+ navigation: false,
+ navigationFilter: function() {
+ return this.href.toLowerCase() === location.href.toLowerCase();
+ }
+ },
+
+ _create: function() {
+ var self = this,
+ options = self.options;
+
+ self.running = 0;
+
+ self.element
+ .addClass( "ui-accordion ui-widget ui-helper-reset" )
+ // in lack of child-selectors in CSS
+ // we need to mark top-LIs in a UL-accordion for some IE-fix
+ .children( "li" )
+ .addClass( "ui-accordion-li-fix" );
+
+ self.headers = self.element.find( options.header )
+ .addClass( "ui-accordion-header ui-helper-reset ui-state-default ui-corner-all" )
+ .bind( "mouseenter.accordion", function() {
+ if ( options.disabled ) {
+ return;
+ }
+ $( this ).addClass( "ui-state-hover" );
+ })
+ .bind( "mouseleave.accordion", function() {
+ if ( options.disabled ) {
+ return;
+ }
+ $( this ).removeClass( "ui-state-hover" );
+ })
+ .bind( "focus.accordion", function() {
+ if ( options.disabled ) {
+ return;
+ }
+ $( this ).addClass( "ui-state-focus" );
+ })
+ .bind( "blur.accordion", function() {
+ if ( options.disabled ) {
+ return;
+ }
+ $( this ).removeClass( "ui-state-focus" );
+ });
+
+ self.headers.next()
+ .addClass( "ui-accordion-content ui-helper-reset ui-widget-content ui-corner-bottom" );
+
+ if ( options.navigation ) {
+ var current = self.element.find( "a" ).filter( options.navigationFilter ).eq( 0 );
+ if ( current.length ) {
+ var header = current.closest( ".ui-accordion-header" );
+ if ( header.length ) {
+ // anchor within header
+ self.active = header;
+ } else {
+ // anchor within content
+ self.active = current.closest( ".ui-accordion-content" ).prev();
+ }
+ }
+ }
+
+ self.active = self._findActive( self.active || options.active )
+ .addClass( "ui-state-default ui-state-active" )
+ .toggleClass( "ui-corner-all" )
+ .toggleClass( "ui-corner-top" );
+ self.active.next().addClass( "ui-accordion-content-active" );
+
+ self._createIcons();
+ self.resize();
+
+ // ARIA
+ self.element.attr( "role", "tablist" );
+
+ self.headers
+ .attr( "role", "tab" )
+ .bind( "keydown.accordion", function( event ) {
+ return self._keydown( event );
+ })
+ .next()
+ .attr( "role", "tabpanel" );
+
+ self.headers
+ .not( self.active || "" )
+ .attr({
+ "aria-expanded": "false",
+ "aria-selected": "false",
+ tabIndex: -1
+ })
+ .next()
+ .hide();
+
+ // make sure at least one header is in the tab order
+ if ( !self.active.length ) {
+ self.headers.eq( 0 ).attr( "tabIndex", 0 );
+ } else {
+ self.active
+ .attr({
+ "aria-expanded": "true",
+ "aria-selected": "true",
+ tabIndex: 0
+ });
+ }
+
+ // only need links in tab order for Safari
+ if ( !$.browser.safari ) {
+ self.headers.find( "a" ).attr( "tabIndex", -1 );
+ }
+
+ if ( options.event ) {
+ self.headers.bind( options.event.split(" ").join(".accordion ") + ".accordion", function(event) {
+ self._clickHandler.call( self, event, this );
+ event.preventDefault();
+ });
+ }
+ },
+
+ _createIcons: function() {
+ var options = this.options;
+ if ( options.icons ) {
+ $( "<span></span>" )
+ .addClass( "ui-icon " + options.icons.header )
+ .prependTo( this.headers );
+ this.active.children( ".ui-icon" )
+ .toggleClass(options.icons.header)
+ .toggleClass(options.icons.headerSelected);
+ this.element.addClass( "ui-accordion-icons" );
+ }
+ },
+
+ _destroyIcons: function() {
+ this.headers.children( ".ui-icon" ).remove();
+ this.element.removeClass( "ui-accordion-icons" );
+ },
+
+ destroy: function() {
+ var options = this.options;
+
+ this.element
+ .removeClass( "ui-accordion ui-widget ui-helper-reset" )
+ .removeAttr( "role" );
+
+ this.headers
+ .unbind( ".accordion" )
+ .removeClass( "ui-accordion-header ui-accordion-disabled ui-helper-reset ui-state-default ui-corner-all ui-state-active ui-state-disabled ui-corner-top" )
+ .removeAttr( "role" )
+ .removeAttr( "aria-expanded" )
+ .removeAttr( "aria-selected" )
+ .removeAttr( "tabIndex" );
+
+ this.headers.find( "a" ).removeAttr( "tabIndex" );
+ this._destroyIcons();
+ var contents = this.headers.next()
+ .css( "display", "" )
+ .removeAttr( "role" )
+ .removeClass( "ui-helper-reset ui-widget-content ui-corner-bottom ui-accordion-content ui-accordion-content-active ui-accordion-disabled ui-state-disabled" );
+ if ( options.autoHeight || options.fillHeight ) {
+ contents.css( "height", "" );
+ }
+
+ return $.Widget.prototype.destroy.call( this );
+ },
+
+ _setOption: function( key, value ) {
+ $.Widget.prototype._setOption.apply( this, arguments );
+
+ if ( key == "active" ) {
+ this.activate( value );
+ }
+ if ( key == "icons" ) {
+ this._destroyIcons();
+ if ( value ) {
+ this._createIcons();
+ }
+ }
+ // #5332 - opacity doesn't cascade to positioned elements in IE
+ // so we need to add the disabled class to the headers and panels
+ if ( key == "disabled" ) {
+ this.headers.add(this.headers.next())
+ [ value ? "addClass" : "removeClass" ](
+ "ui-accordion-disabled ui-state-disabled" );
+ }
+ },
+
+ _keydown: function( event ) {
+ if ( this.options.disabled || event.altKey || event.ctrlKey ) {
+ return;
+ }
+
+ var keyCode = $.ui.keyCode,
+ length = this.headers.length,
+ currentIndex = this.headers.index( event.target ),
+ toFocus = false;
+
+ switch ( event.keyCode ) {
+ case keyCode.RIGHT:
+ case keyCode.DOWN:
+ toFocus = this.headers[ ( currentIndex + 1 ) % length ];
+ break;
+ case keyCode.LEFT:
+ case keyCode.UP:
+ toFocus = this.headers[ ( currentIndex - 1 + length ) % length ];
+ break;
+ case keyCode.SPACE:
+ case keyCode.ENTER:
+ this._clickHandler( { target: event.target }, event.target );
+ event.preventDefault();
+ }
+
+ if ( toFocus ) {
+ $( event.target ).attr( "tabIndex", -1 );
+ $( toFocus ).attr( "tabIndex", 0 );
+ toFocus.focus();
+ return false;
+ }
+
+ return true;
+ },
+
+ resize: function() {
+ var options = this.options,
+ maxHeight;
+
+ if ( options.fillSpace ) {
+ if ( $.browser.msie ) {
+ var defOverflow = this.element.parent().css( "overflow" );
+ this.element.parent().css( "overflow", "hidden");
+ }
+ maxHeight = this.element.parent().height();
+ if ($.browser.msie) {
+ this.element.parent().css( "overflow", defOverflow );
+ }
+
+ this.headers.each(function() {
+ maxHeight -= $( this ).outerHeight( true );
+ });
+
+ this.headers.next()
+ .each(function() {
+ $( this ).height( Math.max( 0, maxHeight -
+ $( this ).innerHeight() + $( this ).height() ) );
+ })
+ .css( "overflow", "auto" );
+ } else if ( options.autoHeight ) {
+ maxHeight = 0;
+ this.headers.next()
+ .each(function() {
+ maxHeight = Math.max( maxHeight, $( this ).height( "" ).height() );
+ })
+ .height( maxHeight );
+ }
+
+ return this;
+ },
+
+ activate: function( index ) {
+ // TODO this gets called on init, changing the option without an explicit call for that
+ this.options.active = index;
+ // call clickHandler with custom event
+ var active = this._findActive( index )[ 0 ];
+ this._clickHandler( { target: active }, active );
+
+ return this;
+ },
+
+ _findActive: function( selector ) {
+ return selector
+ ? typeof selector === "number"
+ ? this.headers.filter( ":eq(" + selector + ")" )
+ : this.headers.not( this.headers.not( selector ) )
+ : selector === false
+ ? $( [] )
+ : this.headers.filter( ":eq(0)" );
+ },
+
+ // TODO isn't event.target enough? why the separate target argument?
+ _clickHandler: function( event, target ) {
+ var options = this.options;
+ if ( options.disabled ) {
+ return;
+ }
+
+ // called only when using activate(false) to close all parts programmatically
+ if ( !event.target ) {
+ if ( !options.collapsible ) {
+ return;
+ }
+ this.active
+ .removeClass( "ui-state-active ui-corner-top" )
+ .addClass( "ui-state-default ui-corner-all" )
+ .children( ".ui-icon" )
+ .removeClass( options.icons.headerSelected )
+ .addClass( options.icons.header );
+ this.active.next().addClass( "ui-accordion-content-active" );
+ var toHide = this.active.next(),
+ data = {
+ options: options,
+ newHeader: $( [] ),
+ oldHeader: options.active,
+ newContent: $( [] ),
+ oldContent: toHide
+ },
+ toShow = ( this.active = $( [] ) );
+ this._toggle( toShow, toHide, data );
+ return;
+ }
+
+ // get the click target
+ var clicked = $( event.currentTarget || target ),
+ clickedIsActive = clicked[0] === this.active[0];
+
+ // TODO the option is changed, is that correct?
+ // TODO if it is correct, shouldn't that happen after determining that the click is valid?
+ options.active = options.collapsible && clickedIsActive ?
+ false :
+ this.headers.index( clicked );
+
+ // if animations are still active, or the active header is the target, ignore click
+ if ( this.running || ( !options.collapsible && clickedIsActive ) ) {
+ return;
+ }
+
+ // find elements to show and hide
+ var active = this.active,
+ toShow = clicked.next(),
+ toHide = this.active.next(),
+ data = {
+ options: options,
+ newHeader: clickedIsActive && options.collapsible ? $([]) : clicked,
+ oldHeader: this.active,
+ newContent: clickedIsActive && options.collapsible ? $([]) : toShow,
+ oldContent: toHide
+ },
+ down = this.headers.index( this.active[0] ) > this.headers.index( clicked[0] );
+
+ // when the call to ._toggle() comes after the class changes
+ // it causes a very odd bug in IE 8 (see #6720)
+ this.active = clickedIsActive ? $([]) : clicked;
+ this._toggle( toShow, toHide, data, clickedIsActive, down );
+
+ // switch classes
+ active
+ .removeClass( "ui-state-active ui-corner-top" )
+ .addClass( "ui-state-default ui-corner-all" )
+ .children( ".ui-icon" )
+ .removeClass( options.icons.headerSelected )
+ .addClass( options.icons.header );
+ if ( !clickedIsActive ) {
+ clicked
+ .removeClass( "ui-state-default ui-corner-all" )
+ .addClass( "ui-state-active ui-corner-top" )
+ .children( ".ui-icon" )
+ .removeClass( options.icons.header )
+ .addClass( options.icons.headerSelected );
+ clicked
+ .next()
+ .addClass( "ui-accordion-content-active" );
+ }
+
+ return;
+ },
+
+ _toggle: function( toShow, toHide, data, clickedIsActive, down ) {
+ var self = this,
+ options = self.options;
+
+ self.toShow = toShow;
+ self.toHide = toHide;
+ self.data = data;
+
+ var complete = function() {
+ if ( !self ) {
+ return;
+ }
+ return self._completed.apply( self, arguments );
+ };
+
+ // trigger changestart event
+ self._trigger( "changestart", null, self.data );
+
+ // count elements to animate
+ self.running = toHide.size() === 0 ? toShow.size() : toHide.size();
+
+ if ( options.animated ) {
+ var animOptions = {};
+
+ if ( options.collapsible && clickedIsActive ) {
+ animOptions = {
+ toShow: $( [] ),
+ toHide: toHide,
+ complete: complete,
+ down: down,
+ autoHeight: options.autoHeight || options.fillSpace
+ };
+ } else {
+ animOptions = {
+ toShow: toShow,
+ toHide: toHide,
+ complete: complete,
+ down: down,
+ autoHeight: options.autoHeight || options.fillSpace
+ };
+ }
+
+ if ( !options.proxied ) {
+ options.proxied = options.animated;
+ }
+
+ if ( !options.proxiedDuration ) {
+ options.proxiedDuration = options.duration;
+ }
+
+ options.animated = $.isFunction( options.proxied ) ?
+ options.proxied( animOptions ) :
+ options.proxied;
+
+ options.duration = $.isFunction( options.proxiedDuration ) ?
+ options.proxiedDuration( animOptions ) :
+ options.proxiedDuration;
+
+ var animations = $.ui.accordion.animations,
+ duration = options.duration,
+ easing = options.animated;
+
+ if ( easing && !animations[ easing ] && !$.easing[ easing ] ) {
+ easing = "slide";
+ }
+ if ( !animations[ easing ] ) {
+ animations[ easing ] = function( options ) {
+ this.slide( options, {
+ easing: easing,
+ duration: duration || 700
+ });
+ };
+ }
+
+ animations[ easing ]( animOptions );
+ } else {
+ if ( options.collapsible && clickedIsActive ) {
+ toShow.toggle();
+ } else {
+ toHide.hide();
+ toShow.show();
+ }
+
+ complete( true );
+ }
+
+ // TODO assert that the blur and focus triggers are really necessary, remove otherwise
+ toHide.prev()
+ .attr({
+ "aria-expanded": "false",
+ "aria-selected": "false",
+ tabIndex: -1
+ })
+ .blur();
+ toShow.prev()
+ .attr({
+ "aria-expanded": "true",
+ "aria-selected": "true",
+ tabIndex: 0
+ })
+ .focus();
+ },
+
+ _completed: function( cancel ) {
+ this.running = cancel ? 0 : --this.running;
+ if ( this.running ) {
+ return;
+ }
+
+ if ( this.options.clearStyle ) {
+ this.toShow.add( this.toHide ).css({
+ height: "",
+ overflow: ""
+ });
+ }
+
+ // other classes are removed before the animation; this one needs to stay until completed
+ this.toHide.removeClass( "ui-accordion-content-active" );
+ // Work around for rendering bug in IE (#5421)
+ if ( this.toHide.length ) {
+ this.toHide.parent()[0].className = this.toHide.parent()[0].className;
+ }
+
+ this._trigger( "change", null, this.data );
+ }
+});
+
+$.extend( $.ui.accordion, {
+ version: "1.8.16",
+ animations: {
+ slide: function( options, additions ) {
+ options = $.extend({
+ easing: "swing",
+ duration: 300
+ }, options, additions );
+ if ( !options.toHide.size() ) {
+ options.toShow.animate({
+ height: "show",
+ paddingTop: "show",
+ paddingBottom: "show"
+ }, options );
+ return;
+ }
+ if ( !options.toShow.size() ) {
+ options.toHide.animate({
+ height: "hide",
+ paddingTop: "hide",
+ paddingBottom: "hide"
+ }, options );
+ return;
+ }
+ var overflow = options.toShow.css( "overflow" ),
+ percentDone = 0,
+ showProps = {},
+ hideProps = {},
+ fxAttrs = [ "height", "paddingTop", "paddingBottom" ],
+ originalWidth;
+ // fix width before calculating height of hidden element
+ var s = options.toShow;
+ originalWidth = s[0].style.width;
+ s.width( parseInt( s.parent().width(), 10 )
+ - parseInt( s.css( "paddingLeft" ), 10 )
+ - parseInt( s.css( "paddingRight" ), 10 )
+ - ( parseInt( s.css( "borderLeftWidth" ), 10 ) || 0 )
+ - ( parseInt( s.css( "borderRightWidth" ), 10) || 0 ) );
+
+ $.each( fxAttrs, function( i, prop ) {
+ hideProps[ prop ] = "hide";
+
+ var parts = ( "" + $.css( options.toShow[0], prop ) ).match( /^([\d+-.]+)(.*)$/ );
+ showProps[ prop ] = {
+ value: parts[ 1 ],
+ unit: parts[ 2 ] || "px"
+ };
+ });
+ options.toShow.css({ height: 0, overflow: "hidden" }).show();
+ options.toHide
+ .filter( ":hidden" )
+ .each( options.complete )
+ .end()
+ .filter( ":visible" )
+ .animate( hideProps, {
+ step: function( now, settings ) {
+ // only calculate the percent when animating height
+ // IE gets very inconsistent results when animating elements
+ // with small values, which is common for padding
+ if ( settings.prop == "height" ) {
+ percentDone = ( settings.end - settings.start === 0 ) ? 0 :
+ ( settings.now - settings.start ) / ( settings.end - settings.start );
+ }
+
+ options.toShow[ 0 ].style[ settings.prop ] =
+ ( percentDone * showProps[ settings.prop ].value )
+ + showProps[ settings.prop ].unit;
+ },
+ duration: options.duration,
+ easing: options.easing,
+ complete: function() {
+ if ( !options.autoHeight ) {
+ options.toShow.css( "height", "" );
+ }
+ options.toShow.css({
+ width: originalWidth,
+ overflow: overflow
+ });
+ options.complete();
+ }
+ });
+ },
+ bounceslide: function( options ) {
+ this.slide( options, {
+ easing: options.down ? "easeOutBounce" : "swing",
+ duration: options.down ? 1000 : 200
+ });
+ }
+ }
+});
+
+})( jQuery );
+/*
+ * jQuery UI Autocomplete 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Autocomplete
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ * jquery.ui.position.js
+ */
+(function( $, undefined ) {
+
+// used to prevent race conditions with remote data sources
+var requestIndex = 0;
+
+$.widget( "ui.autocomplete", {
+ options: {
+ appendTo: "body",
+ autoFocus: false,
+ delay: 300,
+ minLength: 1,
+ position: {
+ my: "left top",
+ at: "left bottom",
+ collision: "none"
+ },
+ source: null
+ },
+
+ pending: 0,
+
+ _create: function() {
+ var self = this,
+ doc = this.element[ 0 ].ownerDocument,
+ suppressKeyPress;
+
+ this.element
+ .addClass( "ui-autocomplete-input" )
+ .attr( "autocomplete", "off" )
+ // TODO verify these actually work as intended
+ .attr({
+ role: "textbox",
+ "aria-autocomplete": "list",
+ "aria-haspopup": "true"
+ })
+ .bind( "keydown.autocomplete", function( event ) {
+ if ( self.options.disabled || self.element.propAttr( "readOnly" ) ) {
+ return;
+ }
+
+ suppressKeyPress = false;
+ var keyCode = $.ui.keyCode;
+ switch( event.keyCode ) {
+ case keyCode.PAGE_UP:
+ self._move( "previousPage", event );
+ break;
+ case keyCode.PAGE_DOWN:
+ self._move( "nextPage", event );
+ break;
+ case keyCode.UP:
+ self._move( "previous", event );
+ // prevent moving cursor to beginning of text field in some browsers
+ event.preventDefault();
+ break;
+ case keyCode.DOWN:
+ self._move( "next", event );
+ // prevent moving cursor to end of text field in some browsers
+ event.preventDefault();
+ break;
+ case keyCode.ENTER:
+ case keyCode.NUMPAD_ENTER:
+ // when menu is open and has focus
+ if ( self.menu.active ) {
+ // #6055 - Opera still allows the keypress to occur
+ // which causes forms to submit
+ suppressKeyPress = true;
+ event.preventDefault();
+ }
+ //passthrough - ENTER and TAB both select the current element
+ case keyCode.TAB:
+ if ( !self.menu.active ) {
+ return;
+ }
+ self.menu.select( event );
+ break;
+ case keyCode.ESCAPE:
+ self.element.val( self.term );
+ self.close( event );
+ break;
+ default:
+ // keypress is triggered before the input value is changed
+ clearTimeout( self.searching );
+ self.searching = setTimeout(function() {
+ // only search if the value has changed
+ if ( self.term != self.element.val() ) {
+ self.selectedItem = null;
+ self.search( null, event );
+ }
+ }, self.options.delay );
+ break;
+ }
+ })
+ .bind( "keypress.autocomplete", function( event ) {
+ if ( suppressKeyPress ) {
+ suppressKeyPress = false;
+ event.preventDefault();
+ }
+ })
+ .bind( "focus.autocomplete", function() {
+ if ( self.options.disabled ) {
+ return;
+ }
+
+ self.selectedItem = null;
+ self.previous = self.element.val();
+ })
+ .bind( "blur.autocomplete", function( event ) {
+ if ( self.options.disabled ) {
+ return;
+ }
+
+ clearTimeout( self.searching );
+ // clicks on the menu (or a button to trigger a search) will cause a blur event
+ self.closing = setTimeout(function() {
+ self.close( event );
+ self._change( event );
+ }, 150 );
+ });
+ this._initSource();
+ this.response = function() {
+ return self._response.apply( self, arguments );
+ };
+ this.menu = $( "<ul></ul>" )
+ .addClass( "ui-autocomplete" )
+ .appendTo( $( this.options.appendTo || "body", doc )[0] )
+ // prevent the close-on-blur in case of a "slow" click on the menu (long mousedown)
+ .mousedown(function( event ) {
+ // clicking on the scrollbar causes focus to shift to the body
+ // but we can't detect a mouseup or a click immediately afterward
+ // so we have to track the next mousedown and close the menu if
+ // the user clicks somewhere outside of the autocomplete
+ var menuElement = self.menu.element[ 0 ];
+ if ( !$( event.target ).closest( ".ui-menu-item" ).length ) {
+ setTimeout(function() {
+ $( document ).one( 'mousedown', function( event ) {
+ if ( event.target !== self.element[ 0 ] &&
+ event.target !== menuElement &&
+ !$.ui.contains( menuElement, event.target ) ) {
+ self.close();
+ }
+ });
+ }, 1 );
+ }
+
+ // use another timeout to make sure the blur-event-handler on the input was already triggered
+ setTimeout(function() {
+ clearTimeout( self.closing );
+ }, 13);
+ })
+ .menu({
+ focus: function( event, ui ) {
+ var item = ui.item.data( "item.autocomplete" );
+ if ( false !== self._trigger( "focus", event, { item: item } ) ) {
+ // use value to match what will end up in the input, if it was a key event
+ if ( /^key/.test(event.originalEvent.type) ) {
+ self.element.val( item.value );
+ }
+ }
+ },
+ selected: function( event, ui ) {
+ var item = ui.item.data( "item.autocomplete" ),
+ previous = self.previous;
+
+ // only trigger when focus was lost (click on menu)
+ if ( self.element[0] !== doc.activeElement ) {
+ self.element.focus();
+ self.previous = previous;
+ // #6109 - IE triggers two focus events and the second
+ // is asynchronous, so we need to reset the previous
+ // term synchronously and asynchronously :-(
+ setTimeout(function() {
+ self.previous = previous;
+ self.selectedItem = item;
+ }, 1);
+ }
+
+ if ( false !== self._trigger( "select", event, { item: item } ) ) {
+ self.element.val( item.value );
+ }
+ // reset the term after the select event
+ // this allows custom select handling to work properly
+ self.term = self.element.val();
+
+ self.close( event );
+ self.selectedItem = item;
+ },
+ blur: function( event, ui ) {
+ // don't set the value of the text field if it's already correct
+ // this prevents moving the cursor unnecessarily
+ if ( self.menu.element.is(":visible") &&
+ ( self.element.val() !== self.term ) ) {
+ self.element.val( self.term );
+ }
+ }
+ })
+ .zIndex( this.element.zIndex() + 1 )
+ // workaround for jQuery bug #5781 http://dev.jquery.com/ticket/5781
+ .css({ top: 0, left: 0 })
+ .hide()
+ .data( "menu" );
+ if ( $.fn.bgiframe ) {
+ this.menu.element.bgiframe();
+ }
+ },
+
+ destroy: function() {
+ this.element
+ .removeClass( "ui-autocomplete-input" )
+ .removeAttr( "autocomplete" )
+ .removeAttr( "role" )
+ .removeAttr( "aria-autocomplete" )
+ .removeAttr( "aria-haspopup" );
+ this.menu.element.remove();
+ $.Widget.prototype.destroy.call( this );
+ },
+
+ _setOption: function( key, value ) {
+ $.Widget.prototype._setOption.apply( this, arguments );
+ if ( key === "source" ) {
+ this._initSource();
+ }
+ if ( key === "appendTo" ) {
+ this.menu.element.appendTo( $( value || "body", this.element[0].ownerDocument )[0] )
+ }
+ if ( key === "disabled" && value && this.xhr ) {
+ this.xhr.abort();
+ }
+ },
+
+ _initSource: function() {
+ var self = this,
+ array,
+ url;
+ if ( $.isArray(this.options.source) ) {
+ array = this.options.source;
+ this.source = function( request, response ) {
+ response( $.ui.autocomplete.filter(array, request.term) );
+ };
+ } else if ( typeof this.options.source === "string" ) {
+ url = this.options.source;
+ this.source = function( request, response ) {
+ if ( self.xhr ) {
+ self.xhr.abort();
+ }
+ self.xhr = $.ajax({
+ url: url,
+ data: request,
+ dataType: "json",
+ autocompleteRequest: ++requestIndex,
+ success: function( data, status ) {
+ if ( this.autocompleteRequest === requestIndex ) {
+ response( data );
+ }
+ },
+ error: function() {
+ if ( this.autocompleteRequest === requestIndex ) {
+ response( [] );
+ }
+ }
+ });
+ };
+ } else {
+ this.source = this.options.source;
+ }
+ },
+
+ search: function( value, event ) {
+ value = value != null ? value : this.element.val();
+
+ // always save the actual value, not the one passed as an argument
+ this.term = this.element.val();
+
+ if ( value.length < this.options.minLength ) {
+ return this.close( event );
+ }
+
+ clearTimeout( this.closing );
+ if ( this._trigger( "search", event ) === false ) {
+ return;
+ }
+
+ return this._search( value );
+ },
+
+ _search: function( value ) {
+ this.pending++;
+ this.element.addClass( "ui-autocomplete-loading" );
+
+ this.source( { term: value }, this.response );
+ },
+
+ _response: function( content ) {
+ if ( !this.options.disabled && content && content.length ) {
+ content = this._normalize( content );
+ this._suggest( content );
+ this._trigger( "open" );
+ } else {
+ this.close();
+ }
+ this.pending--;
+ if ( !this.pending ) {
+ this.element.removeClass( "ui-autocomplete-loading" );
+ }
+ },
+
+ close: function( event ) {
+ clearTimeout( this.closing );
+ if ( this.menu.element.is(":visible") ) {
+ this.menu.element.hide();
+ this.menu.deactivate();
+ this._trigger( "close", event );
+ }
+ },
+
+ _change: function( event ) {
+ if ( this.previous !== this.element.val() ) {
+ this._trigger( "change", event, { item: this.selectedItem } );
+ }
+ },
+
+ _normalize: function( items ) {
+ // assume all items have the right format when the first item is complete
+ if ( items.length && items[0].label && items[0].value ) {
+ return items;
+ }
+ return $.map( items, function(item) {
+ if ( typeof item === "string" ) {
+ return {
+ label: item,
+ value: item
+ };
+ }
+ return $.extend({
+ label: item.label || item.value,
+ value: item.value || item.label
+ }, item );
+ });
+ },
+
+ _suggest: function( items ) {
+ var ul = this.menu.element
+ .empty()
+ .zIndex( this.element.zIndex() + 1 );
+ this._renderMenu( ul, items );
+ // TODO refresh should check if the active item is still in the dom, removing the need for a manual deactivate
+ this.menu.deactivate();
+ this.menu.refresh();
+
+ // size and position menu
+ ul.show();
+ this._resizeMenu();
+ ul.position( $.extend({
+ of: this.element
+ }, this.options.position ));
+
+ if ( this.options.autoFocus ) {
+ this.menu.next( new $.Event("mouseover") );
+ }
+ },
+
+ _resizeMenu: function() {
+ var ul = this.menu.element;
+ ul.outerWidth( Math.max(
+ ul.width( "" ).outerWidth(),
+ this.element.outerWidth()
+ ) );
+ },
+
+ _renderMenu: function( ul, items ) {
+ var self = this;
+ $.each( items, function( index, item ) {
+ self._renderItem( ul, item );
+ });
+ },
+
+ _renderItem: function( ul, item) {
+ return $( "<li></li>" )
+ .data( "item.autocomplete", item )
+ .append( $( "<a></a>" ).text( item.label ) )
+ .appendTo( ul );
+ },
+
+ _move: function( direction, event ) {
+ if ( !this.menu.element.is(":visible") ) {
+ this.search( null, event );
+ return;
+ }
+ if ( this.menu.first() && /^previous/.test(direction) ||
+ this.menu.last() && /^next/.test(direction) ) {
+ this.element.val( this.term );
+ this.menu.deactivate();
+ return;
+ }
+ this.menu[ direction ]( event );
+ },
+
+ widget: function() {
+ return this.menu.element;
+ }
+});
+
+$.extend( $.ui.autocomplete, {
+ escapeRegex: function( value ) {
+ return value.replace(/[-[\]{}()*+?.,\\^$|#\s]/g, "\\$&");
+ },
+ filter: function(array, term) {
+ var matcher = new RegExp( $.ui.autocomplete.escapeRegex(term), "i" );
+ return $.grep( array, function(value) {
+ return matcher.test( value.label || value.value || value );
+ });
+ }
+});
+
+}( jQuery ));
+
+/*
+ * jQuery UI Menu (not officially released)
+ *
+ * This widget isn't yet finished and the API is subject to change. We plan to finish
+ * it for the next release. You're welcome to give it a try anyway and give us feedback,
+ * as long as you're okay with migrating your code later on. We can help with that, too.
+ *
+ * Copyright 2010, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Menu
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ */
+(function($) {
+
+$.widget("ui.menu", {
+ _create: function() {
+ var self = this;
+ this.element
+ .addClass("ui-menu ui-widget ui-widget-content ui-corner-all")
+ .attr({
+ role: "listbox",
+ "aria-activedescendant": "ui-active-menuitem"
+ })
+ .click(function( event ) {
+ if ( !$( event.target ).closest( ".ui-menu-item a" ).length ) {
+ return;
+ }
+ // temporary
+ event.preventDefault();
+ self.select( event );
+ });
+ this.refresh();
+ },
+
+ refresh: function() {
+ var self = this;
+
+ // don't refresh list items that are already adapted
+ var items = this.element.children("li:not(.ui-menu-item):has(a)")
+ .addClass("ui-menu-item")
+ .attr("role", "menuitem");
+
+ items.children("a")
+ .addClass("ui-corner-all")
+ .attr("tabindex", -1)
+ // mouseenter doesn't work with event delegation
+ .mouseenter(function( event ) {
+ self.activate( event, $(this).parent() );
+ })
+ .mouseleave(function() {
+ self.deactivate();
+ });
+ },
+
+ activate: function( event, item ) {
+ this.deactivate();
+ if (this.hasScroll()) {
+ var offset = item.offset().top - this.element.offset().top,
+ scroll = this.element.scrollTop(),
+ elementHeight = this.element.height();
+ if (offset < 0) {
+ this.element.scrollTop( scroll + offset);
+ } else if (offset >= elementHeight) {
+ this.element.scrollTop( scroll + offset - elementHeight + item.height());
+ }
+ }
+ this.active = item.eq(0)
+ .children("a")
+ .addClass("ui-state-hover")
+ .attr("id", "ui-active-menuitem")
+ .end();
+ this._trigger("focus", event, { item: item });
+ },
+
+ deactivate: function() {
+ if (!this.active) { return; }
+
+ this.active.children("a")
+ .removeClass("ui-state-hover")
+ .removeAttr("id");
+ this._trigger("blur");
+ this.active = null;
+ },
+
+ next: function(event) {
+ this.move("next", ".ui-menu-item:first", event);
+ },
+
+ previous: function(event) {
+ this.move("prev", ".ui-menu-item:last", event);
+ },
+
+ first: function() {
+ return this.active && !this.active.prevAll(".ui-menu-item").length;
+ },
+
+ last: function() {
+ return this.active && !this.active.nextAll(".ui-menu-item").length;
+ },
+
+ move: function(direction, edge, event) {
+ if (!this.active) {
+ this.activate(event, this.element.children(edge));
+ return;
+ }
+ var next = this.active[direction + "All"](".ui-menu-item").eq(0);
+ if (next.length) {
+ this.activate(event, next);
+ } else {
+ this.activate(event, this.element.children(edge));
+ }
+ },
+
+ // TODO merge with previousPage
+ nextPage: function(event) {
+ if (this.hasScroll()) {
+ // TODO merge with no-scroll-else
+ if (!this.active || this.last()) {
+ this.activate(event, this.element.children(".ui-menu-item:first"));
+ return;
+ }
+ var base = this.active.offset().top,
+ height = this.element.height(),
+ result = this.element.children(".ui-menu-item").filter(function() {
+ var close = $(this).offset().top - base - height + $(this).height();
+ // TODO improve approximation
+ return close < 10 && close > -10;
+ });
+
+ // TODO try to catch this earlier when scrollTop indicates the last page anyway
+ if (!result.length) {
+ result = this.element.children(".ui-menu-item:last");
+ }
+ this.activate(event, result);
+ } else {
+ this.activate(event, this.element.children(".ui-menu-item")
+ .filter(!this.active || this.last() ? ":first" : ":last"));
+ }
+ },
+
+ // TODO merge with nextPage
+ previousPage: function(event) {
+ if (this.hasScroll()) {
+ // TODO merge with no-scroll-else
+ if (!this.active || this.first()) {
+ this.activate(event, this.element.children(".ui-menu-item:last"));
+ return;
+ }
+
+ var base = this.active.offset().top,
+ height = this.element.height();
+ result = this.element.children(".ui-menu-item").filter(function() {
+ var close = $(this).offset().top - base + height - $(this).height();
+ // TODO improve approximation
+ return close < 10 && close > -10;
+ });
+
+ // TODO try to catch this earlier when scrollTop indicates the last page anyway
+ if (!result.length) {
+ result = this.element.children(".ui-menu-item:first");
+ }
+ this.activate(event, result);
+ } else {
+ this.activate(event, this.element.children(".ui-menu-item")
+ .filter(!this.active || this.first() ? ":last" : ":first"));
+ }
+ },
+
+ hasScroll: function() {
+ return this.element.height() < this.element[ $.fn.prop ? "prop" : "attr" ]("scrollHeight");
+ },
+
+ select: function( event ) {
+ this._trigger("selected", event, { item: this.active });
+ }
+});
+
+}(jQuery));
+/*
+ * jQuery UI Button 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Button
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+var lastActive, startXPos, startYPos, clickDragged,
+ baseClasses = "ui-button ui-widget ui-state-default ui-corner-all",
+ stateClasses = "ui-state-hover ui-state-active ",
+ typeClasses = "ui-button-icons-only ui-button-icon-only ui-button-text-icons ui-button-text-icon-primary ui-button-text-icon-secondary ui-button-text-only",
+ formResetHandler = function() {
+ var buttons = $( this ).find( ":ui-button" );
+ setTimeout(function() {
+ buttons.button( "refresh" );
+ }, 1 );
+ },
+ radioGroup = function( radio ) {
+ var name = radio.name,
+ form = radio.form,
+ radios = $( [] );
+ if ( name ) {
+ if ( form ) {
+ radios = $( form ).find( "[name='" + name + "']" );
+ } else {
+ radios = $( "[name='" + name + "']", radio.ownerDocument )
+ .filter(function() {
+ return !this.form;
+ });
+ }
+ }
+ return radios;
+ };
+
+$.widget( "ui.button", {
+ options: {
+ disabled: null,
+ text: true,
+ label: null,
+ icons: {
+ primary: null,
+ secondary: null
+ }
+ },
+ _create: function() {
+ this.element.closest( "form" )
+ .unbind( "reset.button" )
+ .bind( "reset.button", formResetHandler );
+
+ if ( typeof this.options.disabled !== "boolean" ) {
+ this.options.disabled = this.element.propAttr( "disabled" );
+ }
+
+ this._determineButtonType();
+ this.hasTitle = !!this.buttonElement.attr( "title" );
+
+ var self = this,
+ options = this.options,
+ toggleButton = this.type === "checkbox" || this.type === "radio",
+ hoverClass = "ui-state-hover" + ( !toggleButton ? " ui-state-active" : "" ),
+ focusClass = "ui-state-focus";
+
+ if ( options.label === null ) {
+ options.label = this.buttonElement.html();
+ }
+
+ if ( this.element.is( ":disabled" ) ) {
+ options.disabled = true;
+ }
+
+ this.buttonElement
+ .addClass( baseClasses )
+ .attr( "role", "button" )
+ .bind( "mouseenter.button", function() {
+ if ( options.disabled ) {
+ return;
+ }
+ $( this ).addClass( "ui-state-hover" );
+ if ( this === lastActive ) {
+ $( this ).addClass( "ui-state-active" );
+ }
+ })
+ .bind( "mouseleave.button", function() {
+ if ( options.disabled ) {
+ return;
+ }
+ $( this ).removeClass( hoverClass );
+ })
+ .bind( "click.button", function( event ) {
+ if ( options.disabled ) {
+ event.preventDefault();
+ event.stopImmediatePropagation();
+ }
+ });
+
+ this.element
+ .bind( "focus.button", function() {
+ // no need to check disabled, focus won't be triggered anyway
+ self.buttonElement.addClass( focusClass );
+ })
+ .bind( "blur.button", function() {
+ self.buttonElement.removeClass( focusClass );
+ });
+
+ if ( toggleButton ) {
+ this.element.bind( "change.button", function() {
+ if ( clickDragged ) {
+ return;
+ }
+ self.refresh();
+ });
+ // if mouse moves between mousedown and mouseup (drag) set clickDragged flag
+ // prevents issue where button state changes but checkbox/radio checked state
+ // does not in Firefox (see ticket #6970)
+ this.buttonElement
+ .bind( "mousedown.button", function( event ) {
+ if ( options.disabled ) {
+ return;
+ }
+ clickDragged = false;
+ startXPos = event.pageX;
+ startYPos = event.pageY;
+ })
+ .bind( "mouseup.button", function( event ) {
+ if ( options.disabled ) {
+ return;
+ }
+ if ( startXPos !== event.pageX || startYPos !== event.pageY ) {
+ clickDragged = true;
+ }
+ });
+ }
+
+ if ( this.type === "checkbox" ) {
+ this.buttonElement.bind( "click.button", function() {
+ if ( options.disabled || clickDragged ) {
+ return false;
+ }
+ $( this ).toggleClass( "ui-state-active" );
+ self.buttonElement.attr( "aria-pressed", self.element[0].checked );
+ });
+ } else if ( this.type === "radio" ) {
+ this.buttonElement.bind( "click.button", function() {
+ if ( options.disabled || clickDragged ) {
+ return false;
+ }
+ $( this ).addClass( "ui-state-active" );
+ self.buttonElement.attr( "aria-pressed", "true" );
+
+ var radio = self.element[ 0 ];
+ radioGroup( radio )
+ .not( radio )
+ .map(function() {
+ return $( this ).button( "widget" )[ 0 ];
+ })
+ .removeClass( "ui-state-active" )
+ .attr( "aria-pressed", "false" );
+ });
+ } else {
+ this.buttonElement
+ .bind( "mousedown.button", function() {
+ if ( options.disabled ) {
+ return false;
+ }
+ $( this ).addClass( "ui-state-active" );
+ lastActive = this;
+ $( document ).one( "mouseup", function() {
+ lastActive = null;
+ });
+ })
+ .bind( "mouseup.button", function() {
+ if ( options.disabled ) {
+ return false;
+ }
+ $( this ).removeClass( "ui-state-active" );
+ })
+ .bind( "keydown.button", function(event) {
+ if ( options.disabled ) {
+ return false;
+ }
+ if ( event.keyCode == $.ui.keyCode.SPACE || event.keyCode == $.ui.keyCode.ENTER ) {
+ $( this ).addClass( "ui-state-active" );
+ }
+ })
+ .bind( "keyup.button", function() {
+ $( this ).removeClass( "ui-state-active" );
+ });
+
+ if ( this.buttonElement.is("a") ) {
+ this.buttonElement.keyup(function(event) {
+ if ( event.keyCode === $.ui.keyCode.SPACE ) {
+ // TODO pass through original event correctly (just as 2nd argument doesn't work)
+ $( this ).click();
+ }
+ });
+ }
+ }
+
+ // TODO: pull out $.Widget's handling for the disabled option into
+ // $.Widget.prototype._setOptionDisabled so it's easy to proxy and can
+ // be overridden by individual plugins
+ this._setOption( "disabled", options.disabled );
+ this._resetButton();
+ },
+
+ _determineButtonType: function() {
+
+ if ( this.element.is(":checkbox") ) {
+ this.type = "checkbox";
+ } else if ( this.element.is(":radio") ) {
+ this.type = "radio";
+ } else if ( this.element.is("input") ) {
+ this.type = "input";
+ } else {
+ this.type = "button";
+ }
+
+ if ( this.type === "checkbox" || this.type === "radio" ) {
+ // we don't search against the document in case the element
+ // is disconnected from the DOM
+ var ancestor = this.element.parents().filter(":last"),
+ labelSelector = "label[for='" + this.element.attr("id") + "']";
+ this.buttonElement = ancestor.find( labelSelector );
+ if ( !this.buttonElement.length ) {
+ ancestor = ancestor.length ? ancestor.siblings() : this.element.siblings();
+ this.buttonElement = ancestor.filter( labelSelector );
+ if ( !this.buttonElement.length ) {
+ this.buttonElement = ancestor.find( labelSelector );
+ }
+ }
+ this.element.addClass( "ui-helper-hidden-accessible" );
+
+ var checked = this.element.is( ":checked" );
+ if ( checked ) {
+ this.buttonElement.addClass( "ui-state-active" );
+ }
+ this.buttonElement.attr( "aria-pressed", checked );
+ } else {
+ this.buttonElement = this.element;
+ }
+ },
+
+ widget: function() {
+ return this.buttonElement;
+ },
+
+ destroy: function() {
+ this.element
+ .removeClass( "ui-helper-hidden-accessible" );
+ this.buttonElement
+ .removeClass( baseClasses + " " + stateClasses + " " + typeClasses )
+ .removeAttr( "role" )
+ .removeAttr( "aria-pressed" )
+ .html( this.buttonElement.find(".ui-button-text").html() );
+
+ if ( !this.hasTitle ) {
+ this.buttonElement.removeAttr( "title" );
+ }
+
+ $.Widget.prototype.destroy.call( this );
+ },
+
+ _setOption: function( key, value ) {
+ $.Widget.prototype._setOption.apply( this, arguments );
+ if ( key === "disabled" ) {
+ if ( value ) {
+ this.element.propAttr( "disabled", true );
+ } else {
+ this.element.propAttr( "disabled", false );
+ }
+ return;
+ }
+ this._resetButton();
+ },
+
+ refresh: function() {
+ var isDisabled = this.element.is( ":disabled" );
+ if ( isDisabled !== this.options.disabled ) {
+ this._setOption( "disabled", isDisabled );
+ }
+ if ( this.type === "radio" ) {
+ radioGroup( this.element[0] ).each(function() {
+ if ( $( this ).is( ":checked" ) ) {
+ $( this ).button( "widget" )
+ .addClass( "ui-state-active" )
+ .attr( "aria-pressed", "true" );
+ } else {
+ $( this ).button( "widget" )
+ .removeClass( "ui-state-active" )
+ .attr( "aria-pressed", "false" );
+ }
+ });
+ } else if ( this.type === "checkbox" ) {
+ if ( this.element.is( ":checked" ) ) {
+ this.buttonElement
+ .addClass( "ui-state-active" )
+ .attr( "aria-pressed", "true" );
+ } else {
+ this.buttonElement
+ .removeClass( "ui-state-active" )
+ .attr( "aria-pressed", "false" );
+ }
+ }
+ },
+
+ _resetButton: function() {
+ if ( this.type === "input" ) {
+ if ( this.options.label ) {
+ this.element.val( this.options.label );
+ }
+ return;
+ }
+ var buttonElement = this.buttonElement.removeClass( typeClasses ),
+ buttonText = $( "<span></span>" )
+ .addClass( "ui-button-text" )
+ .html( this.options.label )
+ .appendTo( buttonElement.empty() )
+ .text(),
+ icons = this.options.icons,
+ multipleIcons = icons.primary && icons.secondary,
+ buttonClasses = [];
+
+ if ( icons.primary || icons.secondary ) {
+ if ( this.options.text ) {
+ buttonClasses.push( "ui-button-text-icon" + ( multipleIcons ? "s" : ( icons.primary ? "-primary" : "-secondary" ) ) );
+ }
+
+ if ( icons.primary ) {
+ buttonElement.prepend( "<span class='ui-button-icon-primary ui-icon " + icons.primary + "'></span>" );
+ }
+
+ if ( icons.secondary ) {
+ buttonElement.append( "<span class='ui-button-icon-secondary ui-icon " + icons.secondary + "'></span>" );
+ }
+
+ if ( !this.options.text ) {
+ buttonClasses.push( multipleIcons ? "ui-button-icons-only" : "ui-button-icon-only" );
+
+ if ( !this.hasTitle ) {
+ buttonElement.attr( "title", buttonText );
+ }
+ }
+ } else {
+ buttonClasses.push( "ui-button-text-only" );
+ }
+ buttonElement.addClass( buttonClasses.join( " " ) );
+ }
+});
+
+$.widget( "ui.buttonset", {
+ options: {
+ items: ":button, :submit, :reset, :checkbox, :radio, a, :data(button)"
+ },
+
+ _create: function() {
+ this.element.addClass( "ui-buttonset" );
+ },
+
+ _init: function() {
+ this.refresh();
+ },
+
+ _setOption: function( key, value ) {
+ if ( key === "disabled" ) {
+ this.buttons.button( "option", key, value );
+ }
+
+ $.Widget.prototype._setOption.apply( this, arguments );
+ },
+
+ refresh: function() {
+ var ltr = this.element.css( "direction" ) === "ltr";
+
+ this.buttons = this.element.find( this.options.items )
+ .filter( ":ui-button" )
+ .button( "refresh" )
+ .end()
+ .not( ":ui-button" )
+ .button()
+ .end()
+ .map(function() {
+ return $( this ).button( "widget" )[ 0 ];
+ })
+ .removeClass( "ui-corner-all ui-corner-left ui-corner-right" )
+ .filter( ":first" )
+ .addClass( ltr ? "ui-corner-left" : "ui-corner-right" )
+ .end()
+ .filter( ":last" )
+ .addClass( ltr ? "ui-corner-right" : "ui-corner-left" )
+ .end()
+ .end();
+ },
+
+ destroy: function() {
+ this.element.removeClass( "ui-buttonset" );
+ this.buttons
+ .map(function() {
+ return $( this ).button( "widget" )[ 0 ];
+ })
+ .removeClass( "ui-corner-left ui-corner-right" )
+ .end()
+ .button( "destroy" );
+
+ $.Widget.prototype.destroy.call( this );
+ }
+});
+
+}( jQuery ) );
+/*
+ * jQuery UI Dialog 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Dialog
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ * jquery.ui.button.js
+ * jquery.ui.draggable.js
+ * jquery.ui.mouse.js
+ * jquery.ui.position.js
+ * jquery.ui.resizable.js
+ */
+(function( $, undefined ) {
+
+var uiDialogClasses =
+ 'ui-dialog ' +
+ 'ui-widget ' +
+ 'ui-widget-content ' +
+ 'ui-corner-all ',
+ sizeRelatedOptions = {
+ buttons: true,
+ height: true,
+ maxHeight: true,
+ maxWidth: true,
+ minHeight: true,
+ minWidth: true,
+ width: true
+ },
+ resizableRelatedOptions = {
+ maxHeight: true,
+ maxWidth: true,
+ minHeight: true,
+ minWidth: true
+ },
+ // support for jQuery 1.3.2 - handle common attrFn methods for dialog
+ attrFn = $.attrFn || {
+ val: true,
+ css: true,
+ html: true,
+ text: true,
+ data: true,
+ width: true,
+ height: true,
+ offset: true,
+ click: true
+ };
+
+$.widget("ui.dialog", {
+ options: {
+ autoOpen: true,
+ buttons: {},
+ closeOnEscape: true,
+ closeText: 'close',
+ dialogClass: '',
+ draggable: true,
+ hide: null,
+ height: 'auto',
+ maxHeight: false,
+ maxWidth: false,
+ minHeight: 150,
+ minWidth: 150,
+ modal: false,
+ position: {
+ my: 'center',
+ at: 'center',
+ collision: 'fit',
+ // ensure that the titlebar is never outside the document
+ using: function(pos) {
+ var topOffset = $(this).css(pos).offset().top;
+ if (topOffset < 0) {
+ $(this).css('top', pos.top - topOffset);
+ }
+ }
+ },
+ resizable: true,
+ show: null,
+ stack: true,
+ title: '',
+ width: 300,
+ zIndex: 1000
+ },
+
+ _create: function() {
+ this.originalTitle = this.element.attr('title');
+ // #5742 - .attr() might return a DOMElement
+ if ( typeof this.originalTitle !== "string" ) {
+ this.originalTitle = "";
+ }
+
+ this.options.title = this.options.title || this.originalTitle;
+ var self = this,
+ options = self.options,
+
+ title = options.title || '&#160;',
+ titleId = $.ui.dialog.getTitleId(self.element),
+
+ uiDialog = (self.uiDialog = $('<div></div>'))
+ .appendTo(document.body)
+ .hide()
+ .addClass(uiDialogClasses + options.dialogClass)
+ .css({
+ zIndex: options.zIndex
+ })
+ // setting tabIndex makes the div focusable
+ // setting outline to 0 prevents a border on focus in Mozilla
+ .attr('tabIndex', -1).css('outline', 0).keydown(function(event) {
+ if (options.closeOnEscape && !event.isDefaultPrevented() && event.keyCode &&
+ event.keyCode === $.ui.keyCode.ESCAPE) {
+
+ self.close(event);
+ event.preventDefault();
+ }
+ })
+ .attr({
+ role: 'dialog',
+ 'aria-labelledby': titleId
+ })
+ .mousedown(function(event) {
+ self.moveToTop(false, event);
+ }),
+
+ uiDialogContent = self.element
+ .show()
+ .removeAttr('title')
+ .addClass(
+ 'ui-dialog-content ' +
+ 'ui-widget-content')
+ .appendTo(uiDialog),
+
+ uiDialogTitlebar = (self.uiDialogTitlebar = $('<div></div>'))
+ .addClass(
+ 'ui-dialog-titlebar ' +
+ 'ui-widget-header ' +
+ 'ui-corner-all ' +
+ 'ui-helper-clearfix'
+ )
+ .prependTo(uiDialog),
+
+ uiDialogTitlebarClose = $('<a href="#"></a>')
+ .addClass(
+ 'ui-dialog-titlebar-close ' +
+ 'ui-corner-all'
+ )
+ .attr('role', 'button')
+ .hover(
+ function() {
+ uiDialogTitlebarClose.addClass('ui-state-hover');
+ },
+ function() {
+ uiDialogTitlebarClose.removeClass('ui-state-hover');
+ }
+ )
+ .focus(function() {
+ uiDialogTitlebarClose.addClass('ui-state-focus');
+ })
+ .blur(function() {
+ uiDialogTitlebarClose.removeClass('ui-state-focus');
+ })
+ .click(function(event) {
+ self.close(event);
+ return false;
+ })
+ .appendTo(uiDialogTitlebar),
+
+ uiDialogTitlebarCloseText = (self.uiDialogTitlebarCloseText = $('<span></span>'))
+ .addClass(
+ 'ui-icon ' +
+ 'ui-icon-closethick'
+ )
+ .text(options.closeText)
+ .appendTo(uiDialogTitlebarClose),
+
+ uiDialogTitle = $('<span></span>')
+ .addClass('ui-dialog-title')
+ .attr('id', titleId)
+ .html(title)
+ .prependTo(uiDialogTitlebar);
+
+ //handling of deprecated beforeclose (vs beforeClose) option
+ //Ticket #4669 http://dev.jqueryui.com/ticket/4669
+ //TODO: remove in 1.9pre
+ if ($.isFunction(options.beforeclose) && !$.isFunction(options.beforeClose)) {
+ options.beforeClose = options.beforeclose;
+ }
+
+ uiDialogTitlebar.find("*").add(uiDialogTitlebar).disableSelection();
+
+ if (options.draggable && $.fn.draggable) {
+ self._makeDraggable();
+ }
+ if (options.resizable && $.fn.resizable) {
+ self._makeResizable();
+ }
+
+ self._createButtons(options.buttons);
+ self._isOpen = false;
+
+ if ($.fn.bgiframe) {
+ uiDialog.bgiframe();
+ }
+ },
+
+ _init: function() {
+ if ( this.options.autoOpen ) {
+ this.open();
+ }
+ },
+
+ destroy: function() {
+ var self = this;
+
+ if (self.overlay) {
+ self.overlay.destroy();
+ }
+ self.uiDialog.hide();
+ self.element
+ .unbind('.dialog')
+ .removeData('dialog')
+ .removeClass('ui-dialog-content ui-widget-content')
+ .hide().appendTo('body');
+ self.uiDialog.remove();
+
+ if (self.originalTitle) {
+ self.element.attr('title', self.originalTitle);
+ }
+
+ return self;
+ },
+
+ widget: function() {
+ return this.uiDialog;
+ },
+
+ close: function(event) {
+ var self = this,
+ maxZ, thisZ;
+
+ if (false === self._trigger('beforeClose', event)) {
+ return;
+ }
+
+ if (self.overlay) {
+ self.overlay.destroy();
+ }
+ self.uiDialog.unbind('keypress.ui-dialog');
+
+ self._isOpen = false;
+
+ if (self.options.hide) {
+ self.uiDialog.hide(self.options.hide, function() {
+ self._trigger('close', event);
+ });
+ } else {
+ self.uiDialog.hide();
+ self._trigger('close', event);
+ }
+
+ $.ui.dialog.overlay.resize();
+
+ // adjust the maxZ to allow other modal dialogs to continue to work (see #4309)
+ if (self.options.modal) {
+ maxZ = 0;
+ $('.ui-dialog').each(function() {
+ if (this !== self.uiDialog[0]) {
+ thisZ = $(this).css('z-index');
+ if(!isNaN(thisZ)) {
+ maxZ = Math.max(maxZ, thisZ);
+ }
+ }
+ });
+ $.ui.dialog.maxZ = maxZ;
+ }
+
+ return self;
+ },
+
+ isOpen: function() {
+ return this._isOpen;
+ },
+
+ // the force parameter allows us to move modal dialogs to their correct
+ // position on open
+ moveToTop: function(force, event) {
+ var self = this,
+ options = self.options,
+ saveScroll;
+
+ if ((options.modal && !force) ||
+ (!options.stack && !options.modal)) {
+ return self._trigger('focus', event);
+ }
+
+ if (options.zIndex > $.ui.dialog.maxZ) {
+ $.ui.dialog.maxZ = options.zIndex;
+ }
+ if (self.overlay) {
+ $.ui.dialog.maxZ += 1;
+ self.overlay.$el.css('z-index', $.ui.dialog.overlay.maxZ = $.ui.dialog.maxZ);
+ }
+
+ //Save and then restore scroll since Opera 9.5+ resets when parent z-Index is changed.
+ // http://ui.jquery.com/bugs/ticket/3193
+ saveScroll = { scrollTop: self.element.scrollTop(), scrollLeft: self.element.scrollLeft() };
+ $.ui.dialog.maxZ += 1;
+ self.uiDialog.css('z-index', $.ui.dialog.maxZ);
+ self.element.attr(saveScroll);
+ self._trigger('focus', event);
+
+ return self;
+ },
+
+ open: function() {
+ if (this._isOpen) { return; }
+
+ var self = this,
+ options = self.options,
+ uiDialog = self.uiDialog;
+
+ self.overlay = options.modal ? new $.ui.dialog.overlay(self) : null;
+ self._size();
+ self._position(options.position);
+ uiDialog.show(options.show);
+ self.moveToTop(true);
+
+ // prevent tabbing out of modal dialogs
+ if (options.modal) {
+ uiDialog.bind('keypress.ui-dialog', function(event) {
+ if (event.keyCode !== $.ui.keyCode.TAB) {
+ return;
+ }
+
+ var tabbables = $(':tabbable', this),
+ first = tabbables.filter(':first'),
+ last = tabbables.filter(':last');
+
+ if (event.target === last[0] && !event.shiftKey) {
+ first.focus(1);
+ return false;
+ } else if (event.target === first[0] && event.shiftKey) {
+ last.focus(1);
+ return false;
+ }
+ });
+ }
+
+ // set focus to the first tabbable element in the content area or the first button
+ // if there are no tabbable elements, set focus on the dialog itself
+ $(self.element.find(':tabbable').get().concat(
+ uiDialog.find('.ui-dialog-buttonpane :tabbable').get().concat(
+ uiDialog.get()))).eq(0).focus();
+
+ self._isOpen = true;
+ self._trigger('open');
+
+ return self;
+ },
+
+ _createButtons: function(buttons) {
+ var self = this,
+ hasButtons = false,
+ uiDialogButtonPane = $('<div></div>')
+ .addClass(
+ 'ui-dialog-buttonpane ' +
+ 'ui-widget-content ' +
+ 'ui-helper-clearfix'
+ ),
+ uiButtonSet = $( "<div></div>" )
+ .addClass( "ui-dialog-buttonset" )
+ .appendTo( uiDialogButtonPane );
+
+ // if we already have a button pane, remove it
+ self.uiDialog.find('.ui-dialog-buttonpane').remove();
+
+ if (typeof buttons === 'object' && buttons !== null) {
+ $.each(buttons, function() {
+ return !(hasButtons = true);
+ });
+ }
+ if (hasButtons) {
+ $.each(buttons, function(name, props) {
+ props = $.isFunction( props ) ?
+ { click: props, text: name } :
+ props;
+ var button = $('<button type="button"></button>')
+ .click(function() {
+ props.click.apply(self.element[0], arguments);
+ })
+ .appendTo(uiButtonSet);
+ // can't use .attr( props, true ) with jQuery 1.3.2.
+ $.each( props, function( key, value ) {
+ if ( key === "click" ) {
+ return;
+ }
+ if ( key in attrFn ) {
+ button[ key ]( value );
+ } else {
+ button.attr( key, value );
+ }
+ });
+ if ($.fn.button) {
+ button.button();
+ }
+ });
+ uiDialogButtonPane.appendTo(self.uiDialog);
+ }
+ },
+
+ _makeDraggable: function() {
+ var self = this,
+ options = self.options,
+ doc = $(document),
+ heightBeforeDrag;
+
+ function filteredUi(ui) {
+ return {
+ position: ui.position,
+ offset: ui.offset
+ };
+ }
+
+ self.uiDialog.draggable({
+ cancel: '.ui-dialog-content, .ui-dialog-titlebar-close',
+ handle: '.ui-dialog-titlebar',
+ containment: 'document',
+ start: function(event, ui) {
+ heightBeforeDrag = options.height === "auto" ? "auto" : $(this).height();
+ $(this).height($(this).height()).addClass("ui-dialog-dragging");
+ self._trigger('dragStart', event, filteredUi(ui));
+ },
+ drag: function(event, ui) {
+ self._trigger('drag', event, filteredUi(ui));
+ },
+ stop: function(event, ui) {
+ options.position = [ui.position.left - doc.scrollLeft(),
+ ui.position.top - doc.scrollTop()];
+ $(this).removeClass("ui-dialog-dragging").height(heightBeforeDrag);
+ self._trigger('dragStop', event, filteredUi(ui));
+ $.ui.dialog.overlay.resize();
+ }
+ });
+ },
+
+ _makeResizable: function(handles) {
+ handles = (handles === undefined ? this.options.resizable : handles);
+ var self = this,
+ options = self.options,
+ // .ui-resizable has position: relative defined in the stylesheet
+ // but dialogs have to use absolute or fixed positioning
+ position = self.uiDialog.css('position'),
+ resizeHandles = (typeof handles === 'string' ?
+ handles :
+ 'n,e,s,w,se,sw,ne,nw'
+ );
+
+ function filteredUi(ui) {
+ return {
+ originalPosition: ui.originalPosition,
+ originalSize: ui.originalSize,
+ position: ui.position,
+ size: ui.size
+ };
+ }
+
+ self.uiDialog.resizable({
+ cancel: '.ui-dialog-content',
+ containment: 'document',
+ alsoResize: self.element,
+ maxWidth: options.maxWidth,
+ maxHeight: options.maxHeight,
+ minWidth: options.minWidth,
+ minHeight: self._minHeight(),
+ handles: resizeHandles,
+ start: function(event, ui) {
+ $(this).addClass("ui-dialog-resizing");
+ self._trigger('resizeStart', event, filteredUi(ui));
+ },
+ resize: function(event, ui) {
+ self._trigger('resize', event, filteredUi(ui));
+ },
+ stop: function(event, ui) {
+ $(this).removeClass("ui-dialog-resizing");
+ options.height = $(this).height();
+ options.width = $(this).width();
+ self._trigger('resizeStop', event, filteredUi(ui));
+ $.ui.dialog.overlay.resize();
+ }
+ })
+ .css('position', position)
+ .find('.ui-resizable-se').addClass('ui-icon ui-icon-grip-diagonal-se');
+ },
+
+ _minHeight: function() {
+ var options = this.options;
+
+ if (options.height === 'auto') {
+ return options.minHeight;
+ } else {
+ return Math.min(options.minHeight, options.height);
+ }
+ },
+
+ _position: function(position) {
+ var myAt = [],
+ offset = [0, 0],
+ isVisible;
+
+ if (position) {
+ // deep extending converts arrays to objects in jQuery <= 1.3.2 :-(
+ // if (typeof position == 'string' || $.isArray(position)) {
+ // myAt = $.isArray(position) ? position : position.split(' ');
+
+ if (typeof position === 'string' || (typeof position === 'object' && '0' in position)) {
+ myAt = position.split ? position.split(' ') : [position[0], position[1]];
+ if (myAt.length === 1) {
+ myAt[1] = myAt[0];
+ }
+
+ $.each(['left', 'top'], function(i, offsetPosition) {
+ if (+myAt[i] === myAt[i]) {
+ offset[i] = myAt[i];
+ myAt[i] = offsetPosition;
+ }
+ });
+
+ position = {
+ my: myAt.join(" "),
+ at: myAt.join(" "),
+ offset: offset.join(" ")
+ };
+ }
+
+ position = $.extend({}, $.ui.dialog.prototype.options.position, position);
+ } else {
+ position = $.ui.dialog.prototype.options.position;
+ }
+
+ // need to show the dialog to get the actual offset in the position plugin
+ isVisible = this.uiDialog.is(':visible');
+ if (!isVisible) {
+ this.uiDialog.show();
+ }
+ this.uiDialog
+ // workaround for jQuery bug #5781 http://dev.jquery.com/ticket/5781
+ .css({ top: 0, left: 0 })
+ .position($.extend({ of: window }, position));
+ if (!isVisible) {
+ this.uiDialog.hide();
+ }
+ },
+
+ _setOptions: function( options ) {
+ var self = this,
+ resizableOptions = {},
+ resize = false;
+
+ $.each( options, function( key, value ) {
+ self._setOption( key, value );
+
+ if ( key in sizeRelatedOptions ) {
+ resize = true;
+ }
+ if ( key in resizableRelatedOptions ) {
+ resizableOptions[ key ] = value;
+ }
+ });
+
+ if ( resize ) {
+ this._size();
+ }
+ if ( this.uiDialog.is( ":data(resizable)" ) ) {
+ this.uiDialog.resizable( "option", resizableOptions );
+ }
+ },
+
+ _setOption: function(key, value){
+ var self = this,
+ uiDialog = self.uiDialog;
+
+ switch (key) {
+ //handling of deprecated beforeclose (vs beforeClose) option
+ //Ticket #4669 http://dev.jqueryui.com/ticket/4669
+ //TODO: remove in 1.9pre
+ case "beforeclose":
+ key = "beforeClose";
+ break;
+ case "buttons":
+ self._createButtons(value);
+ break;
+ case "closeText":
+ // ensure that we always pass a string
+ self.uiDialogTitlebarCloseText.text("" + value);
+ break;
+ case "dialogClass":
+ uiDialog
+ .removeClass(self.options.dialogClass)
+ .addClass(uiDialogClasses + value);
+ break;
+ case "disabled":
+ if (value) {
+ uiDialog.addClass('ui-dialog-disabled');
+ } else {
+ uiDialog.removeClass('ui-dialog-disabled');
+ }
+ break;
+ case "draggable":
+ var isDraggable = uiDialog.is( ":data(draggable)" );
+ if ( isDraggable && !value ) {
+ uiDialog.draggable( "destroy" );
+ }
+
+ if ( !isDraggable && value ) {
+ self._makeDraggable();
+ }
+ break;
+ case "position":
+ self._position(value);
+ break;
+ case "resizable":
+ // currently resizable, becoming non-resizable
+ var isResizable = uiDialog.is( ":data(resizable)" );
+ if (isResizable && !value) {
+ uiDialog.resizable('destroy');
+ }
+
+ // currently resizable, changing handles
+ if (isResizable && typeof value === 'string') {
+ uiDialog.resizable('option', 'handles', value);
+ }
+
+ // currently non-resizable, becoming resizable
+ if (!isResizable && value !== false) {
+ self._makeResizable(value);
+ }
+ break;
+ case "title":
+ // convert whatever was passed in o a string, for html() to not throw up
+ $(".ui-dialog-title", self.uiDialogTitlebar).html("" + (value || '&#160;'));
+ break;
+ }
+
+ $.Widget.prototype._setOption.apply(self, arguments);
+ },
+
+ _size: function() {
+ /* If the user has resized the dialog, the .ui-dialog and .ui-dialog-content
+ * divs will both have width and height set, so we need to reset them
+ */
+ var options = this.options,
+ nonContentHeight,
+ minContentHeight,
+ isVisible = this.uiDialog.is( ":visible" );
+
+ // reset content sizing
+ this.element.show().css({
+ width: 'auto',
+ minHeight: 0,
+ height: 0
+ });
+
+ if (options.minWidth > options.width) {
+ options.width = options.minWidth;
+ }
+
+ // reset wrapper sizing
+ // determine the height of all the non-content elements
+ nonContentHeight = this.uiDialog.css({
+ height: 'auto',
+ width: options.width
+ })
+ .height();
+ minContentHeight = Math.max( 0, options.minHeight - nonContentHeight );
+
+ if ( options.height === "auto" ) {
+ // only needed for IE6 support
+ if ( $.support.minHeight ) {
+ this.element.css({
+ minHeight: minContentHeight,
+ height: "auto"
+ });
+ } else {
+ this.uiDialog.show();
+ var autoHeight = this.element.css( "height", "auto" ).height();
+ if ( !isVisible ) {
+ this.uiDialog.hide();
+ }
+ this.element.height( Math.max( autoHeight, minContentHeight ) );
+ }
+ } else {
+ this.element.height( Math.max( options.height - nonContentHeight, 0 ) );
+ }
+
+ if (this.uiDialog.is(':data(resizable)')) {
+ this.uiDialog.resizable('option', 'minHeight', this._minHeight());
+ }
+ }
+});
+
+$.extend($.ui.dialog, {
+ version: "1.8.16",
+
+ uuid: 0,
+ maxZ: 0,
+
+ getTitleId: function($el) {
+ var id = $el.attr('id');
+ if (!id) {
+ this.uuid += 1;
+ id = this.uuid;
+ }
+ return 'ui-dialog-title-' + id;
+ },
+
+ overlay: function(dialog) {
+ this.$el = $.ui.dialog.overlay.create(dialog);
+ }
+});
+
+$.extend($.ui.dialog.overlay, {
+ instances: [],
+ // reuse old instances due to IE memory leak with alpha transparency (see #5185)
+ oldInstances: [],
+ maxZ: 0,
+ events: $.map('focus,mousedown,mouseup,keydown,keypress,click'.split(','),
+ function(event) { return event + '.dialog-overlay'; }).join(' '),
+ create: function(dialog) {
+ if (this.instances.length === 0) {
+ // prevent use of anchors and inputs
+ // we use a setTimeout in case the overlay is created from an
+ // event that we're going to be cancelling (see #2804)
+ setTimeout(function() {
+ // handle $(el).dialog().dialog('close') (see #4065)
+ if ($.ui.dialog.overlay.instances.length) {
+ $(document).bind($.ui.dialog.overlay.events, function(event) {
+ // stop events if the z-index of the target is < the z-index of the overlay
+ // we cannot return true when we don't want to cancel the event (#3523)
+ if ($(event.target).zIndex() < $.ui.dialog.overlay.maxZ) {
+ return false;
+ }
+ });
+ }
+ }, 1);
+
+ // allow closing by pressing the escape key
+ $(document).bind('keydown.dialog-overlay', function(event) {
+ if (dialog.options.closeOnEscape && !event.isDefaultPrevented() && event.keyCode &&
+ event.keyCode === $.ui.keyCode.ESCAPE) {
+
+ dialog.close(event);
+ event.preventDefault();
+ }
+ });
+
+ // handle window resize
+ $(window).bind('resize.dialog-overlay', $.ui.dialog.overlay.resize);
+ }
+
+ var $el = (this.oldInstances.pop() || $('<div></div>').addClass('ui-widget-overlay'))
+ .appendTo(document.body)
+ .css({
+ width: this.width(),
+ height: this.height()
+ });
+
+ if ($.fn.bgiframe) {
+ $el.bgiframe();
+ }
+
+ this.instances.push($el);
+ return $el;
+ },
+
+ destroy: function($el) {
+ var indexOf = $.inArray($el, this.instances);
+ if (indexOf != -1){
+ this.oldInstances.push(this.instances.splice(indexOf, 1)[0]);
+ }
+
+ if (this.instances.length === 0) {
+ $([document, window]).unbind('.dialog-overlay');
+ }
+
+ $el.remove();
+
+ // adjust the maxZ to allow other modal dialogs to continue to work (see #4309)
+ var maxZ = 0;
+ $.each(this.instances, function() {
+ maxZ = Math.max(maxZ, this.css('z-index'));
+ });
+ this.maxZ = maxZ;
+ },
+
+ height: function() {
+ var scrollHeight,
+ offsetHeight;
+ // handle IE 6
+ if ($.browser.msie && $.browser.version < 7) {
+ scrollHeight = Math.max(
+ document.documentElement.scrollHeight,
+ document.body.scrollHeight
+ );
+ offsetHeight = Math.max(
+ document.documentElement.offsetHeight,
+ document.body.offsetHeight
+ );
+
+ if (scrollHeight < offsetHeight) {
+ return $(window).height() + 'px';
+ } else {
+ return scrollHeight + 'px';
+ }
+ // handle "good" browsers
+ } else {
+ return $(document).height() + 'px';
+ }
+ },
+
+ width: function() {
+ var scrollWidth,
+ offsetWidth;
+ // handle IE
+ if ( $.browser.msie ) {
+ scrollWidth = Math.max(
+ document.documentElement.scrollWidth,
+ document.body.scrollWidth
+ );
+ offsetWidth = Math.max(
+ document.documentElement.offsetWidth,
+ document.body.offsetWidth
+ );
+
+ if (scrollWidth < offsetWidth) {
+ return $(window).width() + 'px';
+ } else {
+ return scrollWidth + 'px';
+ }
+ // handle "good" browsers
+ } else {
+ return $(document).width() + 'px';
+ }
+ },
+
+ resize: function() {
+ /* If the dialog is draggable and the user drags it past the
+ * right edge of the window, the document becomes wider so we
+ * need to stretch the overlay. If the user then drags the
+ * dialog back to the left, the document will become narrower,
+ * so we need to shrink the overlay to the appropriate size.
+ * This is handled by shrinking the overlay before setting it
+ * to the full document size.
+ */
+ var $overlays = $([]);
+ $.each($.ui.dialog.overlay.instances, function() {
+ $overlays = $overlays.add(this);
+ });
+
+ $overlays.css({
+ width: 0,
+ height: 0
+ }).css({
+ width: $.ui.dialog.overlay.width(),
+ height: $.ui.dialog.overlay.height()
+ });
+ }
+});
+
+$.extend($.ui.dialog.overlay.prototype, {
+ destroy: function() {
+ $.ui.dialog.overlay.destroy(this.$el);
+ }
+});
+
+}(jQuery));
+/*
+ * jQuery UI Slider 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Slider
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.mouse.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+// number of pages in a slider
+// (how many times can you page up/down to go through the whole range)
+var numPages = 5;
+
+$.widget( "ui.slider", $.ui.mouse, {
+
+ widgetEventPrefix: "slide",
+
+ options: {
+ animate: false,
+ distance: 0,
+ max: 100,
+ min: 0,
+ orientation: "horizontal",
+ range: false,
+ step: 1,
+ value: 0,
+ values: null
+ },
+
+ _create: function() {
+ var self = this,
+ o = this.options,
+ existingHandles = this.element.find( ".ui-slider-handle" ).addClass( "ui-state-default ui-corner-all" ),
+ handle = "<a class='ui-slider-handle ui-state-default ui-corner-all' href='#'></a>",
+ handleCount = ( o.values && o.values.length ) || 1,
+ handles = [];
+
+ this._keySliding = false;
+ this._mouseSliding = false;
+ this._animateOff = true;
+ this._handleIndex = null;
+ this._detectOrientation();
+ this._mouseInit();
+
+ this.element
+ .addClass( "ui-slider" +
+ " ui-slider-" + this.orientation +
+ " ui-widget" +
+ " ui-widget-content" +
+ " ui-corner-all" +
+ ( o.disabled ? " ui-slider-disabled ui-disabled" : "" ) );
+
+ this.range = $([]);
+
+ if ( o.range ) {
+ if ( o.range === true ) {
+ if ( !o.values ) {
+ o.values = [ this._valueMin(), this._valueMin() ];
+ }
+ if ( o.values.length && o.values.length !== 2 ) {
+ o.values = [ o.values[0], o.values[0] ];
+ }
+ }
+
+ this.range = $( "<div></div>" )
+ .appendTo( this.element )
+ .addClass( "ui-slider-range" +
+ // note: this isn't the most fittingly semantic framework class for this element,
+ // but worked best visually with a variety of themes
+ " ui-widget-header" +
+ ( ( o.range === "min" || o.range === "max" ) ? " ui-slider-range-" + o.range : "" ) );
+ }
+
+ for ( var i = existingHandles.length; i < handleCount; i += 1 ) {
+ handles.push( handle );
+ }
+
+ this.handles = existingHandles.add( $( handles.join( "" ) ).appendTo( self.element ) );
+
+ this.handle = this.handles.eq( 0 );
+
+ this.handles.add( this.range ).filter( "a" )
+ .click(function( event ) {
+ event.preventDefault();
+ })
+ .hover(function() {
+ if ( !o.disabled ) {
+ $( this ).addClass( "ui-state-hover" );
+ }
+ }, function() {
+ $( this ).removeClass( "ui-state-hover" );
+ })
+ .focus(function() {
+ if ( !o.disabled ) {
+ $( ".ui-slider .ui-state-focus" ).removeClass( "ui-state-focus" );
+ $( this ).addClass( "ui-state-focus" );
+ } else {
+ $( this ).blur();
+ }
+ })
+ .blur(function() {
+ $( this ).removeClass( "ui-state-focus" );
+ });
+
+ this.handles.each(function( i ) {
+ $( this ).data( "index.ui-slider-handle", i );
+ });
+
+ this.handles
+ .keydown(function( event ) {
+ var ret = true,
+ index = $( this ).data( "index.ui-slider-handle" ),
+ allowed,
+ curVal,
+ newVal,
+ step;
+
+ if ( self.options.disabled ) {
+ return;
+ }
+
+ switch ( event.keyCode ) {
+ case $.ui.keyCode.HOME:
+ case $.ui.keyCode.END:
+ case $.ui.keyCode.PAGE_UP:
+ case $.ui.keyCode.PAGE_DOWN:
+ case $.ui.keyCode.UP:
+ case $.ui.keyCode.RIGHT:
+ case $.ui.keyCode.DOWN:
+ case $.ui.keyCode.LEFT:
+ ret = false;
+ if ( !self._keySliding ) {
+ self._keySliding = true;
+ $( this ).addClass( "ui-state-active" );
+ allowed = self._start( event, index );
+ if ( allowed === false ) {
+ return;
+ }
+ }
+ break;
+ }
+
+ step = self.options.step;
+ if ( self.options.values && self.options.values.length ) {
+ curVal = newVal = self.values( index );
+ } else {
+ curVal = newVal = self.value();
+ }
+
+ switch ( event.keyCode ) {
+ case $.ui.keyCode.HOME:
+ newVal = self._valueMin();
+ break;
+ case $.ui.keyCode.END:
+ newVal = self._valueMax();
+ break;
+ case $.ui.keyCode.PAGE_UP:
+ newVal = self._trimAlignValue( curVal + ( (self._valueMax() - self._valueMin()) / numPages ) );
+ break;
+ case $.ui.keyCode.PAGE_DOWN:
+ newVal = self._trimAlignValue( curVal - ( (self._valueMax() - self._valueMin()) / numPages ) );
+ break;
+ case $.ui.keyCode.UP:
+ case $.ui.keyCode.RIGHT:
+ if ( curVal === self._valueMax() ) {
+ return;
+ }
+ newVal = self._trimAlignValue( curVal + step );
+ break;
+ case $.ui.keyCode.DOWN:
+ case $.ui.keyCode.LEFT:
+ if ( curVal === self._valueMin() ) {
+ return;
+ }
+ newVal = self._trimAlignValue( curVal - step );
+ break;
+ }
+
+ self._slide( event, index, newVal );
+
+ return ret;
+
+ })
+ .keyup(function( event ) {
+ var index = $( this ).data( "index.ui-slider-handle" );
+
+ if ( self._keySliding ) {
+ self._keySliding = false;
+ self._stop( event, index );
+ self._change( event, index );
+ $( this ).removeClass( "ui-state-active" );
+ }
+
+ });
+
+ this._refreshValue();
+
+ this._animateOff = false;
+ },
+
+ destroy: function() {
+ this.handles.remove();
+ this.range.remove();
+
+ this.element
+ .removeClass( "ui-slider" +
+ " ui-slider-horizontal" +
+ " ui-slider-vertical" +
+ " ui-slider-disabled" +
+ " ui-widget" +
+ " ui-widget-content" +
+ " ui-corner-all" )
+ .removeData( "slider" )
+ .unbind( ".slider" );
+
+ this._mouseDestroy();
+
+ return this;
+ },
+
+ _mouseCapture: function( event ) {
+ var o = this.options,
+ position,
+ normValue,
+ distance,
+ closestHandle,
+ self,
+ index,
+ allowed,
+ offset,
+ mouseOverHandle;
+
+ if ( o.disabled ) {
+ return false;
+ }
+
+ this.elementSize = {
+ width: this.element.outerWidth(),
+ height: this.element.outerHeight()
+ };
+ this.elementOffset = this.element.offset();
+
+ position = { x: event.pageX, y: event.pageY };
+ normValue = this._normValueFromMouse( position );
+ distance = this._valueMax() - this._valueMin() + 1;
+ self = this;
+ this.handles.each(function( i ) {
+ var thisDistance = Math.abs( normValue - self.values(i) );
+ if ( distance > thisDistance ) {
+ distance = thisDistance;
+ closestHandle = $( this );
+ index = i;
+ }
+ });
+
+ // workaround for bug #3736 (if both handles of a range are at 0,
+ // the first is always used as the one with least distance,
+ // and moving it is obviously prevented by preventing negative ranges)
+ if( o.range === true && this.values(1) === o.min ) {
+ index += 1;
+ closestHandle = $( this.handles[index] );
+ }
+
+ allowed = this._start( event, index );
+ if ( allowed === false ) {
+ return false;
+ }
+ this._mouseSliding = true;
+
+ self._handleIndex = index;
+
+ closestHandle
+ .addClass( "ui-state-active" )
+ .focus();
+
+ offset = closestHandle.offset();
+ mouseOverHandle = !$( event.target ).parents().andSelf().is( ".ui-slider-handle" );
+ this._clickOffset = mouseOverHandle ? { left: 0, top: 0 } : {
+ left: event.pageX - offset.left - ( closestHandle.width() / 2 ),
+ top: event.pageY - offset.top -
+ ( closestHandle.height() / 2 ) -
+ ( parseInt( closestHandle.css("borderTopWidth"), 10 ) || 0 ) -
+ ( parseInt( closestHandle.css("borderBottomWidth"), 10 ) || 0) +
+ ( parseInt( closestHandle.css("marginTop"), 10 ) || 0)
+ };
+
+ if ( !this.handles.hasClass( "ui-state-hover" ) ) {
+ this._slide( event, index, normValue );
+ }
+ this._animateOff = true;
+ return true;
+ },
+
+ _mouseStart: function( event ) {
+ return true;
+ },
+
+ _mouseDrag: function( event ) {
+ var position = { x: event.pageX, y: event.pageY },
+ normValue = this._normValueFromMouse( position );
+
+ this._slide( event, this._handleIndex, normValue );
+
+ return false;
+ },
+
+ _mouseStop: function( event ) {
+ this.handles.removeClass( "ui-state-active" );
+ this._mouseSliding = false;
+
+ this._stop( event, this._handleIndex );
+ this._change( event, this._handleIndex );
+
+ this._handleIndex = null;
+ this._clickOffset = null;
+ this._animateOff = false;
+
+ return false;
+ },
+
+ _detectOrientation: function() {
+ this.orientation = ( this.options.orientation === "vertical" ) ? "vertical" : "horizontal";
+ },
+
+ _normValueFromMouse: function( position ) {
+ var pixelTotal,
+ pixelMouse,
+ percentMouse,
+ valueTotal,
+ valueMouse;
+
+ if ( this.orientation === "horizontal" ) {
+ pixelTotal = this.elementSize.width;
+ pixelMouse = position.x - this.elementOffset.left - ( this._clickOffset ? this._clickOffset.left : 0 );
+ } else {
+ pixelTotal = this.elementSize.height;
+ pixelMouse = position.y - this.elementOffset.top - ( this._clickOffset ? this._clickOffset.top : 0 );
+ }
+
+ percentMouse = ( pixelMouse / pixelTotal );
+ if ( percentMouse > 1 ) {
+ percentMouse = 1;
+ }
+ if ( percentMouse < 0 ) {
+ percentMouse = 0;
+ }
+ if ( this.orientation === "vertical" ) {
+ percentMouse = 1 - percentMouse;
+ }
+
+ valueTotal = this._valueMax() - this._valueMin();
+ valueMouse = this._valueMin() + percentMouse * valueTotal;
+
+ return this._trimAlignValue( valueMouse );
+ },
+
+ _start: function( event, index ) {
+ var uiHash = {
+ handle: this.handles[ index ],
+ value: this.value()
+ };
+ if ( this.options.values && this.options.values.length ) {
+ uiHash.value = this.values( index );
+ uiHash.values = this.values();
+ }
+ return this._trigger( "start", event, uiHash );
+ },
+
+ _slide: function( event, index, newVal ) {
+ var otherVal,
+ newValues,
+ allowed;
+
+ if ( this.options.values && this.options.values.length ) {
+ otherVal = this.values( index ? 0 : 1 );
+
+ if ( ( this.options.values.length === 2 && this.options.range === true ) &&
+ ( ( index === 0 && newVal > otherVal) || ( index === 1 && newVal < otherVal ) )
+ ) {
+ newVal = otherVal;
+ }
+
+ if ( newVal !== this.values( index ) ) {
+ newValues = this.values();
+ newValues[ index ] = newVal;
+ // A slide can be canceled by returning false from the slide callback
+ allowed = this._trigger( "slide", event, {
+ handle: this.handles[ index ],
+ value: newVal,
+ values: newValues
+ } );
+ otherVal = this.values( index ? 0 : 1 );
+ if ( allowed !== false ) {
+ this.values( index, newVal, true );
+ }
+ }
+ } else {
+ if ( newVal !== this.value() ) {
+ // A slide can be canceled by returning false from the slide callback
+ allowed = this._trigger( "slide", event, {
+ handle: this.handles[ index ],
+ value: newVal
+ } );
+ if ( allowed !== false ) {
+ this.value( newVal );
+ }
+ }
+ }
+ },
+
+ _stop: function( event, index ) {
+ var uiHash = {
+ handle: this.handles[ index ],
+ value: this.value()
+ };
+ if ( this.options.values && this.options.values.length ) {
+ uiHash.value = this.values( index );
+ uiHash.values = this.values();
+ }
+
+ this._trigger( "stop", event, uiHash );
+ },
+
+ _change: function( event, index ) {
+ if ( !this._keySliding && !this._mouseSliding ) {
+ var uiHash = {
+ handle: this.handles[ index ],
+ value: this.value()
+ };
+ if ( this.options.values && this.options.values.length ) {
+ uiHash.value = this.values( index );
+ uiHash.values = this.values();
+ }
+
+ this._trigger( "change", event, uiHash );
+ }
+ },
+
+ value: function( newValue ) {
+ if ( arguments.length ) {
+ this.options.value = this._trimAlignValue( newValue );
+ this._refreshValue();
+ this._change( null, 0 );
+ return;
+ }
+
+ return this._value();
+ },
+
+ values: function( index, newValue ) {
+ var vals,
+ newValues,
+ i;
+
+ if ( arguments.length > 1 ) {
+ this.options.values[ index ] = this._trimAlignValue( newValue );
+ this._refreshValue();
+ this._change( null, index );
+ return;
+ }
+
+ if ( arguments.length ) {
+ if ( $.isArray( arguments[ 0 ] ) ) {
+ vals = this.options.values;
+ newValues = arguments[ 0 ];
+ for ( i = 0; i < vals.length; i += 1 ) {
+ vals[ i ] = this._trimAlignValue( newValues[ i ] );
+ this._change( null, i );
+ }
+ this._refreshValue();
+ } else {
+ if ( this.options.values && this.options.values.length ) {
+ return this._values( index );
+ } else {
+ return this.value();
+ }
+ }
+ } else {
+ return this._values();
+ }
+ },
+
+ _setOption: function( key, value ) {
+ var i,
+ valsLength = 0;
+
+ if ( $.isArray( this.options.values ) ) {
+ valsLength = this.options.values.length;
+ }
+
+ $.Widget.prototype._setOption.apply( this, arguments );
+
+ switch ( key ) {
+ case "disabled":
+ if ( value ) {
+ this.handles.filter( ".ui-state-focus" ).blur();
+ this.handles.removeClass( "ui-state-hover" );
+ this.handles.propAttr( "disabled", true );
+ this.element.addClass( "ui-disabled" );
+ } else {
+ this.handles.propAttr( "disabled", false );
+ this.element.removeClass( "ui-disabled" );
+ }
+ break;
+ case "orientation":
+ this._detectOrientation();
+ this.element
+ .removeClass( "ui-slider-horizontal ui-slider-vertical" )
+ .addClass( "ui-slider-" + this.orientation );
+ this._refreshValue();
+ break;
+ case "value":
+ this._animateOff = true;
+ this._refreshValue();
+ this._change( null, 0 );
+ this._animateOff = false;
+ break;
+ case "values":
+ this._animateOff = true;
+ this._refreshValue();
+ for ( i = 0; i < valsLength; i += 1 ) {
+ this._change( null, i );
+ }
+ this._animateOff = false;
+ break;
+ }
+ },
+
+ //internal value getter
+ // _value() returns value trimmed by min and max, aligned by step
+ _value: function() {
+ var val = this.options.value;
+ val = this._trimAlignValue( val );
+
+ return val;
+ },
+
+ //internal values getter
+ // _values() returns array of values trimmed by min and max, aligned by step
+ // _values( index ) returns single value trimmed by min and max, aligned by step
+ _values: function( index ) {
+ var val,
+ vals,
+ i;
+
+ if ( arguments.length ) {
+ val = this.options.values[ index ];
+ val = this._trimAlignValue( val );
+
+ return val;
+ } else {
+ // .slice() creates a copy of the array
+ // this copy gets trimmed by min and max and then returned
+ vals = this.options.values.slice();
+ for ( i = 0; i < vals.length; i+= 1) {
+ vals[ i ] = this._trimAlignValue( vals[ i ] );
+ }
+
+ return vals;
+ }
+ },
+
+ // returns the step-aligned value that val is closest to, between (inclusive) min and max
+ _trimAlignValue: function( val ) {
+ if ( val <= this._valueMin() ) {
+ return this._valueMin();
+ }
+ if ( val >= this._valueMax() ) {
+ return this._valueMax();
+ }
+ var step = ( this.options.step > 0 ) ? this.options.step : 1,
+ valModStep = (val - this._valueMin()) % step,
+ alignValue = val - valModStep;
+
+ if ( Math.abs(valModStep) * 2 >= step ) {
+ alignValue += ( valModStep > 0 ) ? step : ( -step );
+ }
+
+ // Since JavaScript has problems with large floats, round
+ // the final value to 5 digits after the decimal point (see #4124)
+ return parseFloat( alignValue.toFixed(5) );
+ },
+
+ _valueMin: function() {
+ return this.options.min;
+ },
+
+ _valueMax: function() {
+ return this.options.max;
+ },
+
+ _refreshValue: function() {
+ var oRange = this.options.range,
+ o = this.options,
+ self = this,
+ animate = ( !this._animateOff ) ? o.animate : false,
+ valPercent,
+ _set = {},
+ lastValPercent,
+ value,
+ valueMin,
+ valueMax;
+
+ if ( this.options.values && this.options.values.length ) {
+ this.handles.each(function( i, j ) {
+ valPercent = ( self.values(i) - self._valueMin() ) / ( self._valueMax() - self._valueMin() ) * 100;
+ _set[ self.orientation === "horizontal" ? "left" : "bottom" ] = valPercent + "%";
+ $( this ).stop( 1, 1 )[ animate ? "animate" : "css" ]( _set, o.animate );
+ if ( self.options.range === true ) {
+ if ( self.orientation === "horizontal" ) {
+ if ( i === 0 ) {
+ self.range.stop( 1, 1 )[ animate ? "animate" : "css" ]( { left: valPercent + "%" }, o.animate );
+ }
+ if ( i === 1 ) {
+ self.range[ animate ? "animate" : "css" ]( { width: ( valPercent - lastValPercent ) + "%" }, { queue: false, duration: o.animate } );
+ }
+ } else {
+ if ( i === 0 ) {
+ self.range.stop( 1, 1 )[ animate ? "animate" : "css" ]( { bottom: ( valPercent ) + "%" }, o.animate );
+ }
+ if ( i === 1 ) {
+ self.range[ animate ? "animate" : "css" ]( { height: ( valPercent - lastValPercent ) + "%" }, { queue: false, duration: o.animate } );
+ }
+ }
+ }
+ lastValPercent = valPercent;
+ });
+ } else {
+ value = this.value();
+ valueMin = this._valueMin();
+ valueMax = this._valueMax();
+ valPercent = ( valueMax !== valueMin ) ?
+ ( value - valueMin ) / ( valueMax - valueMin ) * 100 :
+ 0;
+ _set[ self.orientation === "horizontal" ? "left" : "bottom" ] = valPercent + "%";
+ this.handle.stop( 1, 1 )[ animate ? "animate" : "css" ]( _set, o.animate );
+
+ if ( oRange === "min" && this.orientation === "horizontal" ) {
+ this.range.stop( 1, 1 )[ animate ? "animate" : "css" ]( { width: valPercent + "%" }, o.animate );
+ }
+ if ( oRange === "max" && this.orientation === "horizontal" ) {
+ this.range[ animate ? "animate" : "css" ]( { width: ( 100 - valPercent ) + "%" }, { queue: false, duration: o.animate } );
+ }
+ if ( oRange === "min" && this.orientation === "vertical" ) {
+ this.range.stop( 1, 1 )[ animate ? "animate" : "css" ]( { height: valPercent + "%" }, o.animate );
+ }
+ if ( oRange === "max" && this.orientation === "vertical" ) {
+ this.range[ animate ? "animate" : "css" ]( { height: ( 100 - valPercent ) + "%" }, { queue: false, duration: o.animate } );
+ }
+ }
+ }
+
+});
+
+$.extend( $.ui.slider, {
+ version: "1.8.16"
+});
+
+}(jQuery));
+/*
+ * jQuery UI Tabs 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Tabs
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+var tabId = 0,
+ listId = 0;
+
+function getNextTabId() {
+ return ++tabId;
+}
+
+function getNextListId() {
+ return ++listId;
+}
+
+$.widget( "ui.tabs", {
+ options: {
+ add: null,
+ ajaxOptions: null,
+ cache: false,
+ cookie: null, // e.g. { expires: 7, path: '/', domain: 'jquery.com', secure: true }
+ collapsible: false,
+ disable: null,
+ disabled: [],
+ enable: null,
+ event: "click",
+ fx: null, // e.g. { height: 'toggle', opacity: 'toggle', duration: 200 }
+ idPrefix: "ui-tabs-",
+ load: null,
+ panelTemplate: "<div></div>",
+ remove: null,
+ select: null,
+ show: null,
+ spinner: "<em>Loading&#8230;</em>",
+ tabTemplate: "<li><a href='#{href}'><span>#{label}</span></a></li>"
+ },
+
+ _create: function() {
+ this._tabify( true );
+ },
+
+ _setOption: function( key, value ) {
+ if ( key == "selected" ) {
+ if (this.options.collapsible && value == this.options.selected ) {
+ return;
+ }
+ this.select( value );
+ } else {
+ this.options[ key ] = value;
+ this._tabify();
+ }
+ },
+
+ _tabId: function( a ) {
+ return a.title && a.title.replace( /\s/g, "_" ).replace( /[^\w\u00c0-\uFFFF-]/g, "" ) ||
+ this.options.idPrefix + getNextTabId();
+ },
+
+ _sanitizeSelector: function( hash ) {
+ // we need this because an id may contain a ":"
+ return hash.replace( /:/g, "\\:" );
+ },
+
+ _cookie: function() {
+ var cookie = this.cookie ||
+ ( this.cookie = this.options.cookie.name || "ui-tabs-" + getNextListId() );
+ return $.cookie.apply( null, [ cookie ].concat( $.makeArray( arguments ) ) );
+ },
+
+ _ui: function( tab, panel ) {
+ return {
+ tab: tab,
+ panel: panel,
+ index: this.anchors.index( tab )
+ };
+ },
+
+ _cleanup: function() {
+ // restore all former loading tabs labels
+ this.lis.filter( ".ui-state-processing" )
+ .removeClass( "ui-state-processing" )
+ .find( "span:data(label.tabs)" )
+ .each(function() {
+ var el = $( this );
+ el.html( el.data( "label.tabs" ) ).removeData( "label.tabs" );
+ });
+ },
+
+ _tabify: function( init ) {
+ var self = this,
+ o = this.options,
+ fragmentId = /^#.+/; // Safari 2 reports '#' for an empty hash
+
+ this.list = this.element.find( "ol,ul" ).eq( 0 );
+ this.lis = $( " > li:has(a[href])", this.list );
+ this.anchors = this.lis.map(function() {
+ return $( "a", this )[ 0 ];
+ });
+ this.panels = $( [] );
+
+ this.anchors.each(function( i, a ) {
+ var href = $( a ).attr( "href" );
+ // For dynamically created HTML that contains a hash as href IE < 8 expands
+ // such href to the full page url with hash and then misinterprets tab as ajax.
+ // Same consideration applies for an added tab with a fragment identifier
+ // since a[href=#fragment-identifier] does unexpectedly not match.
+ // Thus normalize href attribute...
+ var hrefBase = href.split( "#" )[ 0 ],
+ baseEl;
+ if ( hrefBase && ( hrefBase === location.toString().split( "#" )[ 0 ] ||
+ ( baseEl = $( "base" )[ 0 ]) && hrefBase === baseEl.href ) ) {
+ href = a.hash;
+ a.href = href;
+ }
+
+ // inline tab
+ if ( fragmentId.test( href ) ) {
+ self.panels = self.panels.add( self.element.find( self._sanitizeSelector( href ) ) );
+ // remote tab
+ // prevent loading the page itself if href is just "#"
+ } else if ( href && href !== "#" ) {
+ // required for restore on destroy
+ $.data( a, "href.tabs", href );
+
+ // TODO until #3808 is fixed strip fragment identifier from url
+ // (IE fails to load from such url)
+ $.data( a, "load.tabs", href.replace( /#.*$/, "" ) );
+
+ var id = self._tabId( a );
+ a.href = "#" + id;
+ var $panel = self.element.find( "#" + id );
+ if ( !$panel.length ) {
+ $panel = $( o.panelTemplate )
+ .attr( "id", id )
+ .addClass( "ui-tabs-panel ui-widget-content ui-corner-bottom" )
+ .insertAfter( self.panels[ i - 1 ] || self.list );
+ $panel.data( "destroy.tabs", true );
+ }
+ self.panels = self.panels.add( $panel );
+ // invalid tab href
+ } else {
+ o.disabled.push( i );
+ }
+ });
+
+ // initialization from scratch
+ if ( init ) {
+ // attach necessary classes for styling
+ this.element.addClass( "ui-tabs ui-widget ui-widget-content ui-corner-all" );
+ this.list.addClass( "ui-tabs-nav ui-helper-reset ui-helper-clearfix ui-widget-header ui-corner-all" );
+ this.lis.addClass( "ui-state-default ui-corner-top" );
+ this.panels.addClass( "ui-tabs-panel ui-widget-content ui-corner-bottom" );
+
+ // Selected tab
+ // use "selected" option or try to retrieve:
+ // 1. from fragment identifier in url
+ // 2. from cookie
+ // 3. from selected class attribute on <li>
+ if ( o.selected === undefined ) {
+ if ( location.hash ) {
+ this.anchors.each(function( i, a ) {
+ if ( a.hash == location.hash ) {
+ o.selected = i;
+ return false;
+ }
+ });
+ }
+ if ( typeof o.selected !== "number" && o.cookie ) {
+ o.selected = parseInt( self._cookie(), 10 );
+ }
+ if ( typeof o.selected !== "number" && this.lis.filter( ".ui-tabs-selected" ).length ) {
+ o.selected = this.lis.index( this.lis.filter( ".ui-tabs-selected" ) );
+ }
+ o.selected = o.selected || ( this.lis.length ? 0 : -1 );
+ } else if ( o.selected === null ) { // usage of null is deprecated, TODO remove in next release
+ o.selected = -1;
+ }
+
+ // sanity check - default to first tab...
+ o.selected = ( ( o.selected >= 0 && this.anchors[ o.selected ] ) || o.selected < 0 )
+ ? o.selected
+ : 0;
+
+ // Take disabling tabs via class attribute from HTML
+ // into account and update option properly.
+ // A selected tab cannot become disabled.
+ o.disabled = $.unique( o.disabled.concat(
+ $.map( this.lis.filter( ".ui-state-disabled" ), function( n, i ) {
+ return self.lis.index( n );
+ })
+ ) ).sort();
+
+ if ( $.inArray( o.selected, o.disabled ) != -1 ) {
+ o.disabled.splice( $.inArray( o.selected, o.disabled ), 1 );
+ }
+
+ // highlight selected tab
+ this.panels.addClass( "ui-tabs-hide" );
+ this.lis.removeClass( "ui-tabs-selected ui-state-active" );
+ // check for length avoids error when initializing empty list
+ if ( o.selected >= 0 && this.anchors.length ) {
+ self.element.find( self._sanitizeSelector( self.anchors[ o.selected ].hash ) ).removeClass( "ui-tabs-hide" );
+ this.lis.eq( o.selected ).addClass( "ui-tabs-selected ui-state-active" );
+
+ // seems to be expected behavior that the show callback is fired
+ self.element.queue( "tabs", function() {
+ self._trigger( "show", null,
+ self._ui( self.anchors[ o.selected ], self.element.find( self._sanitizeSelector( self.anchors[ o.selected ].hash ) )[ 0 ] ) );
+ });
+
+ this.load( o.selected );
+ }
+
+ // clean up to avoid memory leaks in certain versions of IE 6
+ // TODO: namespace this event
+ $( window ).bind( "unload", function() {
+ self.lis.add( self.anchors ).unbind( ".tabs" );
+ self.lis = self.anchors = self.panels = null;
+ });
+ // update selected after add/remove
+ } else {
+ o.selected = this.lis.index( this.lis.filter( ".ui-tabs-selected" ) );
+ }
+
+ // update collapsible
+ // TODO: use .toggleClass()
+ this.element[ o.collapsible ? "addClass" : "removeClass" ]( "ui-tabs-collapsible" );
+
+ // set or update cookie after init and add/remove respectively
+ if ( o.cookie ) {
+ this._cookie( o.selected, o.cookie );
+ }
+
+ // disable tabs
+ for ( var i = 0, li; ( li = this.lis[ i ] ); i++ ) {
+ $( li )[ $.inArray( i, o.disabled ) != -1 &&
+ // TODO: use .toggleClass()
+ !$( li ).hasClass( "ui-tabs-selected" ) ? "addClass" : "removeClass" ]( "ui-state-disabled" );
+ }
+
+ // reset cache if switching from cached to not cached
+ if ( o.cache === false ) {
+ this.anchors.removeData( "cache.tabs" );
+ }
+
+ // remove all handlers before, tabify may run on existing tabs after add or option change
+ this.lis.add( this.anchors ).unbind( ".tabs" );
+
+ if ( o.event !== "mouseover" ) {
+ var addState = function( state, el ) {
+ if ( el.is( ":not(.ui-state-disabled)" ) ) {
+ el.addClass( "ui-state-" + state );
+ }
+ };
+ var removeState = function( state, el ) {
+ el.removeClass( "ui-state-" + state );
+ };
+ this.lis.bind( "mouseover.tabs" , function() {
+ addState( "hover", $( this ) );
+ });
+ this.lis.bind( "mouseout.tabs", function() {
+ removeState( "hover", $( this ) );
+ });
+ this.anchors.bind( "focus.tabs", function() {
+ addState( "focus", $( this ).closest( "li" ) );
+ });
+ this.anchors.bind( "blur.tabs", function() {
+ removeState( "focus", $( this ).closest( "li" ) );
+ });
+ }
+
+ // set up animations
+ var hideFx, showFx;
+ if ( o.fx ) {
+ if ( $.isArray( o.fx ) ) {
+ hideFx = o.fx[ 0 ];
+ showFx = o.fx[ 1 ];
+ } else {
+ hideFx = showFx = o.fx;
+ }
+ }
+
+ // Reset certain styles left over from animation
+ // and prevent IE's ClearType bug...
+ function resetStyle( $el, fx ) {
+ $el.css( "display", "" );
+ if ( !$.support.opacity && fx.opacity ) {
+ $el[ 0 ].style.removeAttribute( "filter" );
+ }
+ }
+
+ // Show a tab...
+ var showTab = showFx
+ ? function( clicked, $show ) {
+ $( clicked ).closest( "li" ).addClass( "ui-tabs-selected ui-state-active" );
+ $show.hide().removeClass( "ui-tabs-hide" ) // avoid flicker that way
+ .animate( showFx, showFx.duration || "normal", function() {
+ resetStyle( $show, showFx );
+ self._trigger( "show", null, self._ui( clicked, $show[ 0 ] ) );
+ });
+ }
+ : function( clicked, $show ) {
+ $( clicked ).closest( "li" ).addClass( "ui-tabs-selected ui-state-active" );
+ $show.removeClass( "ui-tabs-hide" );
+ self._trigger( "show", null, self._ui( clicked, $show[ 0 ] ) );
+ };
+
+ // Hide a tab, $show is optional...
+ var hideTab = hideFx
+ ? function( clicked, $hide ) {
+ $hide.animate( hideFx, hideFx.duration || "normal", function() {
+ self.lis.removeClass( "ui-tabs-selected ui-state-active" );
+ $hide.addClass( "ui-tabs-hide" );
+ resetStyle( $hide, hideFx );
+ self.element.dequeue( "tabs" );
+ });
+ }
+ : function( clicked, $hide, $show ) {
+ self.lis.removeClass( "ui-tabs-selected ui-state-active" );
+ $hide.addClass( "ui-tabs-hide" );
+ self.element.dequeue( "tabs" );
+ };
+
+ // attach tab event handler, unbind to avoid duplicates from former tabifying...
+ this.anchors.bind( o.event + ".tabs", function() {
+ var el = this,
+ $li = $(el).closest( "li" ),
+ $hide = self.panels.filter( ":not(.ui-tabs-hide)" ),
+ $show = self.element.find( self._sanitizeSelector( el.hash ) );
+
+ // If tab is already selected and not collapsible or tab disabled or
+ // or is already loading or click callback returns false stop here.
+ // Check if click handler returns false last so that it is not executed
+ // for a disabled or loading tab!
+ if ( ( $li.hasClass( "ui-tabs-selected" ) && !o.collapsible) ||
+ $li.hasClass( "ui-state-disabled" ) ||
+ $li.hasClass( "ui-state-processing" ) ||
+ self.panels.filter( ":animated" ).length ||
+ self._trigger( "select", null, self._ui( this, $show[ 0 ] ) ) === false ) {
+ this.blur();
+ return false;
+ }
+
+ o.selected = self.anchors.index( this );
+
+ self.abort();
+
+ // if tab may be closed
+ if ( o.collapsible ) {
+ if ( $li.hasClass( "ui-tabs-selected" ) ) {
+ o.selected = -1;
+
+ if ( o.cookie ) {
+ self._cookie( o.selected, o.cookie );
+ }
+
+ self.element.queue( "tabs", function() {
+ hideTab( el, $hide );
+ }).dequeue( "tabs" );
+
+ this.blur();
+ return false;
+ } else if ( !$hide.length ) {
+ if ( o.cookie ) {
+ self._cookie( o.selected, o.cookie );
+ }
+
+ self.element.queue( "tabs", function() {
+ showTab( el, $show );
+ });
+
+ // TODO make passing in node possible, see also http://dev.jqueryui.com/ticket/3171
+ self.load( self.anchors.index( this ) );
+
+ this.blur();
+ return false;
+ }
+ }
+
+ if ( o.cookie ) {
+ self._cookie( o.selected, o.cookie );
+ }
+
+ // show new tab
+ if ( $show.length ) {
+ if ( $hide.length ) {
+ self.element.queue( "tabs", function() {
+ hideTab( el, $hide );
+ });
+ }
+ self.element.queue( "tabs", function() {
+ showTab( el, $show );
+ });
+
+ self.load( self.anchors.index( this ) );
+ } else {
+ throw "jQuery UI Tabs: Mismatching fragment identifier.";
+ }
+
+ // Prevent IE from keeping other link focussed when using the back button
+ // and remove dotted border from clicked link. This is controlled via CSS
+ // in modern browsers; blur() removes focus from address bar in Firefox
+ // which can become a usability and annoying problem with tabs('rotate').
+ if ( $.browser.msie ) {
+ this.blur();
+ }
+ });
+
+ // disable click in any case
+ this.anchors.bind( "click.tabs", function(){
+ return false;
+ });
+ },
+
+ _getIndex: function( index ) {
+ // meta-function to give users option to provide a href string instead of a numerical index.
+ // also sanitizes numerical indexes to valid values.
+ if ( typeof index == "string" ) {
+ index = this.anchors.index( this.anchors.filter( "[href$=" + index + "]" ) );
+ }
+
+ return index;
+ },
+
+ destroy: function() {
+ var o = this.options;
+
+ this.abort();
+
+ this.element
+ .unbind( ".tabs" )
+ .removeClass( "ui-tabs ui-widget ui-widget-content ui-corner-all ui-tabs-collapsible" )
+ .removeData( "tabs" );
+
+ this.list.removeClass( "ui-tabs-nav ui-helper-reset ui-helper-clearfix ui-widget-header ui-corner-all" );
+
+ this.anchors.each(function() {
+ var href = $.data( this, "href.tabs" );
+ if ( href ) {
+ this.href = href;
+ }
+ var $this = $( this ).unbind( ".tabs" );
+ $.each( [ "href", "load", "cache" ], function( i, prefix ) {
+ $this.removeData( prefix + ".tabs" );
+ });
+ });
+
+ this.lis.unbind( ".tabs" ).add( this.panels ).each(function() {
+ if ( $.data( this, "destroy.tabs" ) ) {
+ $( this ).remove();
+ } else {
+ $( this ).removeClass([
+ "ui-state-default",
+ "ui-corner-top",
+ "ui-tabs-selected",
+ "ui-state-active",
+ "ui-state-hover",
+ "ui-state-focus",
+ "ui-state-disabled",
+ "ui-tabs-panel",
+ "ui-widget-content",
+ "ui-corner-bottom",
+ "ui-tabs-hide"
+ ].join( " " ) );
+ }
+ });
+
+ if ( o.cookie ) {
+ this._cookie( null, o.cookie );
+ }
+
+ return this;
+ },
+
+ add: function( url, label, index ) {
+ if ( index === undefined ) {
+ index = this.anchors.length;
+ }
+
+ var self = this,
+ o = this.options,
+ $li = $( o.tabTemplate.replace( /#\{href\}/g, url ).replace( /#\{label\}/g, label ) ),
+ id = !url.indexOf( "#" ) ? url.replace( "#", "" ) : this._tabId( $( "a", $li )[ 0 ] );
+
+ $li.addClass( "ui-state-default ui-corner-top" ).data( "destroy.tabs", true );
+
+ // try to find an existing element before creating a new one
+ var $panel = self.element.find( "#" + id );
+ if ( !$panel.length ) {
+ $panel = $( o.panelTemplate )
+ .attr( "id", id )
+ .data( "destroy.tabs", true );
+ }
+ $panel.addClass( "ui-tabs-panel ui-widget-content ui-corner-bottom ui-tabs-hide" );
+
+ if ( index >= this.lis.length ) {
+ $li.appendTo( this.list );
+ $panel.appendTo( this.list[ 0 ].parentNode );
+ } else {
+ $li.insertBefore( this.lis[ index ] );
+ $panel.insertBefore( this.panels[ index ] );
+ }
+
+ o.disabled = $.map( o.disabled, function( n, i ) {
+ return n >= index ? ++n : n;
+ });
+
+ this._tabify();
+
+ if ( this.anchors.length == 1 ) {
+ o.selected = 0;
+ $li.addClass( "ui-tabs-selected ui-state-active" );
+ $panel.removeClass( "ui-tabs-hide" );
+ this.element.queue( "tabs", function() {
+ self._trigger( "show", null, self._ui( self.anchors[ 0 ], self.panels[ 0 ] ) );
+ });
+
+ this.load( 0 );
+ }
+
+ this._trigger( "add", null, this._ui( this.anchors[ index ], this.panels[ index ] ) );
+ return this;
+ },
+
+ remove: function( index ) {
+ index = this._getIndex( index );
+ var o = this.options,
+ $li = this.lis.eq( index ).remove(),
+ $panel = this.panels.eq( index ).remove();
+
+ // If selected tab was removed focus tab to the right or
+ // in case the last tab was removed the tab to the left.
+ if ( $li.hasClass( "ui-tabs-selected" ) && this.anchors.length > 1) {
+ this.select( index + ( index + 1 < this.anchors.length ? 1 : -1 ) );
+ }
+
+ o.disabled = $.map(
+ $.grep( o.disabled, function(n, i) {
+ return n != index;
+ }),
+ function( n, i ) {
+ return n >= index ? --n : n;
+ });
+
+ this._tabify();
+
+ this._trigger( "remove", null, this._ui( $li.find( "a" )[ 0 ], $panel[ 0 ] ) );
+ return this;
+ },
+
+ enable: function( index ) {
+ index = this._getIndex( index );
+ var o = this.options;
+ if ( $.inArray( index, o.disabled ) == -1 ) {
+ return;
+ }
+
+ this.lis.eq( index ).removeClass( "ui-state-disabled" );
+ o.disabled = $.grep( o.disabled, function( n, i ) {
+ return n != index;
+ });
+
+ this._trigger( "enable", null, this._ui( this.anchors[ index ], this.panels[ index ] ) );
+ return this;
+ },
+
+ disable: function( index ) {
+ index = this._getIndex( index );
+ var self = this, o = this.options;
+ // cannot disable already selected tab
+ if ( index != o.selected ) {
+ this.lis.eq( index ).addClass( "ui-state-disabled" );
+
+ o.disabled.push( index );
+ o.disabled.sort();
+
+ this._trigger( "disable", null, this._ui( this.anchors[ index ], this.panels[ index ] ) );
+ }
+
+ return this;
+ },
+
+ select: function( index ) {
+ index = this._getIndex( index );
+ if ( index == -1 ) {
+ if ( this.options.collapsible && this.options.selected != -1 ) {
+ index = this.options.selected;
+ } else {
+ return this;
+ }
+ }
+ this.anchors.eq( index ).trigger( this.options.event + ".tabs" );
+ return this;
+ },
+
+ load: function( index ) {
+ index = this._getIndex( index );
+ var self = this,
+ o = this.options,
+ a = this.anchors.eq( index )[ 0 ],
+ url = $.data( a, "load.tabs" );
+
+ this.abort();
+
+ // not remote or from cache
+ if ( !url || this.element.queue( "tabs" ).length !== 0 && $.data( a, "cache.tabs" ) ) {
+ this.element.dequeue( "tabs" );
+ return;
+ }
+
+ // load remote from here on
+ this.lis.eq( index ).addClass( "ui-state-processing" );
+
+ if ( o.spinner ) {
+ var span = $( "span", a );
+ span.data( "label.tabs", span.html() ).html( o.spinner );
+ }
+
+ this.xhr = $.ajax( $.extend( {}, o.ajaxOptions, {
+ url: url,
+ success: function( r, s ) {
+ self.element.find( self._sanitizeSelector( a.hash ) ).html( r );
+
+ // take care of tab labels
+ self._cleanup();
+
+ if ( o.cache ) {
+ $.data( a, "cache.tabs", true );
+ }
+
+ self._trigger( "load", null, self._ui( self.anchors[ index ], self.panels[ index ] ) );
+ try {
+ o.ajaxOptions.success( r, s );
+ }
+ catch ( e ) {}
+ },
+ error: function( xhr, s, e ) {
+ // take care of tab labels
+ self._cleanup();
+
+ self._trigger( "load", null, self._ui( self.anchors[ index ], self.panels[ index ] ) );
+ try {
+ // Passing index avoid a race condition when this method is
+ // called after the user has selected another tab.
+ // Pass the anchor that initiated this request allows
+ // loadError to manipulate the tab content panel via $(a.hash)
+ o.ajaxOptions.error( xhr, s, index, a );
+ }
+ catch ( e ) {}
+ }
+ } ) );
+
+ // last, so that load event is fired before show...
+ self.element.dequeue( "tabs" );
+
+ return this;
+ },
+
+ abort: function() {
+ // stop possibly running animations
+ this.element.queue( [] );
+ this.panels.stop( false, true );
+
+ // "tabs" queue must not contain more than two elements,
+ // which are the callbacks for the latest clicked tab...
+ this.element.queue( "tabs", this.element.queue( "tabs" ).splice( -2, 2 ) );
+
+ // terminate pending requests from other tabs
+ if ( this.xhr ) {
+ this.xhr.abort();
+ delete this.xhr;
+ }
+
+ // take care of tab labels
+ this._cleanup();
+ return this;
+ },
+
+ url: function( index, url ) {
+ this.anchors.eq( index ).removeData( "cache.tabs" ).data( "load.tabs", url );
+ return this;
+ },
+
+ length: function() {
+ return this.anchors.length;
+ }
+});
+
+$.extend( $.ui.tabs, {
+ version: "1.8.16"
+});
+
+/*
+ * Tabs Extensions
+ */
+
+/*
+ * Rotate
+ */
+$.extend( $.ui.tabs.prototype, {
+ rotation: null,
+ rotate: function( ms, continuing ) {
+ var self = this,
+ o = this.options;
+
+ var rotate = self._rotate || ( self._rotate = function( e ) {
+ clearTimeout( self.rotation );
+ self.rotation = setTimeout(function() {
+ var t = o.selected;
+ self.select( ++t < self.anchors.length ? t : 0 );
+ }, ms );
+
+ if ( e ) {
+ e.stopPropagation();
+ }
+ });
+
+ var stop = self._unrotate || ( self._unrotate = !continuing
+ ? function(e) {
+ if (e.clientX) { // in case of a true click
+ self.rotate(null);
+ }
+ }
+ : function( e ) {
+ t = o.selected;
+ rotate();
+ });
+
+ // start rotation
+ if ( ms ) {
+ this.element.bind( "tabsshow", rotate );
+ this.anchors.bind( o.event + ".tabs", stop );
+ rotate();
+ // stop rotation
+ } else {
+ clearTimeout( self.rotation );
+ this.element.unbind( "tabsshow", rotate );
+ this.anchors.unbind( o.event + ".tabs", stop );
+ delete this._rotate;
+ delete this._unrotate;
+ }
+
+ return this;
+ }
+});
+
+})( jQuery );
+/*
+ * jQuery UI Datepicker 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Datepicker
+ *
+ * Depends:
+ * jquery.ui.core.js
+ */
+(function( $, undefined ) {
+
+$.extend($.ui, { datepicker: { version: "1.8.16" } });
+
+var PROP_NAME = 'datepicker';
+var dpuuid = new Date().getTime();
+var instActive;
+
+/* Date picker manager.
+ Use the singleton instance of this class, $.datepicker, to interact with the date picker.
+ Settings for (groups of) date pickers are maintained in an instance object,
+ allowing multiple different settings on the same page. */
+
+function Datepicker() {
+ this.debug = false; // Change this to true to start debugging
+ this._curInst = null; // The current instance in use
+ this._keyEvent = false; // If the last event was a key event
+ this._disabledInputs = []; // List of date picker inputs that have been disabled
+ this._datepickerShowing = false; // True if the popup picker is showing , false if not
+ this._inDialog = false; // True if showing within a "dialog", false if not
+ this._mainDivId = 'ui-datepicker-div'; // The ID of the main datepicker division
+ this._inlineClass = 'ui-datepicker-inline'; // The name of the inline marker class
+ this._appendClass = 'ui-datepicker-append'; // The name of the append marker class
+ this._triggerClass = 'ui-datepicker-trigger'; // The name of the trigger marker class
+ this._dialogClass = 'ui-datepicker-dialog'; // The name of the dialog marker class
+ this._disableClass = 'ui-datepicker-disabled'; // The name of the disabled covering marker class
+ this._unselectableClass = 'ui-datepicker-unselectable'; // The name of the unselectable cell marker class
+ this._currentClass = 'ui-datepicker-current-day'; // The name of the current day marker class
+ this._dayOverClass = 'ui-datepicker-days-cell-over'; // The name of the day hover marker class
+ this.regional = []; // Available regional settings, indexed by language code
+ this.regional[''] = { // Default regional settings
+ closeText: 'Done', // Display text for close link
+ prevText: 'Prev', // Display text for previous month link
+ nextText: 'Next', // Display text for next month link
+ currentText: 'Today', // Display text for current month link
+ monthNames: ['January','February','March','April','May','June',
+ 'July','August','September','October','November','December'], // Names of months for drop-down and formatting
+ monthNamesShort: ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'], // For formatting
+ dayNames: ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'], // For formatting
+ dayNamesShort: ['Sun', 'Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat'], // For formatting
+ dayNamesMin: ['Su','Mo','Tu','We','Th','Fr','Sa'], // Column headings for days starting at Sunday
+ weekHeader: 'Wk', // Column header for week of the year
+ dateFormat: 'mm/dd/yy', // See format options on parseDate
+ firstDay: 0, // The first day of the week, Sun = 0, Mon = 1, ...
+ isRTL: false, // True if right-to-left language, false if left-to-right
+ showMonthAfterYear: false, // True if the year select precedes month, false for month then year
+ yearSuffix: '' // Additional text to append to the year in the month headers
+ };
+ this._defaults = { // Global defaults for all the date picker instances
+ showOn: 'focus', // 'focus' for popup on focus,
+ // 'button' for trigger button, or 'both' for either
+ showAnim: 'fadeIn', // Name of jQuery animation for popup
+ showOptions: {}, // Options for enhanced animations
+ defaultDate: null, // Used when field is blank: actual date,
+ // +/-number for offset from today, null for today
+ appendText: '', // Display text following the input box, e.g. showing the format
+ buttonText: '...', // Text for trigger button
+ buttonImage: '', // URL for trigger button image
+ buttonImageOnly: false, // True if the image appears alone, false if it appears on a button
+ hideIfNoPrevNext: false, // True to hide next/previous month links
+ // if not applicable, false to just disable them
+ navigationAsDateFormat: false, // True if date formatting applied to prev/today/next links
+ gotoCurrent: false, // True if today link goes back to current selection instead
+ changeMonth: false, // True if month can be selected directly, false if only prev/next
+ changeYear: false, // True if year can be selected directly, false if only prev/next
+ yearRange: 'c-10:c+10', // Range of years to display in drop-down,
+ // either relative to today's year (-nn:+nn), relative to currently displayed year
+ // (c-nn:c+nn), absolute (nnnn:nnnn), or a combination of the above (nnnn:-n)
+ showOtherMonths: false, // True to show dates in other months, false to leave blank
+ selectOtherMonths: false, // True to allow selection of dates in other months, false for unselectable
+ showWeek: false, // True to show week of the year, false to not show it
+ calculateWeek: this.iso8601Week, // How to calculate the week of the year,
+ // takes a Date and returns the number of the week for it
+ shortYearCutoff: '+10', // Short year values < this are in the current century,
+ // > this are in the previous century,
+ // string value starting with '+' for current year + value
+ minDate: null, // The earliest selectable date, or null for no limit
+ maxDate: null, // The latest selectable date, or null for no limit
+ duration: 'fast', // Duration of display/closure
+ beforeShowDay: null, // Function that takes a date and returns an array with
+ // [0] = true if selectable, false if not, [1] = custom CSS class name(s) or '',
+ // [2] = cell title (optional), e.g. $.datepicker.noWeekends
+ beforeShow: null, // Function that takes an input field and
+ // returns a set of custom settings for the date picker
+ onSelect: null, // Define a callback function when a date is selected
+ onChangeMonthYear: null, // Define a callback function when the month or year is changed
+ onClose: null, // Define a callback function when the datepicker is closed
+ numberOfMonths: 1, // Number of months to show at a time
+ showCurrentAtPos: 0, // The position in multipe months at which to show the current month (starting at 0)
+ stepMonths: 1, // Number of months to step back/forward
+ stepBigMonths: 12, // Number of months to step back/forward for the big links
+ altField: '', // Selector for an alternate field to store selected dates into
+ altFormat: '', // The date format to use for the alternate field
+ constrainInput: true, // The input is constrained by the current date format
+ showButtonPanel: false, // True to show button panel, false to not show it
+ autoSize: false, // True to size the input for the date format, false to leave as is
+ disabled: false // The initial disabled state
+ };
+ $.extend(this._defaults, this.regional['']);
+ this.dpDiv = bindHover($('<div id="' + this._mainDivId + '" class="ui-datepicker ui-widget ui-widget-content ui-helper-clearfix ui-corner-all"></div>'));
+}
+
+$.extend(Datepicker.prototype, {
+ /* Class name added to elements to indicate already configured with a date picker. */
+ markerClassName: 'hasDatepicker',
+
+ //Keep track of the maximum number of rows displayed (see #7043)
+ maxRows: 4,
+
+ /* Debug logging (if enabled). */
+ log: function () {
+ if (this.debug)
+ console.log.apply('', arguments);
+ },
+
+ // TODO rename to "widget" when switching to widget factory
+ _widgetDatepicker: function() {
+ return this.dpDiv;
+ },
+
+ /* Override the default settings for all instances of the date picker.
+ @param settings object - the new settings to use as defaults (anonymous object)
+ @return the manager object */
+ setDefaults: function(settings) {
+ extendRemove(this._defaults, settings || {});
+ return this;
+ },
+
+ /* Attach the date picker to a jQuery selection.
+ @param target element - the target input field or division or span
+ @param settings object - the new settings to use for this date picker instance (anonymous) */
+ _attachDatepicker: function(target, settings) {
+ // check for settings on the control itself - in namespace 'date:'
+ var inlineSettings = null;
+ for (var attrName in this._defaults) {
+ var attrValue = target.getAttribute('date:' + attrName);
+ if (attrValue) {
+ inlineSettings = inlineSettings || {};
+ try {
+ inlineSettings[attrName] = eval(attrValue);
+ } catch (err) {
+ inlineSettings[attrName] = attrValue;
+ }
+ }
+ }
+ var nodeName = target.nodeName.toLowerCase();
+ var inline = (nodeName == 'div' || nodeName == 'span');
+ if (!target.id) {
+ this.uuid += 1;
+ target.id = 'dp' + this.uuid;
+ }
+ var inst = this._newInst($(target), inline);
+ inst.settings = $.extend({}, settings || {}, inlineSettings || {});
+ if (nodeName == 'input') {
+ this._connectDatepicker(target, inst);
+ } else if (inline) {
+ this._inlineDatepicker(target, inst);
+ }
+ },
+
+ /* Create a new instance object. */
+ _newInst: function(target, inline) {
+ var id = target[0].id.replace(/([^A-Za-z0-9_-])/g, '\\\\$1'); // escape jQuery meta chars
+ return {id: id, input: target, // associated target
+ selectedDay: 0, selectedMonth: 0, selectedYear: 0, // current selection
+ drawMonth: 0, drawYear: 0, // month being drawn
+ inline: inline, // is datepicker inline or not
+ dpDiv: (!inline ? this.dpDiv : // presentation div
+ bindHover($('<div class="' + this._inlineClass + ' ui-datepicker ui-widget ui-widget-content ui-helper-clearfix ui-corner-all"></div>')))};
+ },
+
+ /* Attach the date picker to an input field. */
+ _connectDatepicker: function(target, inst) {
+ var input = $(target);
+ inst.append = $([]);
+ inst.trigger = $([]);
+ if (input.hasClass(this.markerClassName))
+ return;
+ this._attachments(input, inst);
+ input.addClass(this.markerClassName).keydown(this._doKeyDown).
+ keypress(this._doKeyPress).keyup(this._doKeyUp).
+ bind("setData.datepicker", function(event, key, value) {
+ inst.settings[key] = value;
+ }).bind("getData.datepicker", function(event, key) {
+ return this._get(inst, key);
+ });
+ this._autoSize(inst);
+ $.data(target, PROP_NAME, inst);
+ //If disabled option is true, disable the datepicker once it has been attached to the input (see ticket #5665)
+ if( inst.settings.disabled ) {
+ this._disableDatepicker( target );
+ }
+ },
+
+ /* Make attachments based on settings. */
+ _attachments: function(input, inst) {
+ var appendText = this._get(inst, 'appendText');
+ var isRTL = this._get(inst, 'isRTL');
+ if (inst.append)
+ inst.append.remove();
+ if (appendText) {
+ inst.append = $('<span class="' + this._appendClass + '">' + appendText + '</span>');
+ input[isRTL ? 'before' : 'after'](inst.append);
+ }
+ input.unbind('focus', this._showDatepicker);
+ if (inst.trigger)
+ inst.trigger.remove();
+ var showOn = this._get(inst, 'showOn');
+ if (showOn == 'focus' || showOn == 'both') // pop-up date picker when in the marked field
+ input.focus(this._showDatepicker);
+ if (showOn == 'button' || showOn == 'both') { // pop-up date picker when button clicked
+ var buttonText = this._get(inst, 'buttonText');
+ var buttonImage = this._get(inst, 'buttonImage');
+ inst.trigger = $(this._get(inst, 'buttonImageOnly') ?
+ $('<img/>').addClass(this._triggerClass).
+ attr({ src: buttonImage, alt: buttonText, title: buttonText }) :
+ $('<button type="button"></button>').addClass(this._triggerClass).
+ html(buttonImage == '' ? buttonText : $('<img/>').attr(
+ { src:buttonImage, alt:buttonText, title:buttonText })));
+ input[isRTL ? 'before' : 'after'](inst.trigger);
+ inst.trigger.click(function() {
+ if ($.datepicker._datepickerShowing && $.datepicker._lastInput == input[0])
+ $.datepicker._hideDatepicker();
+ else
+ $.datepicker._showDatepicker(input[0]);
+ return false;
+ });
+ }
+ },
+
+ /* Apply the maximum length for the date format. */
+ _autoSize: function(inst) {
+ if (this._get(inst, 'autoSize') && !inst.inline) {
+ var date = new Date(2009, 12 - 1, 20); // Ensure double digits
+ var dateFormat = this._get(inst, 'dateFormat');
+ if (dateFormat.match(/[DM]/)) {
+ var findMax = function(names) {
+ var max = 0;
+ var maxI = 0;
+ for (var i = 0; i < names.length; i++) {
+ if (names[i].length > max) {
+ max = names[i].length;
+ maxI = i;
+ }
+ }
+ return maxI;
+ };
+ date.setMonth(findMax(this._get(inst, (dateFormat.match(/MM/) ?
+ 'monthNames' : 'monthNamesShort'))));
+ date.setDate(findMax(this._get(inst, (dateFormat.match(/DD/) ?
+ 'dayNames' : 'dayNamesShort'))) + 20 - date.getDay());
+ }
+ inst.input.attr('size', this._formatDate(inst, date).length);
+ }
+ },
+
+ /* Attach an inline date picker to a div. */
+ _inlineDatepicker: function(target, inst) {
+ var divSpan = $(target);
+ if (divSpan.hasClass(this.markerClassName))
+ return;
+ divSpan.addClass(this.markerClassName).append(inst.dpDiv).
+ bind("setData.datepicker", function(event, key, value){
+ inst.settings[key] = value;
+ }).bind("getData.datepicker", function(event, key){
+ return this._get(inst, key);
+ });
+ $.data(target, PROP_NAME, inst);
+ this._setDate(inst, this._getDefaultDate(inst), true);
+ this._updateDatepicker(inst);
+ this._updateAlternate(inst);
+ //If disabled option is true, disable the datepicker before showing it (see ticket #5665)
+ if( inst.settings.disabled ) {
+ this._disableDatepicker( target );
+ }
+ // Set display:block in place of inst.dpDiv.show() which won't work on disconnected elements
+ // http://bugs.jqueryui.com/ticket/7552 - A Datepicker created on a detached div has zero height
+ inst.dpDiv.css( "display", "block" );
+ },
+
+ /* Pop-up the date picker in a "dialog" box.
+ @param input element - ignored
+ @param date string or Date - the initial date to display
+ @param onSelect function - the function to call when a date is selected
+ @param settings object - update the dialog date picker instance's settings (anonymous object)
+ @param pos int[2] - coordinates for the dialog's position within the screen or
+ event - with x/y coordinates or
+ leave empty for default (screen centre)
+ @return the manager object */
+ _dialogDatepicker: function(input, date, onSelect, settings, pos) {
+ var inst = this._dialogInst; // internal instance
+ if (!inst) {
+ this.uuid += 1;
+ var id = 'dp' + this.uuid;
+ this._dialogInput = $('<input type="text" id="' + id +
+ '" style="position: absolute; top: -100px; width: 0px; z-index: -10;"/>');
+ this._dialogInput.keydown(this._doKeyDown);
+ $('body').append(this._dialogInput);
+ inst = this._dialogInst = this._newInst(this._dialogInput, false);
+ inst.settings = {};
+ $.data(this._dialogInput[0], PROP_NAME, inst);
+ }
+ extendRemove(inst.settings, settings || {});
+ date = (date && date.constructor == Date ? this._formatDate(inst, date) : date);
+ this._dialogInput.val(date);
+
+ this._pos = (pos ? (pos.length ? pos : [pos.pageX, pos.pageY]) : null);
+ if (!this._pos) {
+ var browserWidth = document.documentElement.clientWidth;
+ var browserHeight = document.documentElement.clientHeight;
+ var scrollX = document.documentElement.scrollLeft || document.body.scrollLeft;
+ var scrollY = document.documentElement.scrollTop || document.body.scrollTop;
+ this._pos = // should use actual width/height below
+ [(browserWidth / 2) - 100 + scrollX, (browserHeight / 2) - 150 + scrollY];
+ }
+
+ // move input on screen for focus, but hidden behind dialog
+ this._dialogInput.css('left', (this._pos[0] + 20) + 'px').css('top', this._pos[1] + 'px');
+ inst.settings.onSelect = onSelect;
+ this._inDialog = true;
+ this.dpDiv.addClass(this._dialogClass);
+ this._showDatepicker(this._dialogInput[0]);
+ if ($.blockUI)
+ $.blockUI(this.dpDiv);
+ $.data(this._dialogInput[0], PROP_NAME, inst);
+ return this;
+ },
+
+ /* Detach a datepicker from its control.
+ @param target element - the target input field or division or span */
+ _destroyDatepicker: function(target) {
+ var $target = $(target);
+ var inst = $.data(target, PROP_NAME);
+ if (!$target.hasClass(this.markerClassName)) {
+ return;
+ }
+ var nodeName = target.nodeName.toLowerCase();
+ $.removeData(target, PROP_NAME);
+ if (nodeName == 'input') {
+ inst.append.remove();
+ inst.trigger.remove();
+ $target.removeClass(this.markerClassName).
+ unbind('focus', this._showDatepicker).
+ unbind('keydown', this._doKeyDown).
+ unbind('keypress', this._doKeyPress).
+ unbind('keyup', this._doKeyUp);
+ } else if (nodeName == 'div' || nodeName == 'span')
+ $target.removeClass(this.markerClassName).empty();
+ },
+
+ /* Enable the date picker to a jQuery selection.
+ @param target element - the target input field or division or span */
+ _enableDatepicker: function(target) {
+ var $target = $(target);
+ var inst = $.data(target, PROP_NAME);
+ if (!$target.hasClass(this.markerClassName)) {
+ return;
+ }
+ var nodeName = target.nodeName.toLowerCase();
+ if (nodeName == 'input') {
+ target.disabled = false;
+ inst.trigger.filter('button').
+ each(function() { this.disabled = false; }).end().
+ filter('img').css({opacity: '1.0', cursor: ''});
+ }
+ else if (nodeName == 'div' || nodeName == 'span') {
+ var inline = $target.children('.' + this._inlineClass);
+ inline.children().removeClass('ui-state-disabled');
+ inline.find("select.ui-datepicker-month, select.ui-datepicker-year").
+ removeAttr("disabled");
+ }
+ this._disabledInputs = $.map(this._disabledInputs,
+ function(value) { return (value == target ? null : value); }); // delete entry
+ },
+
+ /* Disable the date picker to a jQuery selection.
+ @param target element - the target input field or division or span */
+ _disableDatepicker: function(target) {
+ var $target = $(target);
+ var inst = $.data(target, PROP_NAME);
+ if (!$target.hasClass(this.markerClassName)) {
+ return;
+ }
+ var nodeName = target.nodeName.toLowerCase();
+ if (nodeName == 'input') {
+ target.disabled = true;
+ inst.trigger.filter('button').
+ each(function() { this.disabled = true; }).end().
+ filter('img').css({opacity: '0.5', cursor: 'default'});
+ }
+ else if (nodeName == 'div' || nodeName == 'span') {
+ var inline = $target.children('.' + this._inlineClass);
+ inline.children().addClass('ui-state-disabled');
+ inline.find("select.ui-datepicker-month, select.ui-datepicker-year").
+ attr("disabled", "disabled");
+ }
+ this._disabledInputs = $.map(this._disabledInputs,
+ function(value) { return (value == target ? null : value); }); // delete entry
+ this._disabledInputs[this._disabledInputs.length] = target;
+ },
+
+ /* Is the first field in a jQuery collection disabled as a datepicker?
+ @param target element - the target input field or division or span
+ @return boolean - true if disabled, false if enabled */
+ _isDisabledDatepicker: function(target) {
+ if (!target) {
+ return false;
+ }
+ for (var i = 0; i < this._disabledInputs.length; i++) {
+ if (this._disabledInputs[i] == target)
+ return true;
+ }
+ return false;
+ },
+
+ /* Retrieve the instance data for the target control.
+ @param target element - the target input field or division or span
+ @return object - the associated instance data
+ @throws error if a jQuery problem getting data */
+ _getInst: function(target) {
+ try {
+ return $.data(target, PROP_NAME);
+ }
+ catch (err) {
+ throw 'Missing instance data for this datepicker';
+ }
+ },
+
+ /* Update or retrieve the settings for a date picker attached to an input field or division.
+ @param target element - the target input field or division or span
+ @param name object - the new settings to update or
+ string - the name of the setting to change or retrieve,
+ when retrieving also 'all' for all instance settings or
+ 'defaults' for all global defaults
+ @param value any - the new value for the setting
+ (omit if above is an object or to retrieve a value) */
+ _optionDatepicker: function(target, name, value) {
+ var inst = this._getInst(target);
+ if (arguments.length == 2 && typeof name == 'string') {
+ return (name == 'defaults' ? $.extend({}, $.datepicker._defaults) :
+ (inst ? (name == 'all' ? $.extend({}, inst.settings) :
+ this._get(inst, name)) : null));
+ }
+ var settings = name || {};
+ if (typeof name == 'string') {
+ settings = {};
+ settings[name] = value;
+ }
+ if (inst) {
+ if (this._curInst == inst) {
+ this._hideDatepicker();
+ }
+ var date = this._getDateDatepicker(target, true);
+ var minDate = this._getMinMaxDate(inst, 'min');
+ var maxDate = this._getMinMaxDate(inst, 'max');
+ extendRemove(inst.settings, settings);
+ // reformat the old minDate/maxDate values if dateFormat changes and a new minDate/maxDate isn't provided
+ if (minDate !== null && settings['dateFormat'] !== undefined && settings['minDate'] === undefined)
+ inst.settings.minDate = this._formatDate(inst, minDate);
+ if (maxDate !== null && settings['dateFormat'] !== undefined && settings['maxDate'] === undefined)
+ inst.settings.maxDate = this._formatDate(inst, maxDate);
+ this._attachments($(target), inst);
+ this._autoSize(inst);
+ this._setDate(inst, date);
+ this._updateAlternate(inst);
+ this._updateDatepicker(inst);
+ }
+ },
+
+ // change method deprecated
+ _changeDatepicker: function(target, name, value) {
+ this._optionDatepicker(target, name, value);
+ },
+
+ /* Redraw the date picker attached to an input field or division.
+ @param target element - the target input field or division or span */
+ _refreshDatepicker: function(target) {
+ var inst = this._getInst(target);
+ if (inst) {
+ this._updateDatepicker(inst);
+ }
+ },
+
+ /* Set the dates for a jQuery selection.
+ @param target element - the target input field or division or span
+ @param date Date - the new date */
+ _setDateDatepicker: function(target, date) {
+ var inst = this._getInst(target);
+ if (inst) {
+ this._setDate(inst, date);
+ this._updateDatepicker(inst);
+ this._updateAlternate(inst);
+ }
+ },
+
+ /* Get the date(s) for the first entry in a jQuery selection.
+ @param target element - the target input field or division or span
+ @param noDefault boolean - true if no default date is to be used
+ @return Date - the current date */
+ _getDateDatepicker: function(target, noDefault) {
+ var inst = this._getInst(target);
+ if (inst && !inst.inline)
+ this._setDateFromField(inst, noDefault);
+ return (inst ? this._getDate(inst) : null);
+ },
+
+ /* Handle keystrokes. */
+ _doKeyDown: function(event) {
+ var inst = $.datepicker._getInst(event.target);
+ var handled = true;
+ var isRTL = inst.dpDiv.is('.ui-datepicker-rtl');
+ inst._keyEvent = true;
+ if ($.datepicker._datepickerShowing)
+ switch (event.keyCode) {
+ case 9: $.datepicker._hideDatepicker();
+ handled = false;
+ break; // hide on tab out
+ case 13: var sel = $('td.' + $.datepicker._dayOverClass + ':not(.' +
+ $.datepicker._currentClass + ')', inst.dpDiv);
+ if (sel[0])
+ $.datepicker._selectDay(event.target, inst.selectedMonth, inst.selectedYear, sel[0]);
+ var onSelect = $.datepicker._get(inst, 'onSelect');
+ if (onSelect) {
+ var dateStr = $.datepicker._formatDate(inst);
+
+ // trigger custom callback
+ onSelect.apply((inst.input ? inst.input[0] : null), [dateStr, inst]);
+ }
+ else
+ $.datepicker._hideDatepicker();
+ return false; // don't submit the form
+ break; // select the value on enter
+ case 27: $.datepicker._hideDatepicker();
+ break; // hide on escape
+ case 33: $.datepicker._adjustDate(event.target, (event.ctrlKey ?
+ -$.datepicker._get(inst, 'stepBigMonths') :
+ -$.datepicker._get(inst, 'stepMonths')), 'M');
+ break; // previous month/year on page up/+ ctrl
+ case 34: $.datepicker._adjustDate(event.target, (event.ctrlKey ?
+ +$.datepicker._get(inst, 'stepBigMonths') :
+ +$.datepicker._get(inst, 'stepMonths')), 'M');
+ break; // next month/year on page down/+ ctrl
+ case 35: if (event.ctrlKey || event.metaKey) $.datepicker._clearDate(event.target);
+ handled = event.ctrlKey || event.metaKey;
+ break; // clear on ctrl or command +end
+ case 36: if (event.ctrlKey || event.metaKey) $.datepicker._gotoToday(event.target);
+ handled = event.ctrlKey || event.metaKey;
+ break; // current on ctrl or command +home
+ case 37: if (event.ctrlKey || event.metaKey) $.datepicker._adjustDate(event.target, (isRTL ? +1 : -1), 'D');
+ handled = event.ctrlKey || event.metaKey;
+ // -1 day on ctrl or command +left
+ if (event.originalEvent.altKey) $.datepicker._adjustDate(event.target, (event.ctrlKey ?
+ -$.datepicker._get(inst, 'stepBigMonths') :
+ -$.datepicker._get(inst, 'stepMonths')), 'M');
+ // next month/year on alt +left on Mac
+ break;
+ case 38: if (event.ctrlKey || event.metaKey) $.datepicker._adjustDate(event.target, -7, 'D');
+ handled = event.ctrlKey || event.metaKey;
+ break; // -1 week on ctrl or command +up
+ case 39: if (event.ctrlKey || event.metaKey) $.datepicker._adjustDate(event.target, (isRTL ? -1 : +1), 'D');
+ handled = event.ctrlKey || event.metaKey;
+ // +1 day on ctrl or command +right
+ if (event.originalEvent.altKey) $.datepicker._adjustDate(event.target, (event.ctrlKey ?
+ +$.datepicker._get(inst, 'stepBigMonths') :
+ +$.datepicker._get(inst, 'stepMonths')), 'M');
+ // next month/year on alt +right
+ break;
+ case 40: if (event.ctrlKey || event.metaKey) $.datepicker._adjustDate(event.target, +7, 'D');
+ handled = event.ctrlKey || event.metaKey;
+ break; // +1 week on ctrl or command +down
+ default: handled = false;
+ }
+ else if (event.keyCode == 36 && event.ctrlKey) // display the date picker on ctrl+home
+ $.datepicker._showDatepicker(this);
+ else {
+ handled = false;
+ }
+ if (handled) {
+ event.preventDefault();
+ event.stopPropagation();
+ }
+ },
+
+ /* Filter entered characters - based on date format. */
+ _doKeyPress: function(event) {
+ var inst = $.datepicker._getInst(event.target);
+ if ($.datepicker._get(inst, 'constrainInput')) {
+ var chars = $.datepicker._possibleChars($.datepicker._get(inst, 'dateFormat'));
+ var chr = String.fromCharCode(event.charCode == undefined ? event.keyCode : event.charCode);
+ return event.ctrlKey || event.metaKey || (chr < ' ' || !chars || chars.indexOf(chr) > -1);
+ }
+ },
+
+ /* Synchronise manual entry and field/alternate field. */
+ _doKeyUp: function(event) {
+ var inst = $.datepicker._getInst(event.target);
+ if (inst.input.val() != inst.lastVal) {
+ try {
+ var date = $.datepicker.parseDate($.datepicker._get(inst, 'dateFormat'),
+ (inst.input ? inst.input.val() : null),
+ $.datepicker._getFormatConfig(inst));
+ if (date) { // only if valid
+ $.datepicker._setDateFromField(inst);
+ $.datepicker._updateAlternate(inst);
+ $.datepicker._updateDatepicker(inst);
+ }
+ }
+ catch (event) {
+ $.datepicker.log(event);
+ }
+ }
+ return true;
+ },
+
+ /* Pop-up the date picker for a given input field.
+ If false returned from beforeShow event handler do not show.
+ @param input element - the input field attached to the date picker or
+ event - if triggered by focus */
+ _showDatepicker: function(input) {
+ input = input.target || input;
+ if (input.nodeName.toLowerCase() != 'input') // find from button/image trigger
+ input = $('input', input.parentNode)[0];
+ if ($.datepicker._isDisabledDatepicker(input) || $.datepicker._lastInput == input) // already here
+ return;
+ var inst = $.datepicker._getInst(input);
+ if ($.datepicker._curInst && $.datepicker._curInst != inst) {
+ if ( $.datepicker._datepickerShowing ) {
+ $.datepicker._triggerOnClose($.datepicker._curInst);
+ }
+ $.datepicker._curInst.dpDiv.stop(true, true);
+ }
+ var beforeShow = $.datepicker._get(inst, 'beforeShow');
+ var beforeShowSettings = beforeShow ? beforeShow.apply(input, [input, inst]) : {};
+ if(beforeShowSettings === false){
+ //false
+ return;
+ }
+ extendRemove(inst.settings, beforeShowSettings);
+ inst.lastVal = null;
+ $.datepicker._lastInput = input;
+ $.datepicker._setDateFromField(inst);
+ if ($.datepicker._inDialog) // hide cursor
+ input.value = '';
+ if (!$.datepicker._pos) { // position below input
+ $.datepicker._pos = $.datepicker._findPos(input);
+ $.datepicker._pos[1] += input.offsetHeight; // add the height
+ }
+ var isFixed = false;
+ $(input).parents().each(function() {
+ isFixed |= $(this).css('position') == 'fixed';
+ return !isFixed;
+ });
+ if (isFixed && $.browser.opera) { // correction for Opera when fixed and scrolled
+ $.datepicker._pos[0] -= document.documentElement.scrollLeft;
+ $.datepicker._pos[1] -= document.documentElement.scrollTop;
+ }
+ var offset = {left: $.datepicker._pos[0], top: $.datepicker._pos[1]};
+ $.datepicker._pos = null;
+ //to avoid flashes on Firefox
+ inst.dpDiv.empty();
+ // determine sizing offscreen
+ inst.dpDiv.css({position: 'absolute', display: 'block', top: '-1000px'});
+ $.datepicker._updateDatepicker(inst);
+ // fix width for dynamic number of date pickers
+ // and adjust position before showing
+ offset = $.datepicker._checkOffset(inst, offset, isFixed);
+ inst.dpDiv.css({position: ($.datepicker._inDialog && $.blockUI ?
+ 'static' : (isFixed ? 'fixed' : 'absolute')), display: 'none',
+ left: offset.left + 'px', top: offset.top + 'px'});
+ if (!inst.inline) {
+ var showAnim = $.datepicker._get(inst, 'showAnim');
+ var duration = $.datepicker._get(inst, 'duration');
+ var postProcess = function() {
+ var cover = inst.dpDiv.find('iframe.ui-datepicker-cover'); // IE6- only
+ if( !! cover.length ){
+ var borders = $.datepicker._getBorders(inst.dpDiv);
+ cover.css({left: -borders[0], top: -borders[1],
+ width: inst.dpDiv.outerWidth(), height: inst.dpDiv.outerHeight()});
+ }
+ };
+ inst.dpDiv.zIndex($(input).zIndex()+1);
+ $.datepicker._datepickerShowing = true;
+ if ($.effects && $.effects[showAnim])
+ inst.dpDiv.show(showAnim, $.datepicker._get(inst, 'showOptions'), duration, postProcess);
+ else
+ inst.dpDiv[showAnim || 'show']((showAnim ? duration : null), postProcess);
+ if (!showAnim || !duration)
+ postProcess();
+ if (inst.input.is(':visible') && !inst.input.is(':disabled'))
+ inst.input.focus();
+ $.datepicker._curInst = inst;
+ }
+ },
+
+ /* Generate the date picker content. */
+ _updateDatepicker: function(inst) {
+ var self = this;
+ self.maxRows = 4; //Reset the max number of rows being displayed (see #7043)
+ var borders = $.datepicker._getBorders(inst.dpDiv);
+ instActive = inst; // for delegate hover events
+ inst.dpDiv.empty().append(this._generateHTML(inst));
+ var cover = inst.dpDiv.find('iframe.ui-datepicker-cover'); // IE6- only
+ if( !!cover.length ){ //avoid call to outerXXXX() when not in IE6
+ cover.css({left: -borders[0], top: -borders[1], width: inst.dpDiv.outerWidth(), height: inst.dpDiv.outerHeight()})
+ }
+ inst.dpDiv.find('.' + this._dayOverClass + ' a').mouseover();
+ var numMonths = this._getNumberOfMonths(inst);
+ var cols = numMonths[1];
+ var width = 17;
+ inst.dpDiv.removeClass('ui-datepicker-multi-2 ui-datepicker-multi-3 ui-datepicker-multi-4').width('');
+ if (cols > 1)
+ inst.dpDiv.addClass('ui-datepicker-multi-' + cols).css('width', (width * cols) + 'em');
+ inst.dpDiv[(numMonths[0] != 1 || numMonths[1] != 1 ? 'add' : 'remove') +
+ 'Class']('ui-datepicker-multi');
+ inst.dpDiv[(this._get(inst, 'isRTL') ? 'add' : 'remove') +
+ 'Class']('ui-datepicker-rtl');
+ if (inst == $.datepicker._curInst && $.datepicker._datepickerShowing && inst.input &&
+ // #6694 - don't focus the input if it's already focused
+ // this breaks the change event in IE
+ inst.input.is(':visible') && !inst.input.is(':disabled') && inst.input[0] != document.activeElement)
+ inst.input.focus();
+ // deffered render of the years select (to avoid flashes on Firefox)
+ if( inst.yearshtml ){
+ var origyearshtml = inst.yearshtml;
+ setTimeout(function(){
+ //assure that inst.yearshtml didn't change.
+ if( origyearshtml === inst.yearshtml && inst.yearshtml ){
+ inst.dpDiv.find('select.ui-datepicker-year:first').replaceWith(inst.yearshtml);
+ }
+ origyearshtml = inst.yearshtml = null;
+ }, 0);
+ }
+ },
+
+ /* Retrieve the size of left and top borders for an element.
+ @param elem (jQuery object) the element of interest
+ @return (number[2]) the left and top borders */
+ _getBorders: function(elem) {
+ var convert = function(value) {
+ return {thin: 1, medium: 2, thick: 3}[value] || value;
+ };
+ return [parseFloat(convert(elem.css('border-left-width'))),
+ parseFloat(convert(elem.css('border-top-width')))];
+ },
+
+ /* Check positioning to remain on screen. */
+ _checkOffset: function(inst, offset, isFixed) {
+ var dpWidth = inst.dpDiv.outerWidth();
+ var dpHeight = inst.dpDiv.outerHeight();
+ var inputWidth = inst.input ? inst.input.outerWidth() : 0;
+ var inputHeight = inst.input ? inst.input.outerHeight() : 0;
+ var viewWidth = document.documentElement.clientWidth + $(document).scrollLeft();
+ var viewHeight = document.documentElement.clientHeight + $(document).scrollTop();
+
+ offset.left -= (this._get(inst, 'isRTL') ? (dpWidth - inputWidth) : 0);
+ offset.left -= (isFixed && offset.left == inst.input.offset().left) ? $(document).scrollLeft() : 0;
+ offset.top -= (isFixed && offset.top == (inst.input.offset().top + inputHeight)) ? $(document).scrollTop() : 0;
+
+ // now check if datepicker is showing outside window viewport - move to a better place if so.
+ offset.left -= Math.min(offset.left, (offset.left + dpWidth > viewWidth && viewWidth > dpWidth) ?
+ Math.abs(offset.left + dpWidth - viewWidth) : 0);
+ offset.top -= Math.min(offset.top, (offset.top + dpHeight > viewHeight && viewHeight > dpHeight) ?
+ Math.abs(dpHeight + inputHeight) : 0);
+
+ return offset;
+ },
+
+ /* Find an object's position on the screen. */
+ _findPos: function(obj) {
+ var inst = this._getInst(obj);
+ var isRTL = this._get(inst, 'isRTL');
+ while (obj && (obj.type == 'hidden' || obj.nodeType != 1 || $.expr.filters.hidden(obj))) {
+ obj = obj[isRTL ? 'previousSibling' : 'nextSibling'];
+ }
+ var position = $(obj).offset();
+ return [position.left, position.top];
+ },
+
+ /* Trigger custom callback of onClose. */
+ _triggerOnClose: function(inst) {
+ var onClose = this._get(inst, 'onClose');
+ if (onClose)
+ onClose.apply((inst.input ? inst.input[0] : null),
+ [(inst.input ? inst.input.val() : ''), inst]);
+ },
+
+ /* Hide the date picker from view.
+ @param input element - the input field attached to the date picker */
+ _hideDatepicker: function(input) {
+ var inst = this._curInst;
+ if (!inst || (input && inst != $.data(input, PROP_NAME)))
+ return;
+ if (this._datepickerShowing) {
+ var showAnim = this._get(inst, 'showAnim');
+ var duration = this._get(inst, 'duration');
+ var postProcess = function() {
+ $.datepicker._tidyDialog(inst);
+ this._curInst = null;
+ };
+ if ($.effects && $.effects[showAnim])
+ inst.dpDiv.hide(showAnim, $.datepicker._get(inst, 'showOptions'), duration, postProcess);
+ else
+ inst.dpDiv[(showAnim == 'slideDown' ? 'slideUp' :
+ (showAnim == 'fadeIn' ? 'fadeOut' : 'hide'))]((showAnim ? duration : null), postProcess);
+ if (!showAnim)
+ postProcess();
+ $.datepicker._triggerOnClose(inst);
+ this._datepickerShowing = false;
+ this._lastInput = null;
+ if (this._inDialog) {
+ this._dialogInput.css({ position: 'absolute', left: '0', top: '-100px' });
+ if ($.blockUI) {
+ $.unblockUI();
+ $('body').append(this.dpDiv);
+ }
+ }
+ this._inDialog = false;
+ }
+ },
+
+ /* Tidy up after a dialog display. */
+ _tidyDialog: function(inst) {
+ inst.dpDiv.removeClass(this._dialogClass).unbind('.ui-datepicker-calendar');
+ },
+
+ /* Close date picker if clicked elsewhere. */
+ _checkExternalClick: function(event) {
+ if (!$.datepicker._curInst)
+ return;
+ var $target = $(event.target);
+ if ($target[0].id != $.datepicker._mainDivId &&
+ $target.parents('#' + $.datepicker._mainDivId).length == 0 &&
+ !$target.hasClass($.datepicker.markerClassName) &&
+ !$target.hasClass($.datepicker._triggerClass) &&
+ $.datepicker._datepickerShowing && !($.datepicker._inDialog && $.blockUI))
+ $.datepicker._hideDatepicker();
+ },
+
+ /* Adjust one of the date sub-fields. */
+ _adjustDate: function(id, offset, period) {
+ var target = $(id);
+ var inst = this._getInst(target[0]);
+ if (this._isDisabledDatepicker(target[0])) {
+ return;
+ }
+ this._adjustInstDate(inst, offset +
+ (period == 'M' ? this._get(inst, 'showCurrentAtPos') : 0), // undo positioning
+ period);
+ this._updateDatepicker(inst);
+ },
+
+ /* Action for current link. */
+ _gotoToday: function(id) {
+ var target = $(id);
+ var inst = this._getInst(target[0]);
+ if (this._get(inst, 'gotoCurrent') && inst.currentDay) {
+ inst.selectedDay = inst.currentDay;
+ inst.drawMonth = inst.selectedMonth = inst.currentMonth;
+ inst.drawYear = inst.selectedYear = inst.currentYear;
+ }
+ else {
+ var date = new Date();
+ inst.selectedDay = date.getDate();
+ inst.drawMonth = inst.selectedMonth = date.getMonth();
+ inst.drawYear = inst.selectedYear = date.getFullYear();
+ }
+ this._notifyChange(inst);
+ this._adjustDate(target);
+ },
+
+ /* Action for selecting a new month/year. */
+ _selectMonthYear: function(id, select, period) {
+ var target = $(id);
+ var inst = this._getInst(target[0]);
+ inst['selected' + (period == 'M' ? 'Month' : 'Year')] =
+ inst['draw' + (period == 'M' ? 'Month' : 'Year')] =
+ parseInt(select.options[select.selectedIndex].value,10);
+ this._notifyChange(inst);
+ this._adjustDate(target);
+ },
+
+ /* Action for selecting a day. */
+ _selectDay: function(id, month, year, td) {
+ var target = $(id);
+ if ($(td).hasClass(this._unselectableClass) || this._isDisabledDatepicker(target[0])) {
+ return;
+ }
+ var inst = this._getInst(target[0]);
+ inst.selectedDay = inst.currentDay = $('a', td).html();
+ inst.selectedMonth = inst.currentMonth = month;
+ inst.selectedYear = inst.currentYear = year;
+ this._selectDate(id, this._formatDate(inst,
+ inst.currentDay, inst.currentMonth, inst.currentYear));
+ },
+
+ /* Erase the input field and hide the date picker. */
+ _clearDate: function(id) {
+ var target = $(id);
+ var inst = this._getInst(target[0]);
+ this._selectDate(target, '');
+ },
+
+ /* Update the input field with the selected date. */
+ _selectDate: function(id, dateStr) {
+ var target = $(id);
+ var inst = this._getInst(target[0]);
+ dateStr = (dateStr != null ? dateStr : this._formatDate(inst));
+ if (inst.input)
+ inst.input.val(dateStr);
+ this._updateAlternate(inst);
+ var onSelect = this._get(inst, 'onSelect');
+ if (onSelect)
+ onSelect.apply((inst.input ? inst.input[0] : null), [dateStr, inst]); // trigger custom callback
+ else if (inst.input)
+ inst.input.trigger('change'); // fire the change event
+ if (inst.inline)
+ this._updateDatepicker(inst);
+ else {
+ this._hideDatepicker();
+ this._lastInput = inst.input[0];
+ if (typeof(inst.input[0]) != 'object')
+ inst.input.focus(); // restore focus
+ this._lastInput = null;
+ }
+ },
+
+ /* Update any alternate field to synchronise with the main field. */
+ _updateAlternate: function(inst) {
+ var altField = this._get(inst, 'altField');
+ if (altField) { // update alternate field too
+ var altFormat = this._get(inst, 'altFormat') || this._get(inst, 'dateFormat');
+ var date = this._getDate(inst);
+ var dateStr = this.formatDate(altFormat, date, this._getFormatConfig(inst));
+ $(altField).each(function() { $(this).val(dateStr); });
+ }
+ },
+
+ /* Set as beforeShowDay function to prevent selection of weekends.
+ @param date Date - the date to customise
+ @return [boolean, string] - is this date selectable?, what is its CSS class? */
+ noWeekends: function(date) {
+ var day = date.getDay();
+ return [(day > 0 && day < 6), ''];
+ },
+
+ /* Set as calculateWeek to determine the week of the year based on the ISO 8601 definition.
+ @param date Date - the date to get the week for
+ @return number - the number of the week within the year that contains this date */
+ iso8601Week: function(date) {
+ var checkDate = new Date(date.getTime());
+ // Find Thursday of this week starting on Monday
+ checkDate.setDate(checkDate.getDate() + 4 - (checkDate.getDay() || 7));
+ var time = checkDate.getTime();
+ checkDate.setMonth(0); // Compare with Jan 1
+ checkDate.setDate(1);
+ return Math.floor(Math.round((time - checkDate) / 86400000) / 7) + 1;
+ },
+
+ /* Parse a string value into a date object.
+ See formatDate below for the possible formats.
+
+ @param format string - the expected format of the date
+ @param value string - the date in the above format
+ @param settings Object - attributes include:
+ shortYearCutoff number - the cutoff year for determining the century (optional)
+ dayNamesShort string[7] - abbreviated names of the days from Sunday (optional)
+ dayNames string[7] - names of the days from Sunday (optional)
+ monthNamesShort string[12] - abbreviated names of the months (optional)
+ monthNames string[12] - names of the months (optional)
+ @return Date - the extracted date value or null if value is blank */
+ parseDate: function (format, value, settings) {
+ if (format == null || value == null)
+ throw 'Invalid arguments';
+ value = (typeof value == 'object' ? value.toString() : value + '');
+ if (value == '')
+ return null;
+ var shortYearCutoff = (settings ? settings.shortYearCutoff : null) || this._defaults.shortYearCutoff;
+ shortYearCutoff = (typeof shortYearCutoff != 'string' ? shortYearCutoff :
+ new Date().getFullYear() % 100 + parseInt(shortYearCutoff, 10));
+ var dayNamesShort = (settings ? settings.dayNamesShort : null) || this._defaults.dayNamesShort;
+ var dayNames = (settings ? settings.dayNames : null) || this._defaults.dayNames;
+ var monthNamesShort = (settings ? settings.monthNamesShort : null) || this._defaults.monthNamesShort;
+ var monthNames = (settings ? settings.monthNames : null) || this._defaults.monthNames;
+ var year = -1;
+ var month = -1;
+ var day = -1;
+ var doy = -1;
+ var literal = false;
+ // Check whether a format character is doubled
+ var lookAhead = function(match) {
+ var matches = (iFormat + 1 < format.length && format.charAt(iFormat + 1) == match);
+ if (matches)
+ iFormat++;
+ return matches;
+ };
+ // Extract a number from the string value
+ var getNumber = function(match) {
+ var isDoubled = lookAhead(match);
+ var size = (match == '@' ? 14 : (match == '!' ? 20 :
+ (match == 'y' && isDoubled ? 4 : (match == 'o' ? 3 : 2))));
+ var digits = new RegExp('^\\d{1,' + size + '}');
+ var num = value.substring(iValue).match(digits);
+ if (!num)
+ throw 'Missing number at position ' + iValue;
+ iValue += num[0].length;
+ return parseInt(num[0], 10);
+ };
+ // Extract a name from the string value and convert to an index
+ var getName = function(match, shortNames, longNames) {
+ var names = $.map(lookAhead(match) ? longNames : shortNames, function (v, k) {
+ return [ [k, v] ];
+ }).sort(function (a, b) {
+ return -(a[1].length - b[1].length);
+ });
+ var index = -1;
+ $.each(names, function (i, pair) {
+ var name = pair[1];
+ if (value.substr(iValue, name.length).toLowerCase() == name.toLowerCase()) {
+ index = pair[0];
+ iValue += name.length;
+ return false;
+ }
+ });
+ if (index != -1)
+ return index + 1;
+ else
+ throw 'Unknown name at position ' + iValue;
+ };
+ // Confirm that a literal character matches the string value
+ var checkLiteral = function() {
+ if (value.charAt(iValue) != format.charAt(iFormat))
+ throw 'Unexpected literal at position ' + iValue;
+ iValue++;
+ };
+ var iValue = 0;
+ for (var iFormat = 0; iFormat < format.length; iFormat++) {
+ if (literal)
+ if (format.charAt(iFormat) == "'" && !lookAhead("'"))
+ literal = false;
+ else
+ checkLiteral();
+ else
+ switch (format.charAt(iFormat)) {
+ case 'd':
+ day = getNumber('d');
+ break;
+ case 'D':
+ getName('D', dayNamesShort, dayNames);
+ break;
+ case 'o':
+ doy = getNumber('o');
+ break;
+ case 'm':
+ month = getNumber('m');
+ break;
+ case 'M':
+ month = getName('M', monthNamesShort, monthNames);
+ break;
+ case 'y':
+ year = getNumber('y');
+ break;
+ case '@':
+ var date = new Date(getNumber('@'));
+ year = date.getFullYear();
+ month = date.getMonth() + 1;
+ day = date.getDate();
+ break;
+ case '!':
+ var date = new Date((getNumber('!') - this._ticksTo1970) / 10000);
+ year = date.getFullYear();
+ month = date.getMonth() + 1;
+ day = date.getDate();
+ break;
+ case "'":
+ if (lookAhead("'"))
+ checkLiteral();
+ else
+ literal = true;
+ break;
+ default:
+ checkLiteral();
+ }
+ }
+ if (iValue < value.length){
+ throw "Extra/unparsed characters found in date: " + value.substring(iValue);
+ }
+ if (year == -1)
+ year = new Date().getFullYear();
+ else if (year < 100)
+ year += new Date().getFullYear() - new Date().getFullYear() % 100 +
+ (year <= shortYearCutoff ? 0 : -100);
+ if (doy > -1) {
+ month = 1;
+ day = doy;
+ do {
+ var dim = this._getDaysInMonth(year, month - 1);
+ if (day <= dim)
+ break;
+ month++;
+ day -= dim;
+ } while (true);
+ }
+ var date = this._daylightSavingAdjust(new Date(year, month - 1, day));
+ if (date.getFullYear() != year || date.getMonth() + 1 != month || date.getDate() != day)
+ throw 'Invalid date'; // E.g. 31/02/00
+ return date;
+ },
+
+ /* Standard date formats. */
+ ATOM: 'yy-mm-dd', // RFC 3339 (ISO 8601)
+ COOKIE: 'D, dd M yy',
+ ISO_8601: 'yy-mm-dd',
+ RFC_822: 'D, d M y',
+ RFC_850: 'DD, dd-M-y',
+ RFC_1036: 'D, d M y',
+ RFC_1123: 'D, d M yy',
+ RFC_2822: 'D, d M yy',
+ RSS: 'D, d M y', // RFC 822
+ TICKS: '!',
+ TIMESTAMP: '@',
+ W3C: 'yy-mm-dd', // ISO 8601
+
+ _ticksTo1970: (((1970 - 1) * 365 + Math.floor(1970 / 4) - Math.floor(1970 / 100) +
+ Math.floor(1970 / 400)) * 24 * 60 * 60 * 10000000),
+
+ /* Format a date object into a string value.
+ The format can be combinations of the following:
+ d - day of month (no leading zero)
+ dd - day of month (two digit)
+ o - day of year (no leading zeros)
+ oo - day of year (three digit)
+ D - day name short
+ DD - day name long
+ m - month of year (no leading zero)
+ mm - month of year (two digit)
+ M - month name short
+ MM - month name long
+ y - year (two digit)
+ yy - year (four digit)
+ @ - Unix timestamp (ms since 01/01/1970)
+ ! - Windows ticks (100ns since 01/01/0001)
+ '...' - literal text
+ '' - single quote
+
+ @param format string - the desired format of the date
+ @param date Date - the date value to format
+ @param settings Object - attributes include:
+ dayNamesShort string[7] - abbreviated names of the days from Sunday (optional)
+ dayNames string[7] - names of the days from Sunday (optional)
+ monthNamesShort string[12] - abbreviated names of the months (optional)
+ monthNames string[12] - names of the months (optional)
+ @return string - the date in the above format */
+ formatDate: function (format, date, settings) {
+ if (!date)
+ return '';
+ var dayNamesShort = (settings ? settings.dayNamesShort : null) || this._defaults.dayNamesShort;
+ var dayNames = (settings ? settings.dayNames : null) || this._defaults.dayNames;
+ var monthNamesShort = (settings ? settings.monthNamesShort : null) || this._defaults.monthNamesShort;
+ var monthNames = (settings ? settings.monthNames : null) || this._defaults.monthNames;
+ // Check whether a format character is doubled
+ var lookAhead = function(match) {
+ var matches = (iFormat + 1 < format.length && format.charAt(iFormat + 1) == match);
+ if (matches)
+ iFormat++;
+ return matches;
+ };
+ // Format a number, with leading zero if necessary
+ var formatNumber = function(match, value, len) {
+ var num = '' + value;
+ if (lookAhead(match))
+ while (num.length < len)
+ num = '0' + num;
+ return num;
+ };
+ // Format a name, short or long as requested
+ var formatName = function(match, value, shortNames, longNames) {
+ return (lookAhead(match) ? longNames[value] : shortNames[value]);
+ };
+ var output = '';
+ var literal = false;
+ if (date)
+ for (var iFormat = 0; iFormat < format.length; iFormat++) {
+ if (literal)
+ if (format.charAt(iFormat) == "'" && !lookAhead("'"))
+ literal = false;
+ else
+ output += format.charAt(iFormat);
+ else
+ switch (format.charAt(iFormat)) {
+ case 'd':
+ output += formatNumber('d', date.getDate(), 2);
+ break;
+ case 'D':
+ output += formatName('D', date.getDay(), dayNamesShort, dayNames);
+ break;
+ case 'o':
+ output += formatNumber('o',
+ Math.round((new Date(date.getFullYear(), date.getMonth(), date.getDate()).getTime() - new Date(date.getFullYear(), 0, 0).getTime()) / 86400000), 3);
+ break;
+ case 'm':
+ output += formatNumber('m', date.getMonth() + 1, 2);
+ break;
+ case 'M':
+ output += formatName('M', date.getMonth(), monthNamesShort, monthNames);
+ break;
+ case 'y':
+ output += (lookAhead('y') ? date.getFullYear() :
+ (date.getYear() % 100 < 10 ? '0' : '') + date.getYear() % 100);
+ break;
+ case '@':
+ output += date.getTime();
+ break;
+ case '!':
+ output += date.getTime() * 10000 + this._ticksTo1970;
+ break;
+ case "'":
+ if (lookAhead("'"))
+ output += "'";
+ else
+ literal = true;
+ break;
+ default:
+ output += format.charAt(iFormat);
+ }
+ }
+ return output;
+ },
+
+ /* Extract all possible characters from the date format. */
+ _possibleChars: function (format) {
+ var chars = '';
+ var literal = false;
+ // Check whether a format character is doubled
+ var lookAhead = function(match) {
+ var matches = (iFormat + 1 < format.length && format.charAt(iFormat + 1) == match);
+ if (matches)
+ iFormat++;
+ return matches;
+ };
+ for (var iFormat = 0; iFormat < format.length; iFormat++)
+ if (literal)
+ if (format.charAt(iFormat) == "'" && !lookAhead("'"))
+ literal = false;
+ else
+ chars += format.charAt(iFormat);
+ else
+ switch (format.charAt(iFormat)) {
+ case 'd': case 'm': case 'y': case '@':
+ chars += '0123456789';
+ break;
+ case 'D': case 'M':
+ return null; // Accept anything
+ case "'":
+ if (lookAhead("'"))
+ chars += "'";
+ else
+ literal = true;
+ break;
+ default:
+ chars += format.charAt(iFormat);
+ }
+ return chars;
+ },
+
+ /* Get a setting value, defaulting if necessary. */
+ _get: function(inst, name) {
+ return inst.settings[name] !== undefined ?
+ inst.settings[name] : this._defaults[name];
+ },
+
+ /* Parse existing date and initialise date picker. */
+ _setDateFromField: function(inst, noDefault) {
+ if (inst.input.val() == inst.lastVal) {
+ return;
+ }
+ var dateFormat = this._get(inst, 'dateFormat');
+ var dates = inst.lastVal = inst.input ? inst.input.val() : null;
+ var date, defaultDate;
+ date = defaultDate = this._getDefaultDate(inst);
+ var settings = this._getFormatConfig(inst);
+ try {
+ date = this.parseDate(dateFormat, dates, settings) || defaultDate;
+ } catch (event) {
+ this.log(event);
+ dates = (noDefault ? '' : dates);
+ }
+ inst.selectedDay = date.getDate();
+ inst.drawMonth = inst.selectedMonth = date.getMonth();
+ inst.drawYear = inst.selectedYear = date.getFullYear();
+ inst.currentDay = (dates ? date.getDate() : 0);
+ inst.currentMonth = (dates ? date.getMonth() : 0);
+ inst.currentYear = (dates ? date.getFullYear() : 0);
+ this._adjustInstDate(inst);
+ },
+
+ /* Retrieve the default date shown on opening. */
+ _getDefaultDate: function(inst) {
+ return this._restrictMinMax(inst,
+ this._determineDate(inst, this._get(inst, 'defaultDate'), new Date()));
+ },
+
+ /* A date may be specified as an exact value or a relative one. */
+ _determineDate: function(inst, date, defaultDate) {
+ var offsetNumeric = function(offset) {
+ var date = new Date();
+ date.setDate(date.getDate() + offset);
+ return date;
+ };
+ var offsetString = function(offset) {
+ try {
+ return $.datepicker.parseDate($.datepicker._get(inst, 'dateFormat'),
+ offset, $.datepicker._getFormatConfig(inst));
+ }
+ catch (e) {
+ // Ignore
+ }
+ var date = (offset.toLowerCase().match(/^c/) ?
+ $.datepicker._getDate(inst) : null) || new Date();
+ var year = date.getFullYear();
+ var month = date.getMonth();
+ var day = date.getDate();
+ var pattern = /([+-]?[0-9]+)\s*(d|D|w|W|m|M|y|Y)?/g;
+ var matches = pattern.exec(offset);
+ while (matches) {
+ switch (matches[2] || 'd') {
+ case 'd' : case 'D' :
+ day += parseInt(matches[1],10); break;
+ case 'w' : case 'W' :
+ day += parseInt(matches[1],10) * 7; break;
+ case 'm' : case 'M' :
+ month += parseInt(matches[1],10);
+ day = Math.min(day, $.datepicker._getDaysInMonth(year, month));
+ break;
+ case 'y': case 'Y' :
+ year += parseInt(matches[1],10);
+ day = Math.min(day, $.datepicker._getDaysInMonth(year, month));
+ break;
+ }
+ matches = pattern.exec(offset);
+ }
+ return new Date(year, month, day);
+ };
+ var newDate = (date == null || date === '' ? defaultDate : (typeof date == 'string' ? offsetString(date) :
+ (typeof date == 'number' ? (isNaN(date) ? defaultDate : offsetNumeric(date)) : new Date(date.getTime()))));
+ newDate = (newDate && newDate.toString() == 'Invalid Date' ? defaultDate : newDate);
+ if (newDate) {
+ newDate.setHours(0);
+ newDate.setMinutes(0);
+ newDate.setSeconds(0);
+ newDate.setMilliseconds(0);
+ }
+ return this._daylightSavingAdjust(newDate);
+ },
+
+ /* Handle switch to/from daylight saving.
+ Hours may be non-zero on daylight saving cut-over:
+ > 12 when midnight changeover, but then cannot generate
+ midnight datetime, so jump to 1AM, otherwise reset.
+ @param date (Date) the date to check
+ @return (Date) the corrected date */
+ _daylightSavingAdjust: function(date) {
+ if (!date) return null;
+ date.setHours(date.getHours() > 12 ? date.getHours() + 2 : 0);
+ return date;
+ },
+
+ /* Set the date(s) directly. */
+ _setDate: function(inst, date, noChange) {
+ var clear = !date;
+ var origMonth = inst.selectedMonth;
+ var origYear = inst.selectedYear;
+ var newDate = this._restrictMinMax(inst, this._determineDate(inst, date, new Date()));
+ inst.selectedDay = inst.currentDay = newDate.getDate();
+ inst.drawMonth = inst.selectedMonth = inst.currentMonth = newDate.getMonth();
+ inst.drawYear = inst.selectedYear = inst.currentYear = newDate.getFullYear();
+ if ((origMonth != inst.selectedMonth || origYear != inst.selectedYear) && !noChange)
+ this._notifyChange(inst);
+ this._adjustInstDate(inst);
+ if (inst.input) {
+ inst.input.val(clear ? '' : this._formatDate(inst));
+ }
+ },
+
+ /* Retrieve the date(s) directly. */
+ _getDate: function(inst) {
+ var startDate = (!inst.currentYear || (inst.input && inst.input.val() == '') ? null :
+ this._daylightSavingAdjust(new Date(
+ inst.currentYear, inst.currentMonth, inst.currentDay)));
+ return startDate;
+ },
+
+ /* Generate the HTML for the current state of the date picker. */
+ _generateHTML: function(inst) {
+ var today = new Date();
+ today = this._daylightSavingAdjust(
+ new Date(today.getFullYear(), today.getMonth(), today.getDate())); // clear time
+ var isRTL = this._get(inst, 'isRTL');
+ var showButtonPanel = this._get(inst, 'showButtonPanel');
+ var hideIfNoPrevNext = this._get(inst, 'hideIfNoPrevNext');
+ var navigationAsDateFormat = this._get(inst, 'navigationAsDateFormat');
+ var numMonths = this._getNumberOfMonths(inst);
+ var showCurrentAtPos = this._get(inst, 'showCurrentAtPos');
+ var stepMonths = this._get(inst, 'stepMonths');
+ var isMultiMonth = (numMonths[0] != 1 || numMonths[1] != 1);
+ var currentDate = this._daylightSavingAdjust((!inst.currentDay ? new Date(9999, 9, 9) :
+ new Date(inst.currentYear, inst.currentMonth, inst.currentDay)));
+ var minDate = this._getMinMaxDate(inst, 'min');
+ var maxDate = this._getMinMaxDate(inst, 'max');
+ var drawMonth = inst.drawMonth - showCurrentAtPos;
+ var drawYear = inst.drawYear;
+ if (drawMonth < 0) {
+ drawMonth += 12;
+ drawYear--;
+ }
+ if (maxDate) {
+ var maxDraw = this._daylightSavingAdjust(new Date(maxDate.getFullYear(),
+ maxDate.getMonth() - (numMonths[0] * numMonths[1]) + 1, maxDate.getDate()));
+ maxDraw = (minDate && maxDraw < minDate ? minDate : maxDraw);
+ while (this._daylightSavingAdjust(new Date(drawYear, drawMonth, 1)) > maxDraw) {
+ drawMonth--;
+ if (drawMonth < 0) {
+ drawMonth = 11;
+ drawYear--;
+ }
+ }
+ }
+ inst.drawMonth = drawMonth;
+ inst.drawYear = drawYear;
+ var prevText = this._get(inst, 'prevText');
+ prevText = (!navigationAsDateFormat ? prevText : this.formatDate(prevText,
+ this._daylightSavingAdjust(new Date(drawYear, drawMonth - stepMonths, 1)),
+ this._getFormatConfig(inst)));
+ var prev = (this._canAdjustMonth(inst, -1, drawYear, drawMonth) ?
+ '<a class="ui-datepicker-prev ui-corner-all" onclick="DP_jQuery_' + dpuuid +
+ '.datepicker._adjustDate(\'#' + inst.id + '\', -' + stepMonths + ', \'M\');"' +
+ ' title="' + prevText + '"><span class="ui-icon ui-icon-circle-triangle-' + ( isRTL ? 'e' : 'w') + '">' + prevText + '</span></a>' :
+ (hideIfNoPrevNext ? '' : '<a class="ui-datepicker-prev ui-corner-all ui-state-disabled" title="'+ prevText +'"><span class="ui-icon ui-icon-circle-triangle-' + ( isRTL ? 'e' : 'w') + '">' + prevText + '</span></a>'));
+ var nextText = this._get(inst, 'nextText');
+ nextText = (!navigationAsDateFormat ? nextText : this.formatDate(nextText,
+ this._daylightSavingAdjust(new Date(drawYear, drawMonth + stepMonths, 1)),
+ this._getFormatConfig(inst)));
+ var next = (this._canAdjustMonth(inst, +1, drawYear, drawMonth) ?
+ '<a class="ui-datepicker-next ui-corner-all" onclick="DP_jQuery_' + dpuuid +
+ '.datepicker._adjustDate(\'#' + inst.id + '\', +' + stepMonths + ', \'M\');"' +
+ ' title="' + nextText + '"><span class="ui-icon ui-icon-circle-triangle-' + ( isRTL ? 'w' : 'e') + '">' + nextText + '</span></a>' :
+ (hideIfNoPrevNext ? '' : '<a class="ui-datepicker-next ui-corner-all ui-state-disabled" title="'+ nextText + '"><span class="ui-icon ui-icon-circle-triangle-' + ( isRTL ? 'w' : 'e') + '">' + nextText + '</span></a>'));
+ var currentText = this._get(inst, 'currentText');
+ var gotoDate = (this._get(inst, 'gotoCurrent') && inst.currentDay ? currentDate : today);
+ currentText = (!navigationAsDateFormat ? currentText :
+ this.formatDate(currentText, gotoDate, this._getFormatConfig(inst)));
+ var controls = (!inst.inline ? '<button type="button" class="ui-datepicker-close ui-state-default ui-priority-primary ui-corner-all" onclick="DP_jQuery_' + dpuuid +
+ '.datepicker._hideDatepicker();">' + this._get(inst, 'closeText') + '</button>' : '');
+ var buttonPanel = (showButtonPanel) ? '<div class="ui-datepicker-buttonpane ui-widget-content">' + (isRTL ? controls : '') +
+ (this._isInRange(inst, gotoDate) ? '<button type="button" class="ui-datepicker-current ui-state-default ui-priority-secondary ui-corner-all" onclick="DP_jQuery_' + dpuuid +
+ '.datepicker._gotoToday(\'#' + inst.id + '\');"' +
+ '>' + currentText + '</button>' : '') + (isRTL ? '' : controls) + '</div>' : '';
+ var firstDay = parseInt(this._get(inst, 'firstDay'),10);
+ firstDay = (isNaN(firstDay) ? 0 : firstDay);
+ var showWeek = this._get(inst, 'showWeek');
+ var dayNames = this._get(inst, 'dayNames');
+ var dayNamesShort = this._get(inst, 'dayNamesShort');
+ var dayNamesMin = this._get(inst, 'dayNamesMin');
+ var monthNames = this._get(inst, 'monthNames');
+ var monthNamesShort = this._get(inst, 'monthNamesShort');
+ var beforeShowDay = this._get(inst, 'beforeShowDay');
+ var showOtherMonths = this._get(inst, 'showOtherMonths');
+ var selectOtherMonths = this._get(inst, 'selectOtherMonths');
+ var calculateWeek = this._get(inst, 'calculateWeek') || this.iso8601Week;
+ var defaultDate = this._getDefaultDate(inst);
+ var html = '';
+ for (var row = 0; row < numMonths[0]; row++) {
+ var group = '';
+ this.maxRows = 4;
+ for (var col = 0; col < numMonths[1]; col++) {
+ var selectedDate = this._daylightSavingAdjust(new Date(drawYear, drawMonth, inst.selectedDay));
+ var cornerClass = ' ui-corner-all';
+ var calender = '';
+ if (isMultiMonth) {
+ calender += '<div class="ui-datepicker-group';
+ if (numMonths[1] > 1)
+ switch (col) {
+ case 0: calender += ' ui-datepicker-group-first';
+ cornerClass = ' ui-corner-' + (isRTL ? 'right' : 'left'); break;
+ case numMonths[1]-1: calender += ' ui-datepicker-group-last';
+ cornerClass = ' ui-corner-' + (isRTL ? 'left' : 'right'); break;
+ default: calender += ' ui-datepicker-group-middle'; cornerClass = ''; break;
+ }
+ calender += '">';
+ }
+ calender += '<div class="ui-datepicker-header ui-widget-header ui-helper-clearfix' + cornerClass + '">' +
+ (/all|left/.test(cornerClass) && row == 0 ? (isRTL ? next : prev) : '') +
+ (/all|right/.test(cornerClass) && row == 0 ? (isRTL ? prev : next) : '') +
+ this._generateMonthYearHeader(inst, drawMonth, drawYear, minDate, maxDate,
+ row > 0 || col > 0, monthNames, monthNamesShort) + // draw month headers
+ '</div><table class="ui-datepicker-calendar"><thead>' +
+ '<tr>';
+ var thead = (showWeek ? '<th class="ui-datepicker-week-col">' + this._get(inst, 'weekHeader') + '</th>' : '');
+ for (var dow = 0; dow < 7; dow++) { // days of the week
+ var day = (dow + firstDay) % 7;
+ thead += '<th' + ((dow + firstDay + 6) % 7 >= 5 ? ' class="ui-datepicker-week-end"' : '') + '>' +
+ '<span title="' + dayNames[day] + '">' + dayNamesMin[day] + '</span></th>';
+ }
+ calender += thead + '</tr></thead><tbody>';
+ var daysInMonth = this._getDaysInMonth(drawYear, drawMonth);
+ if (drawYear == inst.selectedYear && drawMonth == inst.selectedMonth)
+ inst.selectedDay = Math.min(inst.selectedDay, daysInMonth);
+ var leadDays = (this._getFirstDayOfMonth(drawYear, drawMonth) - firstDay + 7) % 7;
+ var curRows = Math.ceil((leadDays + daysInMonth) / 7); // calculate the number of rows to generate
+ var numRows = (isMultiMonth ? this.maxRows > curRows ? this.maxRows : curRows : curRows); //If multiple months, use the higher number of rows (see #7043)
+ this.maxRows = numRows;
+ var printDate = this._daylightSavingAdjust(new Date(drawYear, drawMonth, 1 - leadDays));
+ for (var dRow = 0; dRow < numRows; dRow++) { // create date picker rows
+ calender += '<tr>';
+ var tbody = (!showWeek ? '' : '<td class="ui-datepicker-week-col">' +
+ this._get(inst, 'calculateWeek')(printDate) + '</td>');
+ for (var dow = 0; dow < 7; dow++) { // create date picker days
+ var daySettings = (beforeShowDay ?
+ beforeShowDay.apply((inst.input ? inst.input[0] : null), [printDate]) : [true, '']);
+ var otherMonth = (printDate.getMonth() != drawMonth);
+ var unselectable = (otherMonth && !selectOtherMonths) || !daySettings[0] ||
+ (minDate && printDate < minDate) || (maxDate && printDate > maxDate);
+ tbody += '<td class="' +
+ ((dow + firstDay + 6) % 7 >= 5 ? ' ui-datepicker-week-end' : '') + // highlight weekends
+ (otherMonth ? ' ui-datepicker-other-month' : '') + // highlight days from other months
+ ((printDate.getTime() == selectedDate.getTime() && drawMonth == inst.selectedMonth && inst._keyEvent) || // user pressed key
+ (defaultDate.getTime() == printDate.getTime() && defaultDate.getTime() == selectedDate.getTime()) ?
+ // or defaultDate is current printedDate and defaultDate is selectedDate
+ ' ' + this._dayOverClass : '') + // highlight selected day
+ (unselectable ? ' ' + this._unselectableClass + ' ui-state-disabled': '') + // highlight unselectable days
+ (otherMonth && !showOtherMonths ? '' : ' ' + daySettings[1] + // highlight custom dates
+ (printDate.getTime() == currentDate.getTime() ? ' ' + this._currentClass : '') + // highlight selected day
+ (printDate.getTime() == today.getTime() ? ' ui-datepicker-today' : '')) + '"' + // highlight today (if different)
+ ((!otherMonth || showOtherMonths) && daySettings[2] ? ' title="' + daySettings[2] + '"' : '') + // cell title
+ (unselectable ? '' : ' onclick="DP_jQuery_' + dpuuid + '.datepicker._selectDay(\'#' +
+ inst.id + '\',' + printDate.getMonth() + ',' + printDate.getFullYear() + ', this);return false;"') + '>' + // actions
+ (otherMonth && !showOtherMonths ? '&#xa0;' : // display for other months
+ (unselectable ? '<span class="ui-state-default">' + printDate.getDate() + '</span>' : '<a class="ui-state-default' +
+ (printDate.getTime() == today.getTime() ? ' ui-state-highlight' : '') +
+ (printDate.getTime() == currentDate.getTime() ? ' ui-state-active' : '') + // highlight selected day
+ (otherMonth ? ' ui-priority-secondary' : '') + // distinguish dates from other months
+ '" href="#">' + printDate.getDate() + '</a>')) + '</td>'; // display selectable date
+ printDate.setDate(printDate.getDate() + 1);
+ printDate = this._daylightSavingAdjust(printDate);
+ }
+ calender += tbody + '</tr>';
+ }
+ drawMonth++;
+ if (drawMonth > 11) {
+ drawMonth = 0;
+ drawYear++;
+ }
+ calender += '</tbody></table>' + (isMultiMonth ? '</div>' +
+ ((numMonths[0] > 0 && col == numMonths[1]-1) ? '<div class="ui-datepicker-row-break"></div>' : '') : '');
+ group += calender;
+ }
+ html += group;
+ }
+ html += buttonPanel + ($.browser.msie && parseInt($.browser.version,10) < 7 && !inst.inline ?
+ '<iframe src="javascript:false;" class="ui-datepicker-cover" frameborder="0"></iframe>' : '');
+ inst._keyEvent = false;
+ return html;
+ },
+
+ /* Generate the month and year header. */
+ _generateMonthYearHeader: function(inst, drawMonth, drawYear, minDate, maxDate,
+ secondary, monthNames, monthNamesShort) {
+ var changeMonth = this._get(inst, 'changeMonth');
+ var changeYear = this._get(inst, 'changeYear');
+ var showMonthAfterYear = this._get(inst, 'showMonthAfterYear');
+ var html = '<div class="ui-datepicker-title">';
+ var monthHtml = '';
+ // month selection
+ if (secondary || !changeMonth)
+ monthHtml += '<span class="ui-datepicker-month">' + monthNames[drawMonth] + '</span>';
+ else {
+ var inMinYear = (minDate && minDate.getFullYear() == drawYear);
+ var inMaxYear = (maxDate && maxDate.getFullYear() == drawYear);
+ monthHtml += '<select class="ui-datepicker-month" ' +
+ 'onchange="DP_jQuery_' + dpuuid + '.datepicker._selectMonthYear(\'#' + inst.id + '\', this, \'M\');" ' +
+ '>';
+ for (var month = 0; month < 12; month++) {
+ if ((!inMinYear || month >= minDate.getMonth()) &&
+ (!inMaxYear || month <= maxDate.getMonth()))
+ monthHtml += '<option value="' + month + '"' +
+ (month == drawMonth ? ' selected="selected"' : '') +
+ '>' + monthNamesShort[month] + '</option>';
+ }
+ monthHtml += '</select>';
+ }
+ if (!showMonthAfterYear)
+ html += monthHtml + (secondary || !(changeMonth && changeYear) ? '&#xa0;' : '');
+ // year selection
+ if ( !inst.yearshtml ) {
+ inst.yearshtml = '';
+ if (secondary || !changeYear)
+ html += '<span class="ui-datepicker-year">' + drawYear + '</span>';
+ else {
+ // determine range of years to display
+ var years = this._get(inst, 'yearRange').split(':');
+ var thisYear = new Date().getFullYear();
+ var determineYear = function(value) {
+ var year = (value.match(/c[+-].*/) ? drawYear + parseInt(value.substring(1), 10) :
+ (value.match(/[+-].*/) ? thisYear + parseInt(value, 10) :
+ parseInt(value, 10)));
+ return (isNaN(year) ? thisYear : year);
+ };
+ var year = determineYear(years[0]);
+ var endYear = Math.max(year, determineYear(years[1] || ''));
+ year = (minDate ? Math.max(year, minDate.getFullYear()) : year);
+ endYear = (maxDate ? Math.min(endYear, maxDate.getFullYear()) : endYear);
+ inst.yearshtml += '<select class="ui-datepicker-year" ' +
+ 'onchange="DP_jQuery_' + dpuuid + '.datepicker._selectMonthYear(\'#' + inst.id + '\', this, \'Y\');" ' +
+ '>';
+ for (; year <= endYear; year++) {
+ inst.yearshtml += '<option value="' + year + '"' +
+ (year == drawYear ? ' selected="selected"' : '') +
+ '>' + year + '</option>';
+ }
+ inst.yearshtml += '</select>';
+
+ html += inst.yearshtml;
+ inst.yearshtml = null;
+ }
+ }
+ html += this._get(inst, 'yearSuffix');
+ if (showMonthAfterYear)
+ html += (secondary || !(changeMonth && changeYear) ? '&#xa0;' : '') + monthHtml;
+ html += '</div>'; // Close datepicker_header
+ return html;
+ },
+
+ /* Adjust one of the date sub-fields. */
+ _adjustInstDate: function(inst, offset, period) {
+ var year = inst.drawYear + (period == 'Y' ? offset : 0);
+ var month = inst.drawMonth + (period == 'M' ? offset : 0);
+ var day = Math.min(inst.selectedDay, this._getDaysInMonth(year, month)) +
+ (period == 'D' ? offset : 0);
+ var date = this._restrictMinMax(inst,
+ this._daylightSavingAdjust(new Date(year, month, day)));
+ inst.selectedDay = date.getDate();
+ inst.drawMonth = inst.selectedMonth = date.getMonth();
+ inst.drawYear = inst.selectedYear = date.getFullYear();
+ if (period == 'M' || period == 'Y')
+ this._notifyChange(inst);
+ },
+
+ /* Ensure a date is within any min/max bounds. */
+ _restrictMinMax: function(inst, date) {
+ var minDate = this._getMinMaxDate(inst, 'min');
+ var maxDate = this._getMinMaxDate(inst, 'max');
+ var newDate = (minDate && date < minDate ? minDate : date);
+ newDate = (maxDate && newDate > maxDate ? maxDate : newDate);
+ return newDate;
+ },
+
+ /* Notify change of month/year. */
+ _notifyChange: function(inst) {
+ var onChange = this._get(inst, 'onChangeMonthYear');
+ if (onChange)
+ onChange.apply((inst.input ? inst.input[0] : null),
+ [inst.selectedYear, inst.selectedMonth + 1, inst]);
+ },
+
+ /* Determine the number of months to show. */
+ _getNumberOfMonths: function(inst) {
+ var numMonths = this._get(inst, 'numberOfMonths');
+ return (numMonths == null ? [1, 1] : (typeof numMonths == 'number' ? [1, numMonths] : numMonths));
+ },
+
+ /* Determine the current maximum date - ensure no time components are set. */
+ _getMinMaxDate: function(inst, minMax) {
+ return this._determineDate(inst, this._get(inst, minMax + 'Date'), null);
+ },
+
+ /* Find the number of days in a given month. */
+ _getDaysInMonth: function(year, month) {
+ return 32 - this._daylightSavingAdjust(new Date(year, month, 32)).getDate();
+ },
+
+ /* Find the day of the week of the first of a month. */
+ _getFirstDayOfMonth: function(year, month) {
+ return new Date(year, month, 1).getDay();
+ },
+
+ /* Determines if we should allow a "next/prev" month display change. */
+ _canAdjustMonth: function(inst, offset, curYear, curMonth) {
+ var numMonths = this._getNumberOfMonths(inst);
+ var date = this._daylightSavingAdjust(new Date(curYear,
+ curMonth + (offset < 0 ? offset : numMonths[0] * numMonths[1]), 1));
+ if (offset < 0)
+ date.setDate(this._getDaysInMonth(date.getFullYear(), date.getMonth()));
+ return this._isInRange(inst, date);
+ },
+
+ /* Is the given date in the accepted range? */
+ _isInRange: function(inst, date) {
+ var minDate = this._getMinMaxDate(inst, 'min');
+ var maxDate = this._getMinMaxDate(inst, 'max');
+ return ((!minDate || date.getTime() >= minDate.getTime()) &&
+ (!maxDate || date.getTime() <= maxDate.getTime()));
+ },
+
+ /* Provide the configuration settings for formatting/parsing. */
+ _getFormatConfig: function(inst) {
+ var shortYearCutoff = this._get(inst, 'shortYearCutoff');
+ shortYearCutoff = (typeof shortYearCutoff != 'string' ? shortYearCutoff :
+ new Date().getFullYear() % 100 + parseInt(shortYearCutoff, 10));
+ return {shortYearCutoff: shortYearCutoff,
+ dayNamesShort: this._get(inst, 'dayNamesShort'), dayNames: this._get(inst, 'dayNames'),
+ monthNamesShort: this._get(inst, 'monthNamesShort'), monthNames: this._get(inst, 'monthNames')};
+ },
+
+ /* Format the given date for display. */
+ _formatDate: function(inst, day, month, year) {
+ if (!day) {
+ inst.currentDay = inst.selectedDay;
+ inst.currentMonth = inst.selectedMonth;
+ inst.currentYear = inst.selectedYear;
+ }
+ var date = (day ? (typeof day == 'object' ? day :
+ this._daylightSavingAdjust(new Date(year, month, day))) :
+ this._daylightSavingAdjust(new Date(inst.currentYear, inst.currentMonth, inst.currentDay)));
+ return this.formatDate(this._get(inst, 'dateFormat'), date, this._getFormatConfig(inst));
+ }
+});
+
+/*
+ * Bind hover events for datepicker elements.
+ * Done via delegate so the binding only occurs once in the lifetime of the parent div.
+ * Global instActive, set by _updateDatepicker allows the handlers to find their way back to the active picker.
+ */
+function bindHover(dpDiv) {
+ var selector = 'button, .ui-datepicker-prev, .ui-datepicker-next, .ui-datepicker-calendar td a';
+ return dpDiv.bind('mouseout', function(event) {
+ var elem = $( event.target ).closest( selector );
+ if ( !elem.length ) {
+ return;
+ }
+ elem.removeClass( "ui-state-hover ui-datepicker-prev-hover ui-datepicker-next-hover" );
+ })
+ .bind('mouseover', function(event) {
+ var elem = $( event.target ).closest( selector );
+ if ($.datepicker._isDisabledDatepicker( instActive.inline ? dpDiv.parent()[0] : instActive.input[0]) ||
+ !elem.length ) {
+ return;
+ }
+ elem.parents('.ui-datepicker-calendar').find('a').removeClass('ui-state-hover');
+ elem.addClass('ui-state-hover');
+ if (elem.hasClass('ui-datepicker-prev')) elem.addClass('ui-datepicker-prev-hover');
+ if (elem.hasClass('ui-datepicker-next')) elem.addClass('ui-datepicker-next-hover');
+ });
+}
+
+/* jQuery extend now ignores nulls! */
+function extendRemove(target, props) {
+ $.extend(target, props);
+ for (var name in props)
+ if (props[name] == null || props[name] == undefined)
+ target[name] = props[name];
+ return target;
+};
+
+/* Determine whether an object is an array. */
+function isArray(a) {
+ return (a && (($.browser.safari && typeof a == 'object' && a.length) ||
+ (a.constructor && a.constructor.toString().match(/\Array\(\)/))));
+};
+
+/* Invoke the datepicker functionality.
+ @param options string - a command, optionally followed by additional parameters or
+ Object - settings for attaching new datepicker functionality
+ @return jQuery object */
+$.fn.datepicker = function(options){
+
+ /* Verify an empty collection wasn't passed - Fixes #6976 */
+ if ( !this.length ) {
+ return this;
+ }
+
+ /* Initialise the date picker. */
+ if (!$.datepicker.initialized) {
+ $(document).mousedown($.datepicker._checkExternalClick).
+ find('body').append($.datepicker.dpDiv);
+ $.datepicker.initialized = true;
+ }
+
+ var otherArgs = Array.prototype.slice.call(arguments, 1);
+ if (typeof options == 'string' && (options == 'isDisabled' || options == 'getDate' || options == 'widget'))
+ return $.datepicker['_' + options + 'Datepicker'].
+ apply($.datepicker, [this[0]].concat(otherArgs));
+ if (options == 'option' && arguments.length == 2 && typeof arguments[1] == 'string')
+ return $.datepicker['_' + options + 'Datepicker'].
+ apply($.datepicker, [this[0]].concat(otherArgs));
+ return this.each(function() {
+ typeof options == 'string' ?
+ $.datepicker['_' + options + 'Datepicker'].
+ apply($.datepicker, [this].concat(otherArgs)) :
+ $.datepicker._attachDatepicker(this, options);
+ });
+};
+
+$.datepicker = new Datepicker(); // singleton instance
+$.datepicker.initialized = false;
+$.datepicker.uuid = new Date().getTime();
+$.datepicker.version = "1.8.16";
+
+// Workaround for #4055
+// Add another global to avoid noConflict issues with inline event handlers
+window['DP_jQuery_' + dpuuid] = $;
+
+})(jQuery);
+/*
+ * jQuery UI Progressbar 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Progressbar
+ *
+ * Depends:
+ * jquery.ui.core.js
+ * jquery.ui.widget.js
+ */
+(function( $, undefined ) {
+
+$.widget( "ui.progressbar", {
+ options: {
+ value: 0,
+ max: 100
+ },
+
+ min: 0,
+
+ _create: function() {
+ this.element
+ .addClass( "ui-progressbar ui-widget ui-widget-content ui-corner-all" )
+ .attr({
+ role: "progressbar",
+ "aria-valuemin": this.min,
+ "aria-valuemax": this.options.max,
+ "aria-valuenow": this._value()
+ });
+
+ this.valueDiv = $( "<div class='ui-progressbar-value ui-widget-header ui-corner-left'></div>" )
+ .appendTo( this.element );
+
+ this.oldValue = this._value();
+ this._refreshValue();
+ },
+
+ destroy: function() {
+ this.element
+ .removeClass( "ui-progressbar ui-widget ui-widget-content ui-corner-all" )
+ .removeAttr( "role" )
+ .removeAttr( "aria-valuemin" )
+ .removeAttr( "aria-valuemax" )
+ .removeAttr( "aria-valuenow" );
+
+ this.valueDiv.remove();
+
+ $.Widget.prototype.destroy.apply( this, arguments );
+ },
+
+ value: function( newValue ) {
+ if ( newValue === undefined ) {
+ return this._value();
+ }
+
+ this._setOption( "value", newValue );
+ return this;
+ },
+
+ _setOption: function( key, value ) {
+ if ( key === "value" ) {
+ this.options.value = value;
+ this._refreshValue();
+ if ( this._value() === this.options.max ) {
+ this._trigger( "complete" );
+ }
+ }
+
+ $.Widget.prototype._setOption.apply( this, arguments );
+ },
+
+ _value: function() {
+ var val = this.options.value;
+ // normalize invalid value
+ if ( typeof val !== "number" ) {
+ val = 0;
+ }
+ return Math.min( this.options.max, Math.max( this.min, val ) );
+ },
+
+ _percentage: function() {
+ return 100 * this._value() / this.options.max;
+ },
+
+ _refreshValue: function() {
+ var value = this.value();
+ var percentage = this._percentage();
+
+ if ( this.oldValue !== value ) {
+ this.oldValue = value;
+ this._trigger( "change" );
+ }
+
+ this.valueDiv
+ .toggle( value > this.min )
+ .toggleClass( "ui-corner-right", value === this.options.max )
+ .width( percentage.toFixed(0) + "%" );
+ this.element.attr( "aria-valuenow", value );
+ }
+});
+
+$.extend( $.ui.progressbar, {
+ version: "1.8.16"
+});
+
+})( jQuery );
+/*
+ * jQuery UI Effects 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/
+ */
+;jQuery.effects || (function($, undefined) {
+
+$.effects = {};
+
+
+
+/******************************************************************************/
+/****************************** COLOR ANIMATIONS ******************************/
+/******************************************************************************/
+
+// override the animation for color styles
+$.each(['backgroundColor', 'borderBottomColor', 'borderLeftColor',
+ 'borderRightColor', 'borderTopColor', 'borderColor', 'color', 'outlineColor'],
+function(i, attr) {
+ $.fx.step[attr] = function(fx) {
+ if (!fx.colorInit) {
+ fx.start = getColor(fx.elem, attr);
+ fx.end = getRGB(fx.end);
+ fx.colorInit = true;
+ }
+
+ fx.elem.style[attr] = 'rgb(' +
+ Math.max(Math.min(parseInt((fx.pos * (fx.end[0] - fx.start[0])) + fx.start[0], 10), 255), 0) + ',' +
+ Math.max(Math.min(parseInt((fx.pos * (fx.end[1] - fx.start[1])) + fx.start[1], 10), 255), 0) + ',' +
+ Math.max(Math.min(parseInt((fx.pos * (fx.end[2] - fx.start[2])) + fx.start[2], 10), 255), 0) + ')';
+ };
+});
+
+// Color Conversion functions from highlightFade
+// By Blair Mitchelmore
+// http://jquery.offput.ca/highlightFade/
+
+// Parse strings looking for color tuples [255,255,255]
+function getRGB(color) {
+ var result;
+
+ // Check if we're already dealing with an array of colors
+ if ( color && color.constructor == Array && color.length == 3 )
+ return color;
+
+ // Look for rgb(num,num,num)
+ if (result = /rgb\(\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*,\s*([0-9]{1,3})\s*\)/.exec(color))
+ return [parseInt(result[1],10), parseInt(result[2],10), parseInt(result[3],10)];
+
+ // Look for rgb(num%,num%,num%)
+ if (result = /rgb\(\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*,\s*([0-9]+(?:\.[0-9]+)?)\%\s*\)/.exec(color))
+ return [parseFloat(result[1])*2.55, parseFloat(result[2])*2.55, parseFloat(result[3])*2.55];
+
+ // Look for #a0b1c2
+ if (result = /#([a-fA-F0-9]{2})([a-fA-F0-9]{2})([a-fA-F0-9]{2})/.exec(color))
+ return [parseInt(result[1],16), parseInt(result[2],16), parseInt(result[3],16)];
+
+ // Look for #fff
+ if (result = /#([a-fA-F0-9])([a-fA-F0-9])([a-fA-F0-9])/.exec(color))
+ return [parseInt(result[1]+result[1],16), parseInt(result[2]+result[2],16), parseInt(result[3]+result[3],16)];
+
+ // Look for rgba(0, 0, 0, 0) == transparent in Safari 3
+ if (result = /rgba\(0, 0, 0, 0\)/.exec(color))
+ return colors['transparent'];
+
+ // Otherwise, we're most likely dealing with a named color
+ return colors[$.trim(color).toLowerCase()];
+}
+
+function getColor(elem, attr) {
+ var color;
+
+ do {
+ color = $.curCSS(elem, attr);
+
+ // Keep going until we find an element that has color, or we hit the body
+ if ( color != '' && color != 'transparent' || $.nodeName(elem, "body") )
+ break;
+
+ attr = "backgroundColor";
+ } while ( elem = elem.parentNode );
+
+ return getRGB(color);
+};
+
+// Some named colors to work with
+// From Interface by Stefan Petre
+// http://interface.eyecon.ro/
+
+var colors = {
+ aqua:[0,255,255],
+ azure:[240,255,255],
+ beige:[245,245,220],
+ black:[0,0,0],
+ blue:[0,0,255],
+ brown:[165,42,42],
+ cyan:[0,255,255],
+ darkblue:[0,0,139],
+ darkcyan:[0,139,139],
+ darkgrey:[169,169,169],
+ darkgreen:[0,100,0],
+ darkkhaki:[189,183,107],
+ darkmagenta:[139,0,139],
+ darkolivegreen:[85,107,47],
+ darkorange:[255,140,0],
+ darkorchid:[153,50,204],
+ darkred:[139,0,0],
+ darksalmon:[233,150,122],
+ darkviolet:[148,0,211],
+ fuchsia:[255,0,255],
+ gold:[255,215,0],
+ green:[0,128,0],
+ indigo:[75,0,130],
+ khaki:[240,230,140],
+ lightblue:[173,216,230],
+ lightcyan:[224,255,255],
+ lightgreen:[144,238,144],
+ lightgrey:[211,211,211],
+ lightpink:[255,182,193],
+ lightyellow:[255,255,224],
+ lime:[0,255,0],
+ magenta:[255,0,255],
+ maroon:[128,0,0],
+ navy:[0,0,128],
+ olive:[128,128,0],
+ orange:[255,165,0],
+ pink:[255,192,203],
+ purple:[128,0,128],
+ violet:[128,0,128],
+ red:[255,0,0],
+ silver:[192,192,192],
+ white:[255,255,255],
+ yellow:[255,255,0],
+ transparent: [255,255,255]
+};
+
+
+
+/******************************************************************************/
+/****************************** CLASS ANIMATIONS ******************************/
+/******************************************************************************/
+
+var classAnimationActions = ['add', 'remove', 'toggle'],
+ shorthandStyles = {
+ border: 1,
+ borderBottom: 1,
+ borderColor: 1,
+ borderLeft: 1,
+ borderRight: 1,
+ borderTop: 1,
+ borderWidth: 1,
+ margin: 1,
+ padding: 1
+ };
+
+function getElementStyles() {
+ var style = document.defaultView
+ ? document.defaultView.getComputedStyle(this, null)
+ : this.currentStyle,
+ newStyle = {},
+ key,
+ camelCase;
+
+ // webkit enumerates style porperties
+ if (style && style.length && style[0] && style[style[0]]) {
+ var len = style.length;
+ while (len--) {
+ key = style[len];
+ if (typeof style[key] == 'string') {
+ camelCase = key.replace(/\-(\w)/g, function(all, letter){
+ return letter.toUpperCase();
+ });
+ newStyle[camelCase] = style[key];
+ }
+ }
+ } else {
+ for (key in style) {
+ if (typeof style[key] === 'string') {
+ newStyle[key] = style[key];
+ }
+ }
+ }
+
+ return newStyle;
+}
+
+function filterStyles(styles) {
+ var name, value;
+ for (name in styles) {
+ value = styles[name];
+ if (
+ // ignore null and undefined values
+ value == null ||
+ // ignore functions (when does this occur?)
+ $.isFunction(value) ||
+ // shorthand styles that need to be expanded
+ name in shorthandStyles ||
+ // ignore scrollbars (break in IE)
+ (/scrollbar/).test(name) ||
+
+ // only colors or values that can be converted to numbers
+ (!(/color/i).test(name) && isNaN(parseFloat(value)))
+ ) {
+ delete styles[name];
+ }
+ }
+
+ return styles;
+}
+
+function styleDifference(oldStyle, newStyle) {
+ var diff = { _: 0 }, // http://dev.jquery.com/ticket/5459
+ name;
+
+ for (name in newStyle) {
+ if (oldStyle[name] != newStyle[name]) {
+ diff[name] = newStyle[name];
+ }
+ }
+
+ return diff;
+}
+
+$.effects.animateClass = function(value, duration, easing, callback) {
+ if ($.isFunction(easing)) {
+ callback = easing;
+ easing = null;
+ }
+
+ return this.queue(function() {
+ var that = $(this),
+ originalStyleAttr = that.attr('style') || ' ',
+ originalStyle = filterStyles(getElementStyles.call(this)),
+ newStyle,
+ className = that.attr('class');
+
+ $.each(classAnimationActions, function(i, action) {
+ if (value[action]) {
+ that[action + 'Class'](value[action]);
+ }
+ });
+ newStyle = filterStyles(getElementStyles.call(this));
+ that.attr('class', className);
+
+ that.animate(styleDifference(originalStyle, newStyle), {
+ queue: false,
+ duration: duration,
+ easing: easing,
+ complete: function() {
+ $.each(classAnimationActions, function(i, action) {
+ if (value[action]) { that[action + 'Class'](value[action]); }
+ });
+ // work around bug in IE by clearing the cssText before setting it
+ if (typeof that.attr('style') == 'object') {
+ that.attr('style').cssText = '';
+ that.attr('style').cssText = originalStyleAttr;
+ } else {
+ that.attr('style', originalStyleAttr);
+ }
+ if (callback) { callback.apply(this, arguments); }
+ $.dequeue( this );
+ }
+ });
+ });
+};
+
+$.fn.extend({
+ _addClass: $.fn.addClass,
+ addClass: function(classNames, speed, easing, callback) {
+ return speed ? $.effects.animateClass.apply(this, [{ add: classNames },speed,easing,callback]) : this._addClass(classNames);
+ },
+
+ _removeClass: $.fn.removeClass,
+ removeClass: function(classNames,speed,easing,callback) {
+ return speed ? $.effects.animateClass.apply(this, [{ remove: classNames },speed,easing,callback]) : this._removeClass(classNames);
+ },
+
+ _toggleClass: $.fn.toggleClass,
+ toggleClass: function(classNames, force, speed, easing, callback) {
+ if ( typeof force == "boolean" || force === undefined ) {
+ if ( !speed ) {
+ // without speed parameter;
+ return this._toggleClass(classNames, force);
+ } else {
+ return $.effects.animateClass.apply(this, [(force?{add:classNames}:{remove:classNames}),speed,easing,callback]);
+ }
+ } else {
+ // without switch parameter;
+ return $.effects.animateClass.apply(this, [{ toggle: classNames },force,speed,easing]);
+ }
+ },
+
+ switchClass: function(remove,add,speed,easing,callback) {
+ return $.effects.animateClass.apply(this, [{ add: add, remove: remove },speed,easing,callback]);
+ }
+});
+
+
+
+/******************************************************************************/
+/*********************************** EFFECTS **********************************/
+/******************************************************************************/
+
+$.extend($.effects, {
+ version: "1.8.16",
+
+ // Saves a set of properties in a data storage
+ save: function(element, set) {
+ for(var i=0; i < set.length; i++) {
+ if(set[i] !== null) element.data("ec.storage."+set[i], element[0].style[set[i]]);
+ }
+ },
+
+ // Restores a set of previously saved properties from a data storage
+ restore: function(element, set) {
+ for(var i=0; i < set.length; i++) {
+ if(set[i] !== null) element.css(set[i], element.data("ec.storage."+set[i]));
+ }
+ },
+
+ setMode: function(el, mode) {
+ if (mode == 'toggle') mode = el.is(':hidden') ? 'show' : 'hide'; // Set for toggle
+ return mode;
+ },
+
+ getBaseline: function(origin, original) { // Translates a [top,left] array into a baseline value
+ // this should be a little more flexible in the future to handle a string & hash
+ var y, x;
+ switch (origin[0]) {
+ case 'top': y = 0; break;
+ case 'middle': y = 0.5; break;
+ case 'bottom': y = 1; break;
+ default: y = origin[0] / original.height;
+ };
+ switch (origin[1]) {
+ case 'left': x = 0; break;
+ case 'center': x = 0.5; break;
+ case 'right': x = 1; break;
+ default: x = origin[1] / original.width;
+ };
+ return {x: x, y: y};
+ },
+
+ // Wraps the element around a wrapper that copies position properties
+ createWrapper: function(element) {
+
+ // if the element is already wrapped, return it
+ if (element.parent().is('.ui-effects-wrapper')) {
+ return element.parent();
+ }
+
+ // wrap the element
+ var props = {
+ width: element.outerWidth(true),
+ height: element.outerHeight(true),
+ 'float': element.css('float')
+ },
+ wrapper = $('<div></div>')
+ .addClass('ui-effects-wrapper')
+ .css({
+ fontSize: '100%',
+ background: 'transparent',
+ border: 'none',
+ margin: 0,
+ padding: 0
+ }),
+ active = document.activeElement;
+
+ element.wrap(wrapper);
+
+ // Fixes #7595 - Elements lose focus when wrapped.
+ if ( element[ 0 ] === active || $.contains( element[ 0 ], active ) ) {
+ $( active ).focus();
+ }
+
+ wrapper = element.parent(); //Hotfix for jQuery 1.4 since some change in wrap() seems to actually loose the reference to the wrapped element
+
+ // transfer positioning properties to the wrapper
+ if (element.css('position') == 'static') {
+ wrapper.css({ position: 'relative' });
+ element.css({ position: 'relative' });
+ } else {
+ $.extend(props, {
+ position: element.css('position'),
+ zIndex: element.css('z-index')
+ });
+ $.each(['top', 'left', 'bottom', 'right'], function(i, pos) {
+ props[pos] = element.css(pos);
+ if (isNaN(parseInt(props[pos], 10))) {
+ props[pos] = 'auto';
+ }
+ });
+ element.css({position: 'relative', top: 0, left: 0, right: 'auto', bottom: 'auto' });
+ }
+
+ return wrapper.css(props).show();
+ },
+
+ removeWrapper: function(element) {
+ var parent,
+ active = document.activeElement;
+
+ if (element.parent().is('.ui-effects-wrapper')) {
+ parent = element.parent().replaceWith(element);
+ // Fixes #7595 - Elements lose focus when wrapped.
+ if ( element[ 0 ] === active || $.contains( element[ 0 ], active ) ) {
+ $( active ).focus();
+ }
+ return parent;
+ }
+
+ return element;
+ },
+
+ setTransition: function(element, list, factor, value) {
+ value = value || {};
+ $.each(list, function(i, x){
+ unit = element.cssUnit(x);
+ if (unit[0] > 0) value[x] = unit[0] * factor + unit[1];
+ });
+ return value;
+ }
+});
+
+
+function _normalizeArguments(effect, options, speed, callback) {
+ // shift params for method overloading
+ if (typeof effect == 'object') {
+ callback = options;
+ speed = null;
+ options = effect;
+ effect = options.effect;
+ }
+ if ($.isFunction(options)) {
+ callback = options;
+ speed = null;
+ options = {};
+ }
+ if (typeof options == 'number' || $.fx.speeds[options]) {
+ callback = speed;
+ speed = options;
+ options = {};
+ }
+ if ($.isFunction(speed)) {
+ callback = speed;
+ speed = null;
+ }
+
+ options = options || {};
+
+ speed = speed || options.duration;
+ speed = $.fx.off ? 0 : typeof speed == 'number'
+ ? speed : speed in $.fx.speeds ? $.fx.speeds[speed] : $.fx.speeds._default;
+
+ callback = callback || options.complete;
+
+ return [effect, options, speed, callback];
+}
+
+function standardSpeed( speed ) {
+ // valid standard speeds
+ if ( !speed || typeof speed === "number" || $.fx.speeds[ speed ] ) {
+ return true;
+ }
+
+ // invalid strings - treat as "normal" speed
+ if ( typeof speed === "string" && !$.effects[ speed ] ) {
+ return true;
+ }
+
+ return false;
+}
+
+$.fn.extend({
+ effect: function(effect, options, speed, callback) {
+ var args = _normalizeArguments.apply(this, arguments),
+ // TODO: make effects take actual parameters instead of a hash
+ args2 = {
+ options: args[1],
+ duration: args[2],
+ callback: args[3]
+ },
+ mode = args2.options.mode,
+ effectMethod = $.effects[effect];
+
+ if ( $.fx.off || !effectMethod ) {
+ // delegate to the original method (e.g., .show()) if possible
+ if ( mode ) {
+ return this[ mode ]( args2.duration, args2.callback );
+ } else {
+ return this.each(function() {
+ if ( args2.callback ) {
+ args2.callback.call( this );
+ }
+ });
+ }
+ }
+
+ return effectMethod.call(this, args2);
+ },
+
+ _show: $.fn.show,
+ show: function(speed) {
+ if ( standardSpeed( speed ) ) {
+ return this._show.apply(this, arguments);
+ } else {
+ var args = _normalizeArguments.apply(this, arguments);
+ args[1].mode = 'show';
+ return this.effect.apply(this, args);
+ }
+ },
+
+ _hide: $.fn.hide,
+ hide: function(speed) {
+ if ( standardSpeed( speed ) ) {
+ return this._hide.apply(this, arguments);
+ } else {
+ var args = _normalizeArguments.apply(this, arguments);
+ args[1].mode = 'hide';
+ return this.effect.apply(this, args);
+ }
+ },
+
+ // jQuery core overloads toggle and creates _toggle
+ __toggle: $.fn.toggle,
+ toggle: function(speed) {
+ if ( standardSpeed( speed ) || typeof speed === "boolean" || $.isFunction( speed ) ) {
+ return this.__toggle.apply(this, arguments);
+ } else {
+ var args = _normalizeArguments.apply(this, arguments);
+ args[1].mode = 'toggle';
+ return this.effect.apply(this, args);
+ }
+ },
+
+ // helper functions
+ cssUnit: function(key) {
+ var style = this.css(key), val = [];
+ $.each( ['em','px','%','pt'], function(i, unit){
+ if(style.indexOf(unit) > 0)
+ val = [parseFloat(style), unit];
+ });
+ return val;
+ }
+});
+
+
+
+/******************************************************************************/
+/*********************************** EASING ***********************************/
+/******************************************************************************/
+
+/*
+ * jQuery Easing v1.3 - http://gsgd.co.uk/sandbox/jquery/easing/
+ *
+ * Uses the built in easing capabilities added In jQuery 1.1
+ * to offer multiple easing options
+ *
+ * TERMS OF USE - jQuery Easing
+ *
+ * Open source under the BSD License.
+ *
+ * Copyright 2008 George McGinley Smith
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * Neither the name of the author nor the names of contributors may be used to endorse
+ * or promote products derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+*/
+
+// t: current time, b: begInnIng value, c: change In value, d: duration
+$.easing.jswing = $.easing.swing;
+
+$.extend($.easing,
+{
+ def: 'easeOutQuad',
+ swing: function (x, t, b, c, d) {
+ //alert($.easing.default);
+ return $.easing[$.easing.def](x, t, b, c, d);
+ },
+ easeInQuad: function (x, t, b, c, d) {
+ return c*(t/=d)*t + b;
+ },
+ easeOutQuad: function (x, t, b, c, d) {
+ return -c *(t/=d)*(t-2) + b;
+ },
+ easeInOutQuad: function (x, t, b, c, d) {
+ if ((t/=d/2) < 1) return c/2*t*t + b;
+ return -c/2 * ((--t)*(t-2) - 1) + b;
+ },
+ easeInCubic: function (x, t, b, c, d) {
+ return c*(t/=d)*t*t + b;
+ },
+ easeOutCubic: function (x, t, b, c, d) {
+ return c*((t=t/d-1)*t*t + 1) + b;
+ },
+ easeInOutCubic: function (x, t, b, c, d) {
+ if ((t/=d/2) < 1) return c/2*t*t*t + b;
+ return c/2*((t-=2)*t*t + 2) + b;
+ },
+ easeInQuart: function (x, t, b, c, d) {
+ return c*(t/=d)*t*t*t + b;
+ },
+ easeOutQuart: function (x, t, b, c, d) {
+ return -c * ((t=t/d-1)*t*t*t - 1) + b;
+ },
+ easeInOutQuart: function (x, t, b, c, d) {
+ if ((t/=d/2) < 1) return c/2*t*t*t*t + b;
+ return -c/2 * ((t-=2)*t*t*t - 2) + b;
+ },
+ easeInQuint: function (x, t, b, c, d) {
+ return c*(t/=d)*t*t*t*t + b;
+ },
+ easeOutQuint: function (x, t, b, c, d) {
+ return c*((t=t/d-1)*t*t*t*t + 1) + b;
+ },
+ easeInOutQuint: function (x, t, b, c, d) {
+ if ((t/=d/2) < 1) return c/2*t*t*t*t*t + b;
+ return c/2*((t-=2)*t*t*t*t + 2) + b;
+ },
+ easeInSine: function (x, t, b, c, d) {
+ return -c * Math.cos(t/d * (Math.PI/2)) + c + b;
+ },
+ easeOutSine: function (x, t, b, c, d) {
+ return c * Math.sin(t/d * (Math.PI/2)) + b;
+ },
+ easeInOutSine: function (x, t, b, c, d) {
+ return -c/2 * (Math.cos(Math.PI*t/d) - 1) + b;
+ },
+ easeInExpo: function (x, t, b, c, d) {
+ return (t==0) ? b : c * Math.pow(2, 10 * (t/d - 1)) + b;
+ },
+ easeOutExpo: function (x, t, b, c, d) {
+ return (t==d) ? b+c : c * (-Math.pow(2, -10 * t/d) + 1) + b;
+ },
+ easeInOutExpo: function (x, t, b, c, d) {
+ if (t==0) return b;
+ if (t==d) return b+c;
+ if ((t/=d/2) < 1) return c/2 * Math.pow(2, 10 * (t - 1)) + b;
+ return c/2 * (-Math.pow(2, -10 * --t) + 2) + b;
+ },
+ easeInCirc: function (x, t, b, c, d) {
+ return -c * (Math.sqrt(1 - (t/=d)*t) - 1) + b;
+ },
+ easeOutCirc: function (x, t, b, c, d) {
+ return c * Math.sqrt(1 - (t=t/d-1)*t) + b;
+ },
+ easeInOutCirc: function (x, t, b, c, d) {
+ if ((t/=d/2) < 1) return -c/2 * (Math.sqrt(1 - t*t) - 1) + b;
+ return c/2 * (Math.sqrt(1 - (t-=2)*t) + 1) + b;
+ },
+ easeInElastic: function (x, t, b, c, d) {
+ var s=1.70158;var p=0;var a=c;
+ if (t==0) return b; if ((t/=d)==1) return b+c; if (!p) p=d*.3;
+ if (a < Math.abs(c)) { a=c; var s=p/4; }
+ else var s = p/(2*Math.PI) * Math.asin (c/a);
+ return -(a*Math.pow(2,10*(t-=1)) * Math.sin( (t*d-s)*(2*Math.PI)/p )) + b;
+ },
+ easeOutElastic: function (x, t, b, c, d) {
+ var s=1.70158;var p=0;var a=c;
+ if (t==0) return b; if ((t/=d)==1) return b+c; if (!p) p=d*.3;
+ if (a < Math.abs(c)) { a=c; var s=p/4; }
+ else var s = p/(2*Math.PI) * Math.asin (c/a);
+ return a*Math.pow(2,-10*t) * Math.sin( (t*d-s)*(2*Math.PI)/p ) + c + b;
+ },
+ easeInOutElastic: function (x, t, b, c, d) {
+ var s=1.70158;var p=0;var a=c;
+ if (t==0) return b; if ((t/=d/2)==2) return b+c; if (!p) p=d*(.3*1.5);
+ if (a < Math.abs(c)) { a=c; var s=p/4; }
+ else var s = p/(2*Math.PI) * Math.asin (c/a);
+ if (t < 1) return -.5*(a*Math.pow(2,10*(t-=1)) * Math.sin( (t*d-s)*(2*Math.PI)/p )) + b;
+ return a*Math.pow(2,-10*(t-=1)) * Math.sin( (t*d-s)*(2*Math.PI)/p )*.5 + c + b;
+ },
+ easeInBack: function (x, t, b, c, d, s) {
+ if (s == undefined) s = 1.70158;
+ return c*(t/=d)*t*((s+1)*t - s) + b;
+ },
+ easeOutBack: function (x, t, b, c, d, s) {
+ if (s == undefined) s = 1.70158;
+ return c*((t=t/d-1)*t*((s+1)*t + s) + 1) + b;
+ },
+ easeInOutBack: function (x, t, b, c, d, s) {
+ if (s == undefined) s = 1.70158;
+ if ((t/=d/2) < 1) return c/2*(t*t*(((s*=(1.525))+1)*t - s)) + b;
+ return c/2*((t-=2)*t*(((s*=(1.525))+1)*t + s) + 2) + b;
+ },
+ easeInBounce: function (x, t, b, c, d) {
+ return c - $.easing.easeOutBounce (x, d-t, 0, c, d) + b;
+ },
+ easeOutBounce: function (x, t, b, c, d) {
+ if ((t/=d) < (1/2.75)) {
+ return c*(7.5625*t*t) + b;
+ } else if (t < (2/2.75)) {
+ return c*(7.5625*(t-=(1.5/2.75))*t + .75) + b;
+ } else if (t < (2.5/2.75)) {
+ return c*(7.5625*(t-=(2.25/2.75))*t + .9375) + b;
+ } else {
+ return c*(7.5625*(t-=(2.625/2.75))*t + .984375) + b;
+ }
+ },
+ easeInOutBounce: function (x, t, b, c, d) {
+ if (t < d/2) return $.easing.easeInBounce (x, t*2, 0, c, d) * .5 + b;
+ return $.easing.easeOutBounce (x, t*2-d, 0, c, d) * .5 + c*.5 + b;
+ }
+});
+
+/*
+ *
+ * TERMS OF USE - EASING EQUATIONS
+ *
+ * Open source under the BSD License.
+ *
+ * Copyright 2001 Robert Penner
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without modification,
+ * are permitted provided that the following conditions are met:
+ *
+ * Redistributions of source code must retain the above copyright notice, this list of
+ * conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice, this list
+ * of conditions and the following disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * Neither the name of the author nor the names of contributors may be used to endorse
+ * or promote products derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
+ * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+})(jQuery);
+/*
+ * jQuery UI Effects Blind 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Blind
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.blind = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'hide'); // Set Mode
+ var direction = o.options.direction || 'vertical'; // Default direction
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ var wrapper = $.effects.createWrapper(el).css({overflow:'hidden'}); // Create Wrapper
+ var ref = (direction == 'vertical') ? 'height' : 'width';
+ var distance = (direction == 'vertical') ? wrapper.height() : wrapper.width();
+ if(mode == 'show') wrapper.css(ref, 0); // Shift
+
+ // Animation
+ var animation = {};
+ animation[ref] = mode == 'show' ? distance : 0;
+
+ // Animate
+ wrapper.animate(animation, o.duration, o.options.easing, function() {
+ if(mode == 'hide') el.hide(); // Hide
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(el[0], arguments); // Callback
+ el.dequeue();
+ });
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Bounce 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Bounce
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.bounce = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'effect'); // Set Mode
+ var direction = o.options.direction || 'up'; // Default direction
+ var distance = o.options.distance || 20; // Default distance
+ var times = o.options.times || 5; // Default # of times
+ var speed = o.duration || 250; // Default speed per bounce
+ if (/show|hide/.test(mode)) props.push('opacity'); // Avoid touching opacity to prevent clearType and PNG issues in IE
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ $.effects.createWrapper(el); // Create Wrapper
+ var ref = (direction == 'up' || direction == 'down') ? 'top' : 'left';
+ var motion = (direction == 'up' || direction == 'left') ? 'pos' : 'neg';
+ var distance = o.options.distance || (ref == 'top' ? el.outerHeight({margin:true}) / 3 : el.outerWidth({margin:true}) / 3);
+ if (mode == 'show') el.css('opacity', 0).css(ref, motion == 'pos' ? -distance : distance); // Shift
+ if (mode == 'hide') distance = distance / (times * 2);
+ if (mode != 'hide') times--;
+
+ // Animate
+ if (mode == 'show') { // Show Bounce
+ var animation = {opacity: 1};
+ animation[ref] = (motion == 'pos' ? '+=' : '-=') + distance;
+ el.animate(animation, speed / 2, o.options.easing);
+ distance = distance / 2;
+ times--;
+ };
+ for (var i = 0; i < times; i++) { // Bounces
+ var animation1 = {}, animation2 = {};
+ animation1[ref] = (motion == 'pos' ? '-=' : '+=') + distance;
+ animation2[ref] = (motion == 'pos' ? '+=' : '-=') + distance;
+ el.animate(animation1, speed / 2, o.options.easing).animate(animation2, speed / 2, o.options.easing);
+ distance = (mode == 'hide') ? distance * 2 : distance / 2;
+ };
+ if (mode == 'hide') { // Last Bounce
+ var animation = {opacity: 0};
+ animation[ref] = (motion == 'pos' ? '-=' : '+=') + distance;
+ el.animate(animation, speed / 2, o.options.easing, function(){
+ el.hide(); // Hide
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(this, arguments); // Callback
+ });
+ } else {
+ var animation1 = {}, animation2 = {};
+ animation1[ref] = (motion == 'pos' ? '-=' : '+=') + distance;
+ animation2[ref] = (motion == 'pos' ? '+=' : '-=') + distance;
+ el.animate(animation1, speed / 2, o.options.easing).animate(animation2, speed / 2, o.options.easing, function(){
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(this, arguments); // Callback
+ });
+ };
+ el.queue('fx', function() { el.dequeue(); });
+ el.dequeue();
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Clip 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Clip
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.clip = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right','height','width'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'hide'); // Set Mode
+ var direction = o.options.direction || 'vertical'; // Default direction
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ var wrapper = $.effects.createWrapper(el).css({overflow:'hidden'}); // Create Wrapper
+ var animate = el[0].tagName == 'IMG' ? wrapper : el;
+ var ref = {
+ size: (direction == 'vertical') ? 'height' : 'width',
+ position: (direction == 'vertical') ? 'top' : 'left'
+ };
+ var distance = (direction == 'vertical') ? animate.height() : animate.width();
+ if(mode == 'show') { animate.css(ref.size, 0); animate.css(ref.position, distance / 2); } // Shift
+
+ // Animation
+ var animation = {};
+ animation[ref.size] = mode == 'show' ? distance : 0;
+ animation[ref.position] = mode == 'show' ? 0 : distance / 2;
+
+ // Animate
+ animate.animate(animation, { queue: false, duration: o.duration, easing: o.options.easing, complete: function() {
+ if(mode == 'hide') el.hide(); // Hide
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(el[0], arguments); // Callback
+ el.dequeue();
+ }});
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Drop 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Drop
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.drop = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right','opacity'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'hide'); // Set Mode
+ var direction = o.options.direction || 'left'; // Default Direction
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ $.effects.createWrapper(el); // Create Wrapper
+ var ref = (direction == 'up' || direction == 'down') ? 'top' : 'left';
+ var motion = (direction == 'up' || direction == 'left') ? 'pos' : 'neg';
+ var distance = o.options.distance || (ref == 'top' ? el.outerHeight({margin:true}) / 2 : el.outerWidth({margin:true}) / 2);
+ if (mode == 'show') el.css('opacity', 0).css(ref, motion == 'pos' ? -distance : distance); // Shift
+
+ // Animation
+ var animation = {opacity: mode == 'show' ? 1 : 0};
+ animation[ref] = (mode == 'show' ? (motion == 'pos' ? '+=' : '-=') : (motion == 'pos' ? '-=' : '+=')) + distance;
+
+ // Animate
+ el.animate(animation, { queue: false, duration: o.duration, easing: o.options.easing, complete: function() {
+ if(mode == 'hide') el.hide(); // Hide
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(this, arguments); // Callback
+ el.dequeue();
+ }});
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Explode 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Explode
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.explode = function(o) {
+
+ return this.queue(function() {
+
+ var rows = o.options.pieces ? Math.round(Math.sqrt(o.options.pieces)) : 3;
+ var cells = o.options.pieces ? Math.round(Math.sqrt(o.options.pieces)) : 3;
+
+ o.options.mode = o.options.mode == 'toggle' ? ($(this).is(':visible') ? 'hide' : 'show') : o.options.mode;
+ var el = $(this).show().css('visibility', 'hidden');
+ var offset = el.offset();
+
+ //Substract the margins - not fixing the problem yet.
+ offset.top -= parseInt(el.css("marginTop"),10) || 0;
+ offset.left -= parseInt(el.css("marginLeft"),10) || 0;
+
+ var width = el.outerWidth(true);
+ var height = el.outerHeight(true);
+
+ for(var i=0;i<rows;i++) { // =
+ for(var j=0;j<cells;j++) { // ||
+ el
+ .clone()
+ .appendTo('body')
+ .wrap('<div></div>')
+ .css({
+ position: 'absolute',
+ visibility: 'visible',
+ left: -j*(width/cells),
+ top: -i*(height/rows)
+ })
+ .parent()
+ .addClass('ui-effects-explode')
+ .css({
+ position: 'absolute',
+ overflow: 'hidden',
+ width: width/cells,
+ height: height/rows,
+ left: offset.left + j*(width/cells) + (o.options.mode == 'show' ? (j-Math.floor(cells/2))*(width/cells) : 0),
+ top: offset.top + i*(height/rows) + (o.options.mode == 'show' ? (i-Math.floor(rows/2))*(height/rows) : 0),
+ opacity: o.options.mode == 'show' ? 0 : 1
+ }).animate({
+ left: offset.left + j*(width/cells) + (o.options.mode == 'show' ? 0 : (j-Math.floor(cells/2))*(width/cells)),
+ top: offset.top + i*(height/rows) + (o.options.mode == 'show' ? 0 : (i-Math.floor(rows/2))*(height/rows)),
+ opacity: o.options.mode == 'show' ? 1 : 0
+ }, o.duration || 500);
+ }
+ }
+
+ // Set a timeout, to call the callback approx. when the other animations have finished
+ setTimeout(function() {
+
+ o.options.mode == 'show' ? el.css({ visibility: 'visible' }) : el.css({ visibility: 'visible' }).hide();
+ if(o.callback) o.callback.apply(el[0]); // Callback
+ el.dequeue();
+
+ $('div.ui-effects-explode').remove();
+
+ }, o.duration || 500);
+
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Fade 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Fade
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.fade = function(o) {
+ return this.queue(function() {
+ var elem = $(this),
+ mode = $.effects.setMode(elem, o.options.mode || 'hide');
+
+ elem.animate({ opacity: mode }, {
+ queue: false,
+ duration: o.duration,
+ easing: o.options.easing,
+ complete: function() {
+ (o.callback && o.callback.apply(this, arguments));
+ elem.dequeue();
+ }
+ });
+ });
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Fold 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Fold
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.fold = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'hide'); // Set Mode
+ var size = o.options.size || 15; // Default fold size
+ var horizFirst = !(!o.options.horizFirst); // Ensure a boolean value
+ var duration = o.duration ? o.duration / 2 : $.fx.speeds._default / 2;
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ var wrapper = $.effects.createWrapper(el).css({overflow:'hidden'}); // Create Wrapper
+ var widthFirst = ((mode == 'show') != horizFirst);
+ var ref = widthFirst ? ['width', 'height'] : ['height', 'width'];
+ var distance = widthFirst ? [wrapper.width(), wrapper.height()] : [wrapper.height(), wrapper.width()];
+ var percent = /([0-9]+)%/.exec(size);
+ if(percent) size = parseInt(percent[1],10) / 100 * distance[mode == 'hide' ? 0 : 1];
+ if(mode == 'show') wrapper.css(horizFirst ? {height: 0, width: size} : {height: size, width: 0}); // Shift
+
+ // Animation
+ var animation1 = {}, animation2 = {};
+ animation1[ref[0]] = mode == 'show' ? distance[0] : size;
+ animation2[ref[1]] = mode == 'show' ? distance[1] : 0;
+
+ // Animate
+ wrapper.animate(animation1, duration, o.options.easing)
+ .animate(animation2, duration, o.options.easing, function() {
+ if(mode == 'hide') el.hide(); // Hide
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(el[0], arguments); // Callback
+ el.dequeue();
+ });
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Highlight 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Highlight
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.highlight = function(o) {
+ return this.queue(function() {
+ var elem = $(this),
+ props = ['backgroundImage', 'backgroundColor', 'opacity'],
+ mode = $.effects.setMode(elem, o.options.mode || 'show'),
+ animation = {
+ backgroundColor: elem.css('backgroundColor')
+ };
+
+ if (mode == 'hide') {
+ animation.opacity = 0;
+ }
+
+ $.effects.save(elem, props);
+ elem
+ .show()
+ .css({
+ backgroundImage: 'none',
+ backgroundColor: o.options.color || '#ffff99'
+ })
+ .animate(animation, {
+ queue: false,
+ duration: o.duration,
+ easing: o.options.easing,
+ complete: function() {
+ (mode == 'hide' && elem.hide());
+ $.effects.restore(elem, props);
+ (mode == 'show' && !$.support.opacity && this.style.removeAttribute('filter'));
+ (o.callback && o.callback.apply(this, arguments));
+ elem.dequeue();
+ }
+ });
+ });
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Pulsate 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Pulsate
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.pulsate = function(o) {
+ return this.queue(function() {
+ var elem = $(this),
+ mode = $.effects.setMode(elem, o.options.mode || 'show');
+ times = ((o.options.times || 5) * 2) - 1;
+ duration = o.duration ? o.duration / 2 : $.fx.speeds._default / 2,
+ isVisible = elem.is(':visible'),
+ animateTo = 0;
+
+ if (!isVisible) {
+ elem.css('opacity', 0).show();
+ animateTo = 1;
+ }
+
+ if ((mode == 'hide' && isVisible) || (mode == 'show' && !isVisible)) {
+ times--;
+ }
+
+ for (var i = 0; i < times; i++) {
+ elem.animate({ opacity: animateTo }, duration, o.options.easing);
+ animateTo = (animateTo + 1) % 2;
+ }
+
+ elem.animate({ opacity: animateTo }, duration, o.options.easing, function() {
+ if (animateTo == 0) {
+ elem.hide();
+ }
+ (o.callback && o.callback.apply(this, arguments));
+ });
+
+ elem
+ .queue('fx', function() { elem.dequeue(); })
+ .dequeue();
+ });
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Scale 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Scale
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.puff = function(o) {
+ return this.queue(function() {
+ var elem = $(this),
+ mode = $.effects.setMode(elem, o.options.mode || 'hide'),
+ percent = parseInt(o.options.percent, 10) || 150,
+ factor = percent / 100,
+ original = { height: elem.height(), width: elem.width() };
+
+ $.extend(o.options, {
+ fade: true,
+ mode: mode,
+ percent: mode == 'hide' ? percent : 100,
+ from: mode == 'hide'
+ ? original
+ : {
+ height: original.height * factor,
+ width: original.width * factor
+ }
+ });
+
+ elem.effect('scale', o.options, o.duration, o.callback);
+ elem.dequeue();
+ });
+};
+
+$.effects.scale = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this);
+
+ // Set options
+ var options = $.extend(true, {}, o.options);
+ var mode = $.effects.setMode(el, o.options.mode || 'effect'); // Set Mode
+ var percent = parseInt(o.options.percent,10) || (parseInt(o.options.percent,10) == 0 ? 0 : (mode == 'hide' ? 0 : 100)); // Set default scaling percent
+ var direction = o.options.direction || 'both'; // Set default axis
+ var origin = o.options.origin; // The origin of the scaling
+ if (mode != 'effect') { // Set default origin and restore for show/hide
+ options.origin = origin || ['middle','center'];
+ options.restore = true;
+ }
+ var original = {height: el.height(), width: el.width()}; // Save original
+ el.from = o.options.from || (mode == 'show' ? {height: 0, width: 0} : original); // Default from state
+
+ // Adjust
+ var factor = { // Set scaling factor
+ y: direction != 'horizontal' ? (percent / 100) : 1,
+ x: direction != 'vertical' ? (percent / 100) : 1
+ };
+ el.to = {height: original.height * factor.y, width: original.width * factor.x}; // Set to state
+
+ if (o.options.fade) { // Fade option to support puff
+ if (mode == 'show') {el.from.opacity = 0; el.to.opacity = 1;};
+ if (mode == 'hide') {el.from.opacity = 1; el.to.opacity = 0;};
+ };
+
+ // Animation
+ options.from = el.from; options.to = el.to; options.mode = mode;
+
+ // Animate
+ el.effect('size', options, o.duration, o.callback);
+ el.dequeue();
+ });
+
+};
+
+$.effects.size = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right','width','height','overflow','opacity'];
+ var props1 = ['position','top','bottom','left','right','overflow','opacity']; // Always restore
+ var props2 = ['width','height','overflow']; // Copy for children
+ var cProps = ['fontSize'];
+ var vProps = ['borderTopWidth', 'borderBottomWidth', 'paddingTop', 'paddingBottom'];
+ var hProps = ['borderLeftWidth', 'borderRightWidth', 'paddingLeft', 'paddingRight'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'effect'); // Set Mode
+ var restore = o.options.restore || false; // Default restore
+ var scale = o.options.scale || 'both'; // Default scale mode
+ var origin = o.options.origin; // The origin of the sizing
+ var original = {height: el.height(), width: el.width()}; // Save original
+ el.from = o.options.from || original; // Default from state
+ el.to = o.options.to || original; // Default to state
+ // Adjust
+ if (origin) { // Calculate baseline shifts
+ var baseline = $.effects.getBaseline(origin, original);
+ el.from.top = (original.height - el.from.height) * baseline.y;
+ el.from.left = (original.width - el.from.width) * baseline.x;
+ el.to.top = (original.height - el.to.height) * baseline.y;
+ el.to.left = (original.width - el.to.width) * baseline.x;
+ };
+ var factor = { // Set scaling factor
+ from: {y: el.from.height / original.height, x: el.from.width / original.width},
+ to: {y: el.to.height / original.height, x: el.to.width / original.width}
+ };
+ if (scale == 'box' || scale == 'both') { // Scale the css box
+ if (factor.from.y != factor.to.y) { // Vertical props scaling
+ props = props.concat(vProps);
+ el.from = $.effects.setTransition(el, vProps, factor.from.y, el.from);
+ el.to = $.effects.setTransition(el, vProps, factor.to.y, el.to);
+ };
+ if (factor.from.x != factor.to.x) { // Horizontal props scaling
+ props = props.concat(hProps);
+ el.from = $.effects.setTransition(el, hProps, factor.from.x, el.from);
+ el.to = $.effects.setTransition(el, hProps, factor.to.x, el.to);
+ };
+ };
+ if (scale == 'content' || scale == 'both') { // Scale the content
+ if (factor.from.y != factor.to.y) { // Vertical props scaling
+ props = props.concat(cProps);
+ el.from = $.effects.setTransition(el, cProps, factor.from.y, el.from);
+ el.to = $.effects.setTransition(el, cProps, factor.to.y, el.to);
+ };
+ };
+ $.effects.save(el, restore ? props : props1); el.show(); // Save & Show
+ $.effects.createWrapper(el); // Create Wrapper
+ el.css('overflow','hidden').css(el.from); // Shift
+
+ // Animate
+ if (scale == 'content' || scale == 'both') { // Scale the children
+ vProps = vProps.concat(['marginTop','marginBottom']).concat(cProps); // Add margins/font-size
+ hProps = hProps.concat(['marginLeft','marginRight']); // Add margins
+ props2 = props.concat(vProps).concat(hProps); // Concat
+ el.find("*[width]").each(function(){
+ child = $(this);
+ if (restore) $.effects.save(child, props2);
+ var c_original = {height: child.height(), width: child.width()}; // Save original
+ child.from = {height: c_original.height * factor.from.y, width: c_original.width * factor.from.x};
+ child.to = {height: c_original.height * factor.to.y, width: c_original.width * factor.to.x};
+ if (factor.from.y != factor.to.y) { // Vertical props scaling
+ child.from = $.effects.setTransition(child, vProps, factor.from.y, child.from);
+ child.to = $.effects.setTransition(child, vProps, factor.to.y, child.to);
+ };
+ if (factor.from.x != factor.to.x) { // Horizontal props scaling
+ child.from = $.effects.setTransition(child, hProps, factor.from.x, child.from);
+ child.to = $.effects.setTransition(child, hProps, factor.to.x, child.to);
+ };
+ child.css(child.from); // Shift children
+ child.animate(child.to, o.duration, o.options.easing, function(){
+ if (restore) $.effects.restore(child, props2); // Restore children
+ }); // Animate children
+ });
+ };
+
+ // Animate
+ el.animate(el.to, { queue: false, duration: o.duration, easing: o.options.easing, complete: function() {
+ if (el.to.opacity === 0) {
+ el.css('opacity', el.from.opacity);
+ }
+ if(mode == 'hide') el.hide(); // Hide
+ $.effects.restore(el, restore ? props : props1); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(this, arguments); // Callback
+ el.dequeue();
+ }});
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Shake 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Shake
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.shake = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'effect'); // Set Mode
+ var direction = o.options.direction || 'left'; // Default direction
+ var distance = o.options.distance || 20; // Default distance
+ var times = o.options.times || 3; // Default # of times
+ var speed = o.duration || o.options.duration || 140; // Default speed per shake
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ $.effects.createWrapper(el); // Create Wrapper
+ var ref = (direction == 'up' || direction == 'down') ? 'top' : 'left';
+ var motion = (direction == 'up' || direction == 'left') ? 'pos' : 'neg';
+
+ // Animation
+ var animation = {}, animation1 = {}, animation2 = {};
+ animation[ref] = (motion == 'pos' ? '-=' : '+=') + distance;
+ animation1[ref] = (motion == 'pos' ? '+=' : '-=') + distance * 2;
+ animation2[ref] = (motion == 'pos' ? '-=' : '+=') + distance * 2;
+
+ // Animate
+ el.animate(animation, speed, o.options.easing);
+ for (var i = 1; i < times; i++) { // Shakes
+ el.animate(animation1, speed, o.options.easing).animate(animation2, speed, o.options.easing);
+ };
+ el.animate(animation1, speed, o.options.easing).
+ animate(animation, speed / 2, o.options.easing, function(){ // Last shake
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(this, arguments); // Callback
+ });
+ el.queue('fx', function() { el.dequeue(); });
+ el.dequeue();
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Slide 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Slide
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.slide = function(o) {
+
+ return this.queue(function() {
+
+ // Create element
+ var el = $(this), props = ['position','top','bottom','left','right'];
+
+ // Set options
+ var mode = $.effects.setMode(el, o.options.mode || 'show'); // Set Mode
+ var direction = o.options.direction || 'left'; // Default Direction
+
+ // Adjust
+ $.effects.save(el, props); el.show(); // Save & Show
+ $.effects.createWrapper(el).css({overflow:'hidden'}); // Create Wrapper
+ var ref = (direction == 'up' || direction == 'down') ? 'top' : 'left';
+ var motion = (direction == 'up' || direction == 'left') ? 'pos' : 'neg';
+ var distance = o.options.distance || (ref == 'top' ? el.outerHeight({margin:true}) : el.outerWidth({margin:true}));
+ if (mode == 'show') el.css(ref, motion == 'pos' ? (isNaN(distance) ? "-" + distance : -distance) : distance); // Shift
+
+ // Animation
+ var animation = {};
+ animation[ref] = (mode == 'show' ? (motion == 'pos' ? '+=' : '-=') : (motion == 'pos' ? '-=' : '+=')) + distance;
+
+ // Animate
+ el.animate(animation, { queue: false, duration: o.duration, easing: o.options.easing, complete: function() {
+ if(mode == 'hide') el.hide(); // Hide
+ $.effects.restore(el, props); $.effects.removeWrapper(el); // Restore
+ if(o.callback) o.callback.apply(this, arguments); // Callback
+ el.dequeue();
+ }});
+
+ });
+
+};
+
+})(jQuery);
+/*
+ * jQuery UI Effects Transfer 1.8.16
+ *
+ * Copyright 2011, AUTHORS.txt (http://jqueryui.com/about)
+ * Dual licensed under the MIT or GPL Version 2 licenses.
+ * http://jquery.org/license
+ *
+ * http://docs.jquery.com/UI/Effects/Transfer
+ *
+ * Depends:
+ * jquery.effects.core.js
+ */
+(function( $, undefined ) {
+
+$.effects.transfer = function(o) {
+ return this.queue(function() {
+ var elem = $(this),
+ target = $(o.options.to),
+ endPosition = target.offset(),
+ animation = {
+ top: endPosition.top,
+ left: endPosition.left,
+ height: target.innerHeight(),
+ width: target.innerWidth()
+ },
+ startPosition = elem.offset(),
+ transfer = $('<div class="ui-effects-transfer"></div>')
+ .appendTo(document.body)
+ .addClass(o.options.className)
+ .css({
+ top: startPosition.top,
+ left: startPosition.left,
+ height: elem.innerHeight(),
+ width: elem.innerWidth(),
+ position: 'absolute'
+ })
+ .animate(animation, o.duration, o.options.easing, function() {
+ transfer.remove();
+ (o.callback && o.callback.apply(elem[0], arguments));
+ elem.dequeue();
+ });
+ });
+};
+
+})(jQuery);
diff --git a/contrib/server/mosesserver.cpp b/contrib/server/mosesserver.cpp
index 0091ee2de..f040d8b12 100644
--- a/contrib/server/mosesserver.cpp
+++ b/contrib/server/mosesserver.cpp
@@ -192,7 +192,8 @@ public:
staticData.GetInputFactorOrder();
stringstream in(source + "\n");
sentence.Read(in,inputFactorOrder);
- Manager manager(sentence,staticData.GetSearchAlgorithm(), &system);
+ size_t lineNumber = 0; // TODO: Include sentence request number here?
+ Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
const Hypothesis* hypo = manager.GetBestHypothesis();
@@ -367,7 +368,7 @@ int main(int argc, char** argv)
params->Explain();
exit(1);
}
- if (!StaticData::LoadDataStatic(params)) {
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}
diff --git a/contrib/sigtest-filter/Makefile b/contrib/sigtest-filter/Makefile
index ddefc907b..71de9c45f 100644
--- a/contrib/sigtest-filter/Makefile
+++ b/contrib/sigtest-filter/Makefile
@@ -1,5 +1,5 @@
SALMDIR=/Users/hieuhoang/workspace/salm
-FLAVOR?=o32
+FLAVOR?=o64
INC=-I$(SALMDIR)/Src/Shared -I$(SALMDIR)/Src/SuffixArrayApplications -I$(SALMDIR)/Src/SuffixArrayApplications/SuffixArraySearch
OBJS=$(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArrayApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Search/_SuffixArraySearchApplicationBase.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_String.$(FLAVOR) $(SALMDIR)/Distribution/Linux/Objs/Shared/_IDVocabulary.$(FLAVOR)
diff --git a/contrib/sigtest-filter/check-install b/contrib/sigtest-filter/check-install
index ba4f431e0..ba4f431e0 100644..100755
--- a/contrib/sigtest-filter/check-install
+++ b/contrib/sigtest-filter/check-install
diff --git a/contrib/sigtest-filter/filter-pt.cpp b/contrib/sigtest-filter/filter-pt.cpp
index b0828ae33..f06d2b430 100644
--- a/contrib/sigtest-filter/filter-pt.cpp
+++ b/contrib/sigtest-filter/filter-pt.cpp
@@ -17,14 +17,15 @@
#include <unistd.h>
#endif
-typedef std::set<TextLenType> SentIdSet;
-typedef std::map<std::string, SentIdSet> PhraseSetMap;
+typedef std::vector<TextLenType> SentIdSet;
+typedef std::pair<SentIdSet, clock_t> ClockedSentIdSet;
+typedef std::map<std::string, ClockedSentIdSet> PhraseSetMap;
#undef min
// constants
-const size_t MINIMUM_SIZE_TO_KEEP = 10000; // reduce this to improve memory usage,
-// increase for speed
+const size_t MINIMUM_SIZE_TO_KEEP = 10000; // increase this to improve memory usage,
+// reduce for speed
const std::string SEPARATOR = " ||| ";
const double ALPHA_PLUS_EPS = -1000.0; // dummy value
@@ -37,9 +38,12 @@ bool print_neglog_significance = false; // add -log(p) to phrase table?
double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
// higher = filter-more
bool pef_filter_only = false; // only filter based on pef
+bool hierarchical = false;
+int max_cache = 0;
// globals
PhraseSetMap esets;
+PhraseSetMap fsets;
double p_111 = 0.0; // alpha
size_t nremoved_sigfilter = 0;
size_t nremoved_pfefilter = 0;
@@ -59,7 +63,9 @@ void usage()
<< " [-l threshold] >0.0, a+e, or a-e: keep values that have a -log significance > this\n"
<< " [-n num ] 0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements\n"
<< " [-c ] add the cooccurence counts to the phrase table\n"
- << " [-p ] add -log(significance) to the phrasetable\n\n";
+ << " [-p ] add -log(significance) to the phrasetable\n"
+ << " [-h ] filter hierarchical rule table\n"
+ << " [-m num ] limit cache to num most recent phrases\n";
exit(1);
}
@@ -96,10 +102,15 @@ PTEntry::PTEntry(const std::string& str, int index) :
pos = nextPos + SEPARATOR.size();
nextPos = str.find(SEPARATOR, pos);
- this->scores = str.substr(pos,nextPos-pos);
+ if (nextPos < str.size()) {
+ this->scores = str.substr(pos,nextPos-pos);
- pos = nextPos + SEPARATOR.size();
- this->extra = str.substr(pos);
+ pos = nextPos + SEPARATOR.size();
+ this->extra = str.substr(pos);
+ }
+ else {
+ this->scores = str.substr(pos,str.size()-pos);
+ }
int c = 0;
std::string::iterator i=scores.begin();
@@ -190,6 +201,119 @@ double fisher_exact(int cfe, int ce, int cf)
return total_p;
}
+template <class setType>
+setType ordered_set_intersect(setType & set_1, setType & set_2)
+{
+ setType set_out;
+ std::set_intersection(set_1.begin(), set_1.end(), set_2.begin(), set_2.end(), inserter(set_out,set_out.begin()) );
+ return set_out;
+}
+
+
+SentIdSet lookup_phrase(const std::string & phrase, C_SuffixArraySearchApplicationBase & my_sa)
+{
+ SentIdSet occur_set;
+ vector<S_SimplePhraseLocationElement> locations;
+
+ locations = my_sa.locateExactPhraseInCorpus(phrase.c_str());
+ if(locations.size()==0) {
+ cerr<<"No occurrences found!!\n";
+ }
+ for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin(); i != locations.end(); ++i) {
+ occur_set.push_back(i->sentIdInCorpus);
+ }
+
+ std::sort(occur_set.begin(), occur_set.end());
+ SentIdSet::iterator it = std::unique(occur_set.begin(), occur_set.end());
+ occur_set.resize(it - occur_set.begin());
+
+ return occur_set;
+}
+
+
+// slight simplicifaction: we consider all sentences in which "a" and "b" occur to be instances of the rule "a [X][X] b".
+SentIdSet lookup_multiple_phrases(vector<std::string> & phrases, C_SuffixArraySearchApplicationBase & my_sa, const std::string & rule, PhraseSetMap & cache)
+{
+
+ if (phrases.size() == 1) {
+ return lookup_phrase(phrases.front(), my_sa);
+ }
+
+ else {
+ SentIdSet main_set;
+ ClockedSentIdSet & clocked_first_set = cache[phrases.front()];
+ SentIdSet & first_set = clocked_first_set.first;
+ clocked_first_set.second = clock();
+
+ bool first = true;
+ if (first_set.empty()) {
+ first_set = lookup_phrase(phrases.front(), my_sa);
+ }
+ for (vector<std::string>::iterator phrase=phrases.begin()+1; phrase != phrases.end(); ++phrase) {
+ ClockedSentIdSet & clocked_temp_set = cache[*phrase];
+ SentIdSet & temp_set = clocked_temp_set.first;
+ clocked_temp_set.second = clock();
+
+ if (temp_set.empty()) {
+ temp_set = lookup_phrase(*phrase, my_sa);
+ }
+ if (first) {
+ main_set = ordered_set_intersect(first_set,temp_set);
+ first = false;
+ }
+ else {
+ main_set = ordered_set_intersect(main_set,temp_set);
+ }
+ if (temp_set.size() < MINIMUM_SIZE_TO_KEEP) {
+ cache.erase(*phrase);
+ }
+ }
+
+ if (first_set.size() < MINIMUM_SIZE_TO_KEEP) {
+ cache.erase(phrases.front());
+ }
+
+ return main_set;
+ }
+}
+
+
+SentIdSet find_occurrences(const std::string& rule, C_SuffixArraySearchApplicationBase & my_sa, PhraseSetMap & cache)
+{
+ SentIdSet sa_set;
+
+ // we search for hierarchical rules by stripping away NT and looking for terminals sequences
+ // if a rule contains multiple sequences of terminals, we intersect their occurrences.
+ if (hierarchical) {
+ // std::cerr << "splitting up phrase: " << phrase << "\n";
+ int pos = 0;
+ int endPos = 0;
+ vector<std::string> phrases;
+
+ while (rule.find("[X][X] ", pos) < rule.size()) {
+ endPos = rule.find("[X][X] ",pos) - 1; // -1 to cut space before NT
+ if (endPos < pos) { // no space: NT at start of rule (or two consecutive NTs)
+ pos += 7;
+ continue;
+ }
+ phrases.push_back(rule.substr(pos,endPos-pos));
+ pos = endPos + 8;
+ }
+
+ // cut LHS of rule
+ endPos = rule.size()-4;
+ if (endPos > pos) {
+ phrases.push_back(rule.substr(pos,endPos-pos));
+ }
+ sa_set = lookup_multiple_phrases(phrases, my_sa, rule, cache);
+ }
+ else {
+ sa_set = lookup_phrase(rule, my_sa);
+ }
+ return sa_set;
+}
+
+
// input: unordered list of translation options for a single source phrase
void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
{
@@ -201,41 +325,28 @@ void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
options.erase(options.begin()+pfe_filter_limit,options.end());
}
if (pef_filter_only) return;
-
+// std::cerr << "f phrase: " << options.front()->f_phrase << "\n";
SentIdSet fset;
- vector<S_SimplePhraseLocationElement> locations;
- //std::cerr << "Looking up f-phrase: " << options.front()->f_phrase << "\n";
-
- locations = f_sa.locateExactPhraseInCorpus(options.front()->f_phrase.c_str());
- if(locations.size()==0) {
- cerr<<"No occurrences found!!\n";
- }
- for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin();
- i != locations.end();
- ++i) {
- fset.insert(i->sentIdInCorpus);
- }
+ fset = find_occurrences(options.front()->f_phrase, f_sa, fsets);
size_t cf = fset.size();
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
const std::string& e_phrase = (*i)->e_phrase;
size_t cef=0;
- SentIdSet& eset = esets[(*i)->e_phrase];
+ ClockedSentIdSet& clocked_eset = esets[e_phrase];
+ SentIdSet & eset = clocked_eset.first;
+ clocked_eset.second = clock();
if (eset.empty()) {
- //std::cerr << "Looking up e-phrase: " << e_phrase << "\n";
- vector<S_SimplePhraseLocationElement> locations = e_sa.locateExactPhraseInCorpus(e_phrase.c_str());
- for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin(); i!= locations.end(); ++i) {
- TextLenType curSentId = i->sentIdInCorpus;
- eset.insert(curSentId);
- }
+ eset = find_occurrences(e_phrase, e_sa, esets);
+ //std::cerr << "Looking up e-phrase: " << e_phrase << "\n";
}
size_t ce=eset.size();
if (ce < cf) {
for (SentIdSet::iterator i=eset.begin(); i != eset.end(); ++i) {
- if (fset.find(*i) != fset.end()) cef++;
+ if (std::binary_search(fset.begin(), fset.end(), *i)) cef++;
}
} else {
for (SentIdSet::iterator i=fset.begin(); i != fset.end(); ++i) {
- if (eset.find(*i) != eset.end()) cef++;
+ if (std::binary_search(eset.begin(), eset.end(), *i)) cef++;
}
}
double nlp = -log(fisher_exact(cef, cf, ce));
@@ -243,6 +354,7 @@ void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
if (ce < MINIMUM_SIZE_TO_KEEP) {
esets.erase(e_phrase);
}
+
}
std::vector<PTEntry*>::iterator new_end =
std::remove_if(options.begin(), options.end(), NlogSigThresholder(sig_filter_limit));
@@ -250,13 +362,28 @@ void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
options.erase(new_end,options.end());
}
+void prune_cache(PhraseSetMap & psm) {
+ if(max_cache && psm.size() > max_cache) {
+ std::vector<clock_t> clocks;
+ for(PhraseSetMap::iterator it = psm.begin(); it != psm.end(); it++)
+ clocks.push_back(it->second.second);
+
+ std::sort(clocks.begin(), clocks.end());
+ clock_t out = clocks[psm.size()-max_cache];
+
+ for(PhraseSetMap::iterator it = psm.begin(); it != psm.end(); it++)
+ if(it->second.second < out)
+ psm.erase(it);
+ }
+}
+
int main(int argc, char * argv[])
{
int c;
const char* efile=0;
const char* ffile=0;
int pfe_index = 2;
- while ((c = getopt(argc, argv, "cpf:e:i:n:l:")) != -1) {
+ while ((c = getopt(argc, argv, "cpf:e:i:n:l:m:h")) != -1) {
switch (c) {
case 'e':
efile = optarg;
@@ -277,6 +404,12 @@ int main(int argc, char * argv[])
case 'p':
print_neglog_significance = true;
break;
+ case 'h':
+ hierarchical = true;
+ break;
+ case 'm':
+ max_cache = atoi(optarg);
+ break;
case 'l':
std::cerr << "-l = " << optarg << "\n";
if (strcmp(optarg,"a+e") == 0) {
@@ -333,9 +466,14 @@ int main(int argc, char * argv[])
size_t pt_lines = 0;
while(!cin.eof()) {
cin.getline(tmpString,10000,'\n');
- if(++pt_lines%10000==0) {
+ if(++pt_lines%10000==0) {
std::cerr << ".";
- if(pt_lines%500000==0) std::cerr << "[n:"<<pt_lines<<"]\n";
+
+ prune_cache(esets);
+ prune_cache(fsets);
+
+ if(pt_lines%500000==0)
+ std::cerr << "[n:"<<pt_lines<<"]\n";
}
if(strlen(tmpString)>0) {
diff --git a/contrib/tmcombine/README.md b/contrib/tmcombine/README.md
index 2d21b95c8..2cbc83299 100644
--- a/contrib/tmcombine/README.md
+++ b/contrib/tmcombine/README.md
@@ -58,7 +58,7 @@ Regression tests (check if the output files (`test/phrase-table_testN`) differ f
FURTHER NOTES
-------------
- - Different combination algorithms require different statistics. To be on the safe side, apply `train_model.patch` to `train_model.perl` and use the option `-phrase-word-alignment` when training models.
+ - Different combination algorithms require different statistics. To be on the safe side, use the options `-phrase-word-alignment` and `-write-lexical-counts` when training models.
- The script assumes that phrase tables are sorted (to allow incremental, more memory-friendly processing). Sort the tables with `LC_ALL=C`. Phrase tables produced by Moses are sorted correctly.
diff --git a/contrib/tmcombine/test/model3/model/lex.counts.e2f b/contrib/tmcombine/test/model3/model/lex.counts.e2f
new file mode 100644
index 000000000..ed05c0b7d
--- /dev/null
+++ b/contrib/tmcombine/test/model3/model/lex.counts.e2f
@@ -0,0 +1,8 @@
+ad af 500 1000
+bd bf 5 10
+der le 20285 102586
+der NULL 12926 704917
+gipfel sommet 3485 7322
+pass col 419 2911
+pass passeport 7 28
+sitzung séance 14 59 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model3/model/lex.counts.f2e b/contrib/tmcombine/test/model3/model/lex.counts.f2e
new file mode 100644
index 000000000..ea31f690d
--- /dev/null
+++ b/contrib/tmcombine/test/model3/model/lex.counts.f2e
@@ -0,0 +1,8 @@
+af ad 500 1000
+bf bd 5 10
+col pass 419 615
+le der 20285 113635
+passeport pass 7 615
+retrouvé NULL 34 1016136
+séance sitzung 14 33
+sommet gipfel 3485 5700 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model3/model/lex.e2f b/contrib/tmcombine/test/model3/model/lex.e2f
new file mode 100644
index 000000000..f9263ffe5
--- /dev/null
+++ b/contrib/tmcombine/test/model3/model/lex.e2f
@@ -0,0 +1,8 @@
+ad af 0.5
+bd bf 0.5
+der le 0.1977365
+der NULL 0.0183369
+gipfel sommet 0.4759629
+pass col 0.1439368
+pass passeport 0.2500000
+sitzung séance 0.2372881 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model3/model/lex.f2e b/contrib/tmcombine/test/model3/model/lex.f2e
new file mode 100644
index 000000000..2bba51f01
--- /dev/null
+++ b/contrib/tmcombine/test/model3/model/lex.f2e
@@ -0,0 +1,8 @@
+af ad 0.5
+bf bd 0.5
+col pass 0.6813008
+le der 0.1785101
+passeport pass 0.0113821
+retrouvé NULL 0.0000335
+séance sitzung 0.4242424
+sommet gipfel 0.6114035 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model3/model/phrase-table b/contrib/tmcombine/test/model3/model/phrase-table
new file mode 100644
index 000000000..f5c8647de
--- /dev/null
+++ b/contrib/tmcombine/test/model3/model/phrase-table
@@ -0,0 +1,8 @@
+ad ||| af ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 1000 1000
+bd ||| bf ||| 0.3 0.3 0.3 0.3 0.5 0.5 0.5 0.5 2.718 ||| 0-0 ||| 10 10
+der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00872768 0.0366795 0.611403 2.718 ||| 1-0 ||| 5808 518
+der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.0284616 0.288889 0.121619 2.718 ||| 0-0 1-1 ||| 749 45
+pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.143937 0.628866 0.681301 2.718 ||| 0-0 ||| 1875 582
+pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.25 0.00171821 3.813e-07 2.718 ||| 0-0 ||| 2 582
+pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.266667 0.25 0.00687285 0.0113821 2.718 ||| 0-0 ||| 15 582
+sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.272727 0.237288 0.352941 0.424242 2.718 ||| 0-0 ||| 22 17 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model4/model/lex.counts.e2f b/contrib/tmcombine/test/model4/model/lex.counts.e2f
new file mode 100644
index 000000000..8475fcdf9
--- /dev/null
+++ b/contrib/tmcombine/test/model4/model/lex.counts.e2f
@@ -0,0 +1,8 @@
+ad af 100 1000
+bd bf 1 10
+der le 150181 944391
+der NULL 54483 3595140
+gipfel sommet 3421 9342
+pass col 2 70
+pass passeport 73 379
+sitzung séance 3441 5753 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model4/model/lex.counts.f2e b/contrib/tmcombine/test/model4/model/lex.counts.f2e
new file mode 100644
index 000000000..b0913088a
--- /dev/null
+++ b/contrib/tmcombine/test/model4/model/lex.counts.f2e
@@ -0,0 +1,8 @@
+af ad 100 1000
+bf bd 1 10
+col pass 2 108
+le der 150181 1356104
+passeport pass 73 108
+retrouvé NULL 43 6276240
+séance sitzung 3441 6142
+sommet gipfel 3421 4908 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model4/model/lex.e2f b/contrib/tmcombine/test/model4/model/lex.e2f
new file mode 100644
index 000000000..b1ce3a613
--- /dev/null
+++ b/contrib/tmcombine/test/model4/model/lex.e2f
@@ -0,0 +1,8 @@
+ad af 0.1
+bd bf 0.1
+der le 0.1590242
+der NULL 0.0151546
+gipfel sommet 0.366195
+pass col 0.0285714
+pass passeport 0.1926121
+sitzung séance 0.5981227 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model4/model/lex.f2e b/contrib/tmcombine/test/model4/model/lex.f2e
new file mode 100644
index 000000000..d931dcb72
--- /dev/null
+++ b/contrib/tmcombine/test/model4/model/lex.f2e
@@ -0,0 +1,8 @@
+af ad 0.1
+bf bd 0.1
+col pass 0.0185185
+le der 0.1107445
+passeport pass 0.6759259
+retrouvé NULL 0.0000069
+séance sitzung 0.5602410
+sommet gipfel 0.6970253 \ No newline at end of file
diff --git a/contrib/tmcombine/test/model4/model/phrase-table b/contrib/tmcombine/test/model4/model/phrase-table
new file mode 100644
index 000000000..494b6a37f
--- /dev/null
+++ b/contrib/tmcombine/test/model4/model/phrase-table
@@ -0,0 +1,5 @@
+ad ||| af ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 1000 1000
+bd ||| bf ||| 0.6 0.6 0.6 0.6 0.1 0.1 0.1 0.1 2.718 ||| 0-0 ||| 10 10
+der pass ||| le passeport ||| 0.6 0.6 0.6 0.6 0.16 0.03063 0.4 0.0748551 2.718 ||| 0-0 1-1 ||| 25 10
+pass ||| passeport ||| 0.6 0.6 0.6 0.6 0.28022 0.192612 0.607143 0.675926 2.718 ||| 0-0 ||| 182 84
+sitzung ||| séance ||| 0.6 0.6 0.6 0.6 0.784521 0.598123 0.516654 0.560241 2.718 ||| 0-0 ||| 4251 6455 \ No newline at end of file
diff --git a/contrib/tmcombine/test/phrase-table_test10 b/contrib/tmcombine/test/phrase-table_test10
new file mode 100644
index 000000000..ee2aebeb1
--- /dev/null
+++ b/contrib/tmcombine/test/phrase-table_test10
@@ -0,0 +1,9 @@
+ad ||| af ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 25332.4712297 1074.23173673
+bd ||| bf ||| 0.3 0.3 0.3 0.3 0.11579 0.35574 0.472359 0.469238 2.718 ||| 0-0 ||| 253.324712297 10.7423173673
+der gipfel ||| sommet ||| 0.3 0.3 0.3 0.3 0.00327135 0.00686984 0.0366795 0.617135 2.718 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.3 0.3 0.3 0.3 0.0173565 0.023534 0.284201 0.0972183 2.718 ||| 0-0 1-1 ||| 749.0 45.7423173673
+der pass ||| le passeport ||| 6e-10 6e-10 6e-10 6e-10 0.16 0.0329324 0.0064913 0.00303408 2.718 ||| 0-0 1-1 ||| 608.311780741 45.7423173673
+pass ||| col ||| 0.3 0.3 0.3 0.3 0.1952 0.142393 0.6222 0.671744 2.718 ||| 0-0 ||| 1875.0 588.235465885
+pass ||| passeport retrouvé ||| 0.3 0.3 0.3 0.3 0.5 0.199258 0.0017 5.11945e-07 2.718 ||| 0-0 ||| 2.0 588.235465885
+pass ||| passeport ||| 0.3 0.3 0.3 0.3 0.280174 0.199258 0.0132359 0.0209644 2.718 ||| 0-0 ||| 4443.5097638 588.235465885
+sitzung ||| séance ||| 0.3 0.3 0.3 0.3 0.784412 0.59168 0.511045 0.552002 2.718 ||| 0-0 ||| 103459.335197 496.165860589
diff --git a/contrib/tmcombine/test/phrase-table_test8 b/contrib/tmcombine/test/phrase-table_test8
index 74eb27c0c..f0776cd80 100644
--- a/contrib/tmcombine/test/phrase-table_test8
+++ b/contrib/tmcombine/test/phrase-table_test8
@@ -1,9 +1,9 @@
-ad ||| af ||| 0.5 0.398085 0.5 0.482814 2.718 ||| 0-0 ||| 1000.000001 1000.000001
-bd ||| bf ||| 0.5 0.111367 0.5 0.172867 2.718 ||| 0-0 ||| 10.00000001 10.00000001
+ad ||| af ||| 0.242966 0.398085 0.483231 0.482814 2.718 ||| 0-0 ||| 2797.86490081 1043.7557397
+bd ||| bf ||| 0.102213 0.111367 0.174411 0.172867 2.718 ||| 0-0 ||| 1807.86490081 53.7557396976
der gipfel ||| sommet ||| 0.00327135 0.00863717 0.0366795 0.612073 2.718 ||| 1-0 ||| 5808.0 518.0
-der pass ||| le col ||| 0.0173565 0.0260469 0.288889 0.113553 2.718 ||| 0-0 1-1 ||| 749.0 45.00000001
-der pass ||| le passeport ||| 0.0064 0.0389201 8.88889e-12 0.0101009 2.718 ||| 0-0 1-1 ||| 2.5e-08 45.00000001
-pass ||| col ||| 0.1952 0.131811 0.628866 0.63621 2.718 ||| 0-0 ||| 1875.0 582.000000084
-pass ||| passeport retrouvé ||| 0.5 0.196956 0.00171821 1.89355e-06 2.718 ||| 0-0 ||| 2.0 582.000000084
-pass ||| passeport ||| 0.266667 0.196956 0.00687285 0.0565932 2.718 ||| 0-0 ||| 15.000000182 582.000000084
-sitzung ||| séance ||| 0.272727 0.545019 0.352941 0.502625 2.718 ||| 0-0 ||| 22.000004251 17.000006455
+der pass ||| le col ||| 0.0173565 0.0260469 0.146469 0.113553 2.718 ||| 0-0 1-1 ||| 749.0 88.7557396976
+der pass ||| le passeport ||| 0.16 0.0389201 0.197196 0.0101009 2.718 ||| 0-0 1-1 ||| 1797.86490081 88.7557396976
+pass ||| col ||| 0.1952 0.131811 0.584893 0.63621 2.718 ||| 0-0 ||| 1875.0 625.755739698
+pass ||| passeport retrouvé ||| 0.5 0.196956 0.00159806 1.89355e-06 2.718 ||| 0-0 ||| 2.0 625.755739698
+pass ||| passeport ||| 0.280108 0.196956 0.0488465 0.0565932 2.718 ||| 0-0 ||| 1812.86490081 625.755739698
+sitzung ||| séance ||| 0.778334 0.545019 0.470846 0.502625 2.718 ||| 0-0 ||| 1819.86490081 60.7557396976
diff --git a/contrib/tmcombine/test/phrase-table_test9 b/contrib/tmcombine/test/phrase-table_test9
new file mode 100644
index 000000000..017c97854
--- /dev/null
+++ b/contrib/tmcombine/test/phrase-table_test9
@@ -0,0 +1,9 @@
+ad ||| af ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 10000.0 5000.0
+bd ||| bf ||| 0.45 0.45 0.45 0.45 0.14 0.136364 0.18 0.3 2.718 ||| 0-0 ||| 100.0 50.0
+der gipfel ||| sommet ||| 0.15 0.15 0.15 0.15 0.00327135 0.00569336 0.0366795 0.651018 2.718 ||| 1-0 ||| 5808.0 518.0
+der pass ||| le col ||| 0.15 0.15 0.15 0.15 0.0173565 0.0193836 0.152941 0.0675369 2.718 ||| 0-0 1-1 ||| 749.0 85.0
+der pass ||| le passeport ||| 0.3 0.3 0.3 0.3 0.16 0.0307772 0.188235 0.0128336 2.718 ||| 0-0 1-1 ||| 225.0 85.0
+pass ||| col ||| 0.15 0.15 0.15 0.15 0.1952 0.121573 0.398693 0.582296 2.718 ||| 0-0 ||| 1875.0 918.0
+pass ||| passeport retrouvé ||| 0.15 0.15 0.15 0.15 0.5 0.193033 0.00108932 1.16835e-06 2.718 ||| 0-0 ||| 2.0 918.0
+pass ||| passeport ||| 0.45 0.45 0.45 0.45 0.280097 0.193033 0.22658 0.11065 2.718 ||| 0-0 ||| 1653.0 918.0
+sitzung ||| séance ||| 0.45 0.45 0.45 0.45 0.784227 0.597753 0.516546 0.559514 2.718 ||| 0-0 ||| 38281.0 25837.0
diff --git a/contrib/tmcombine/tmcombine.py b/contrib/tmcombine/tmcombine.py
index e166c33c8..6560ad23b 100755
--- a/contrib/tmcombine/tmcombine.py
+++ b/contrib/tmcombine/tmcombine.py
@@ -15,7 +15,7 @@
# Some general things to note:
-# - Different combination algorithms require different statistics. To be on the safe side, apply train_model.patch to train_model.perl and use the option -phrase-word-alignment for training all models.
+# - Different combination algorithms require different statistics. To be on the safe side, use the options `-phrase-word-alignment` and `-write-lexical-counts` when training models.
# - The script assumes that phrase tables are sorted (to allow incremental, more memory-friendly processing). sort with LC_ALL=C.
# - Some configurations require additional statistics that are loaded in memory (lexical tables; complete list of target phrases). If memory consumption is a problem, use the option --lowmem (slightly slower and writes temporary files to disk), or consider pruning your phrase table before combining (e.g. using Johnson et al. 2007).
# - The script can read/write gzipped files, but the Python implementation is slow. You're better off unzipping the files on the command line and working with the unzipped files.
@@ -36,7 +36,10 @@ from collections import defaultdict
from operator import mul
from tempfile import NamedTemporaryFile
from subprocess import Popen
-from itertools import izip
+try:
+ from itertools import izip
+except:
+ izip = zip
try:
from lxml import etree as ET
@@ -85,76 +88,9 @@ class Moses():
filename = os.path.join(model,'model',table)
fileobj = handle_file(filename,'open',mode)
return fileobj
-
-
- def load_phrase_counts(self,line,priority,i,store='all',filter_by=None,filter_by_src=None,filter_by_target=None,inverted=False):
- """take single phrase table line and store counts in internal data structure"""
- src = line[0]
- target = line[1]
-
- if inverted:
- src,target = target,src
- target_count = 0
- src_count = 0
-
- if (store == 'all' or store == 'pairs') and not (filter_by and not (src in filter_by and target in filter_by[src])):
-
- if priority < 10 or (src in self.phrase_pairs and target in self.phrase_pairs[src]):
-
- try:
- target_count,src_count = map(float,line[-1].split())
- except:
- sys.stderr.write(str(line)+'\n')
- sys.stderr.write('Counts are missing. Maybe you have an older Moses version without counts?\n')
- return
-
- scores = line[2].split()
- pst = float(scores[0])
- pts = float(scores[2])
-
- if priority == 2: #MAP
- self.phrase_pairs[src][target][0][0][i] = pst
- self.phrase_pairs[src][target][0][1][i] = pts
- else:
- self.phrase_pairs[src][target][0][0][i] = pst*target_count
- self.phrase_pairs[src][target][0][1][i] = pts*src_count
-
- self.store_info(src,target,line)
-
- if (store == 'all' or store == 'source') and not (filter_by_src and not filter_by_src.get(src)):
-
- if not src_count:
- try:
- if priority == 2: #MAP
- src_count = 1
- else:
- src_count = float(line[-1].split()[1])
- except:
- sys.stderr.write(str(line)+'\n')
- sys.stderr.write('Counts are missing. Maybe you have an older Moses version without counts?\n')
- return
-
- self.phrase_source[src][i] = src_count
-
- if (store == 'all' or store == 'target') and not (filter_by_target and not filter_by_target.get(target)):
-
- if not target_count:
- try:
- if priority == 2: #MAP
- target_count = 1
- else:
- target_count = float(line[-1].split()[0])
- except:
- sys.stderr.write(str(line)+'\n')
- sys.stderr.write('Counts are missing. Maybe you have an older Moses version without counts?\n')
- return
-
- self.phrase_target[target][i] = target_count
-
-
- def load_phrase_probabilities(self,line,priority,i,store='pairs',filter_by=None,filter_by_src=None,filter_by_target=None,inverted=False):
+ def load_phrase_features(self,line,priority,i,mode='interpolate',store='pairs',filter_by=None,filter_by_src=None,filter_by_target=None,inverted=False,flags=None):
"""take single phrase table line and store probablities in internal data structure"""
src = line[0]
@@ -167,21 +103,57 @@ class Moses():
self.store_info(src,target,line)
- model_probabilities = map(float,line[2].split()[:-1])
+ scores = line[2].split()
+ if len(scores) <self.number_of_features:
+ sys.stderr.write('Error: model only has {0} features. Expected {1}.\n'.format(len(scores),self.number_of_features))
+ exit()
+
+ scores = scores[:self.number_of_features]
+ model_probabilities = map(float,scores)
phrase_probabilities = self.phrase_pairs[src][target][0]
+ if mode == 'counts' and not priority == 2: #priority 2 is MAP
+ try:
+ target_count,src_count = map(float,line[-1].split())
+ except:
+ sys.stderr.write(str(line)+'\n')
+ sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
+ return
+
+ i_e2f = flags['i_e2f']
+ i_f2e = flags['i_f2e']
+ model_probabilities[i_e2f] *= target_count
+ model_probabilities[i_f2e] *= src_count
+
for j,p in enumerate(model_probabilities):
phrase_probabilities[j][i] = p
# mark that the src/target phrase has been seen.
# needed for re-normalization during linear interpolation
if (store == 'all' or store == 'source') and not (filter_by_src and not src in filter_by_src):
- self.phrase_source[src][i] = 1
+ if mode == 'counts' and not priority == 2: #priority 2 is MAP
+ try:
+ self.phrase_source[src][i] = float(line[-1].split()[1])
+ except:
+ sys.stderr.write(str(line)+'\n')
+ sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
+ return
+ else:
+ self.phrase_source[src][i] = 1
+
if (store == 'all' or store == 'target') and not (filter_by_target and not target in filter_by_target):
- self.phrase_target[target][i] = 1
+ if mode == 'counts' and not priority == 2: #priority 2 is MAP
+ try:
+ self.phrase_target[target][i] = float(line[-1].split()[0])
+ except:
+ sys.stderr.write(str(line)+'\n')
+ sys.stderr.write('Counts are missing. Maybe your phrase table is from an older Moses version that doesn\'t store counts?\n')
+ return
+ else:
+ self.phrase_target[target][i] = 1
- def load_reordering_probabilities(self,line,priority,i,store='pairs'):
+ def load_reordering_probabilities(self,line,priority,i,**unused):
"""take single reordering table line and store probablities in internal data structure"""
src = line[0]
@@ -189,13 +161,15 @@ class Moses():
model_probabilities = map(float,line[2].split())
reordering_probabilities = self.reordering_pairs[src][target]
-
- for j,p in enumerate(model_probabilities):
- reordering_probabilities[j][i] = p
-
-
+
+ try:
+ for j,p in enumerate(model_probabilities):
+ reordering_probabilities[j][i] = p
+ except IndexError:
+ sys.stderr.write('\nIndexError: Did you correctly specify the number of reordering features? (--number_of_features N in command line)\n')
+ exit()
- def traverse_incrementally(self,table,models,load_lines,store_flag,inverted=False,lowmem=False):
+ def traverse_incrementally(self,table,models,load_lines,store_flag,mode='interpolate',inverted=False,lowmem=False,flags=None):
"""hack-ish way to find common phrase pairs in multiple models in one traversal without storing it all in memory
relies on alphabetical sorting of phrase table.
"""
@@ -218,7 +192,7 @@ class Moses():
if increment != stack[i][0]:
continue
else:
- load_lines(stack[i],priority,i,store=store_flag,inverted=inverted)
+ load_lines(stack[i],priority,i,mode=mode,store=store_flag,inverted=inverted,flags=flags)
stack[i] = ''
for line in model:
@@ -229,7 +203,7 @@ class Moses():
stack[i] = line
break
- load_lines(line,priority,i,store=store_flag,inverted=inverted)
+ load_lines(line,priority,i,mode=mode,store=store_flag,inverted=inverted,flags=flags)
yield 1
@@ -246,16 +220,16 @@ class Moses():
a, b, prob = line.split(b' ')
- if side == 'e2f' and not e2f_filter or a in e2f_filter and b in e2f_filter[a]:
+ if side == 'e2f' and (not e2f_filter or a in e2f_filter and b in e2f_filter[a]):
self.word_pairs_e2f[a][b][i] = float(prob)
- elif side == 'f2e' and not f2e_filter or a in f2e_filter and b in f2e_filter[a]:
+ elif side == 'f2e' and (not f2e_filter or a in f2e_filter and b in f2e_filter[a]):
self.word_pairs_f2e[a][b][i] = float(prob)
- def load_word_counts(self,line,side,i,priority,e2f_filter=None,f2e_filter=None):
+ def load_word_counts(self,line,side,i,priority,e2f_filter=None,f2e_filter=None,flags=None):
"""process single line of lexical table"""
a, b, ab_count, b_count = line.split(b' ')
@@ -323,7 +297,7 @@ class Moses():
sys.stderr.write('Error: unexpected phrase table format. Your current configuration requires alignment information. Make sure you trained your model with -phrase-word-alignment\n')
exit()
- self.phrase_pairs[src][target][1] = ['',line[3].lstrip(b'| ')]
+ self.phrase_pairs[src][target][1] = [b'',line[3].lstrip(b'| ')]
else:
sys.stderr.write('Error: unexpected phrase table format. Are you using a very old/new version of Moses with different formatting?\n')
@@ -388,9 +362,16 @@ class Moses():
# information specific to Moses model: alignment info and comment section with target and source counts
alignment,comments = self.phrase_pairs[src][target][1]
+ if alignment:
+ extra_space = b' '
+ else:
+ extra_space = b''
+
if mode == 'counts':
- srccount = dot_product(self.phrase_source[src],weights[2])
- targetcount = dot_product(self.phrase_target[target],weights[0])
+ i_e2f = flags['i_e2f']
+ i_f2e = flags['i_f2e']
+ srccount = dot_product(self.phrase_source[src],weights[i_f2e])
+ targetcount = dot_product(self.phrase_target[target],weights[i_e2f])
comments = b"%s %s" %(targetcount,srccount)
features = b' '.join([b'%.6g' %(f) for f in features])
@@ -400,8 +381,7 @@ class Moses():
origin_features = b' '.join([b'%.4f' %(f) for f in origin_features]) + ' '
else:
origin_features = b''
-
- line = b"%s ||| %s ||| %s 2.718 %s||| %s ||| %s\n" %(src,target,features,origin_features,alignment,comments)
+ line = b"%s ||| %s ||| %s 2.718 %s||| %s%s||| %s\n" %(src,target,features,origin_features,alignment,extra_space,comments)
return line
@@ -446,7 +426,7 @@ class Moses():
if 0 in features:
return ''
- features = b' '.join([b'%6g' %(f) for f in features])
+ features = b' '.join([b'%.6g' %(f) for f in features])
line = b"%s ||| %s ||| %s\n" %(src,target,features)
return line
@@ -471,7 +451,8 @@ class Moses():
def merge(self,pt_normal, pt_inverse, pt_out, mode='interpolate'):
- """merge two phrasetables (the latter having been inverted to calculate p(s|t) and lex(s|t) in sorted order)"""
+ """merge two phrasetables (the latter having been inverted to calculate p(s|t) and lex(s|t) in sorted order)
+ Assumes that p(s|t) and lex(s|t) are in first table half, p(t|s) and lex(t|s) in second"""
for line,line2 in izip(pt_normal,pt_inverse):
@@ -479,9 +460,10 @@ class Moses():
line2 = line2.split(b' ||| ')
#scores
+ mid = int(self.number_of_features/2)
scores1 = line[2].split()
scores2 = line2[2].split()
- line[2] = b' '.join(scores2[:2]+scores1[2:])
+ line[2] = b' '.join(scores2[:mid]+scores1[mid:])
# marginal counts
if mode == 'counts':
@@ -722,17 +704,14 @@ def cross_entropy(model_interface,reference_interface,weights,score,mode,flags):
don't call this directly, but use one of the Combine_TMs methods.
"""
- weights = normalize_weights(weights,mode)
+ weights = normalize_weights(weights,mode,flags)
if 'compare_cross-entropies' in flags and flags['compare_cross-entropies']:
num_results = len(model_interface.models)
else:
num_results = 1
- cross_entropy_ts = [0]*num_results
- cross_entropy_st = [0]*num_results
- cross_entropy_lex_ts = [0]*num_results
- cross_entropy_lex_st = [0]*num_results
+ cross_entropies = [[0]*num_results for i in range(model_interface.number_of_features)]
oov = [0]*num_results
oov2 = 0
other_translations = [0]*num_results
@@ -780,11 +759,8 @@ def cross_entropy(model_interface,reference_interface,weights,score,mode,flags):
continue
n[i] += c
- cross_entropy_ts[i] += -log(features[2],2)*c
- cross_entropy_st[i] += -log(features[0],2)*c
-
- cross_entropy_lex_ts[i] += -log(features[3],2)*c
- cross_entropy_lex_st[i] += -log(features[1],2)*c
+ for j in range(model_interface.number_of_features):
+ cross_entropies[j][i] -= log(features[j],2)*c
elif src in model_interface.phrase_source and not ('compare_cross-entropies' in flags and flags['compare_cross-entropies']):
other_translations[i] += c
@@ -799,22 +775,18 @@ def cross_entropy(model_interface,reference_interface,weights,score,mode,flags):
for i in range(num_results):
try:
- cross_entropy_ts[i] /= n[i]
- cross_entropy_st[i] /= n[i]
- cross_entropy_lex_ts[i] /= n[i]
- cross_entropy_lex_st[i] /= n[i]
+ for j in range(model_interface.number_of_features):
+ cross_entropies[j][i] /= n[i]
except ZeroDivisionError:
sys.stderr.write('Warning: no matching phrase pairs between reference set and model\n')
- cross_entropy_ts[i] = 0
- cross_entropy_st[i] = 0
- cross_entropy_lex_ts[i] = 0
- cross_entropy_lex_st[i] = 0
+ for j in range(model_interface.number_of_features):
+ cross_entropies[j][i] = 0
if 'compare_cross-entropies' in flags and flags['compare_cross-entropies']:
- return [(cross_entropy_st[i],cross_entropy_lex_st[i],cross_entropy_ts[i],cross_entropy_lex_ts[i],other_translations[i],oov[i],ignored[i],n[i],total_pairs) for i in range(num_results)], (n[0],total_pairs,oov2)
+ return [tuple([ce[i] for ce in cross_entropies]) + (other_translations[i],oov[i],ignored[i],n[i],total_pairs) for i in range(num_results)], (n[0],total_pairs,oov2)
else:
- return cross_entropy_st[0],cross_entropy_lex_st[0],cross_entropy_ts[0],cross_entropy_lex_ts[0],other_translations[0],oov2,total_pairs
+ return tuple([ce[0] for ce in cross_entropies]) + (other_translations[0],oov2,total_pairs)
def cross_entropy_light(model_interface,reference_interface,weights,score,mode,flags,cache):
@@ -823,22 +795,16 @@ def cross_entropy_light(model_interface,reference_interface,weights,score,mode,f
Same as cross_entropy, but optimized for speed: it doesn't generate all of the statistics,
doesn't normalize, and uses caching.
"""
- weights = normalize_weights(weights,mode)
- cross_entropy_ts = 0
- cross_entropy_st = 0
- cross_entropy_lex_ts = 0
- cross_entropy_lex_st = 0
+ weights = normalize_weights(weights,mode,flags)
+ cross_entropies = [0]*model_interface.number_of_features
for (src,target,c) in cache:
features = score(weights,src,target,model_interface,flags,cache=True)
- cross_entropy_ts -= log(features[2],2)*c
- cross_entropy_st -= log(features[0],2)*c
-
- cross_entropy_lex_ts -= log(features[3],2)*c
- cross_entropy_lex_st -= log(features[1],2)*c
+ for i in range(model_interface.number_of_features):
+ cross_entropies[i] -= log(features[i],2)*c
- return cross_entropy_st,cross_entropy_lex_st,cross_entropy_ts,cross_entropy_lex_ts
+ return cross_entropies
def _get_reference_cache(reference_interface,model_interface):
@@ -881,16 +847,16 @@ def _get_lexical_filter(reference_interface,model_interface):
return e2f_filter,f2e_filter
-def _hillclimb_move(weights,stepsize,mode):
+def _hillclimb_move(weights,stepsize,mode,flags):
"""Move function for hillclimb algorithm. Updates each weight by stepsize."""
for i,w in enumerate(weights):
- yield normalize_weights(weights[:i]+[w+stepsize]+weights[i+1:],mode)
+ yield normalize_weights(weights[:i]+[w+stepsize]+weights[i+1:],mode,flags)
for i,w in enumerate(weights):
new = w-stepsize
if new >= 1e-10:
- yield normalize_weights(weights[:i]+[new]+weights[i+1:],mode)
+ yield normalize_weights(weights[:i]+[new]+weights[i+1:],mode,flags)
def _hillclimb(scores,best_weights,objective,model_interface,reference_interface,score_function,mode,flags,precision,cache,n):
"""first (deprecated) implementation of iterative weight optimization."""
@@ -912,7 +878,7 @@ def _hillclimb(scores,best_weights,objective,model_interface,reference_interface
move = 0
- for w in _hillclimb_move(list(best_weights),stepsize,mode):
+ for w in _hillclimb_move(list(best_weights),stepsize,mode,flags):
weights_tuple = tuple(w)
if weights_tuple in scores:
@@ -948,22 +914,18 @@ def optimize_cross_entropy_hillclimb(model_interface,reference_interface,initial
cache,n = _get_reference_cache(reference_interface,model_interface)
# each objective is a triple: which score to minimize from cross_entropy(), which weights to update accordingly, and a comment that is printed
- objectives = [(lambda x: scores[x][0]/n,[0],'minimize cross-entropy for p(s|t)'), #optimize cross-entropy for p(s|t)
- (lambda x: scores[x][1]/n,[1],'minimize cross-entropy for lex(s|t)'),
- (lambda x: scores[x][2]/n,[2],'minimize cross-entropy for p(t|s)'), #optimize for p(t|s)
- (lambda x: scores[x][3]/n,[3],'minimize cross-entropy for lex(t|s)')]
-
+ objectives = [(lambda x: scores[x][i]/n,[i],'minimize cross-entropy for feature {0}'.format(i)) for i in range(model_interface.number_of_features)]
scores[best_weights] = cross_entropy_light(model_interface,reference_interface,initial_weights,score_function,mode,flags,cache)
final_weights = initial_weights[:]
final_cross_entropy = [0]*model_interface.number_of_features
- for objective, features, comment in objectives:
+ for i,(objective, features, comment) in enumerate(objectives):
best_weights = min(scores,key=objective)
sys.stderr.write('Optimizing objective "' + comment +'"\n')
- best_weights = _hillclimb(scores,best_weights,objective,model_interface,reference_interface,score_function,mode,flags,precision,cache,n)
+ best_weights = _hillclimb(scores,best_weights,objective,model_interface,reference_interface,score_function,feature_specific_mode(mode,i,flags),flags,precision,cache,n)
- sys.stderr.write('\nCross-entropy:' + str(objective(best_weights)) + '- weights: ' + str(best_weights)+'\n\n')
+ sys.stderr.write('\nCross-entropy:' + str(objective(best_weights)) + ' - weights: ' + str(best_weights)+'\n\n')
for j in features:
final_weights[j] = list(best_weights)
@@ -984,11 +946,8 @@ def optimize_cross_entropy(model_interface,reference_interface,initial_weights,s
cache,n = _get_reference_cache(reference_interface,model_interface)
# each objective is a triple: which score to minimize from cross_entropy(), which weights to update accordingly, and a comment that is printed
- objectives = [(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[0],[0],'minimize cross-entropy for p(s|t)'), #optimize cross-entropy for p(s|t)
- (lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[1],[1],'minimize cross-entropy for lex(s|t)'),
- (lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[2],[2],'minimize cross-entropy for p(t|s)'), #optimize for p(t|s)
- (lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,mode,flags,cache)[3],[3],'minimize cross-entropy for lex(t|s)')]
-
+ objectives = [(lambda w: cross_entropy_light(model_interface,reference_interface,[[1]+list(w) for m in range(model_interface.number_of_features)],score_function,feature_specific_mode(mode,i,flags),flags,cache)[i],[i],'minimize cross-entropy for feature {0}'.format(i)) for i in range(model_interface.number_of_features)] #optimize cross-entropy for p(s|t)
+
final_weights = initial_weights[:]
final_cross_entropy = [0]*model_interface.number_of_features
@@ -996,7 +955,7 @@ def optimize_cross_entropy(model_interface,reference_interface,initial_weights,s
sys.stderr.write('Optimizing objective "' + comment +'"\n')
initial_values = [1]*(len(model_interface.models)-1) # we leave value of first model at 1 and optimize all others (normalized of course)
best_weights, best_point, data = fmin_l_bfgs_b(objective,initial_values,approx_grad=True,bounds=[(0.000000001,None)]*len(initial_values))
- best_weights = normalize_weights([1]+list(best_weights),mode)
+ best_weights = normalize_weights([1]+list(best_weights),feature_specific_mode(mode,i,flags),flags)
sys.stderr.write('Cross-entropy after L-BFGS optimization: ' + str(best_point/n) + ' - weights: ' + str(best_weights)+'\n')
for j in features:
@@ -1006,41 +965,59 @@ def optimize_cross_entropy(model_interface,reference_interface,initial_weights,s
return final_weights,final_cross_entropy
+def feature_specific_mode(mode,i,flags):
+ """in mode 'counts', only the default Moses features can be recomputed from raw frequencies;
+ all other features are interpolated by default.
+ This fucntion mostly serves optical purposes (i.e. normalizing a single weight vector for logging),
+ since normalize_weights also handles a mix of interpolated and recomputed features.
+ """
+
+ if mode == 'counts' and i not in [flags['i_e2f'],flags['i_e2f_lex'],flags['i_f2e'],flags['i_f2e_lex']]:
+ return 'interpolate'
+ else:
+ return mode
+
+
def redistribute_probability_mass(weights,src,target,interface,flags,mode='interpolate'):
"""the conditional probability p(x|y) is undefined for cases where p(y) = 0
this function redistributes the probability mass to only consider models for which p(y) > 0
"""
+ i_e2f = flags['i_e2f']
+ i_e2f_lex = flags['i_e2f_lex']
+ i_f2e = flags['i_f2e']
+ i_f2e_lex = flags['i_f2e_lex']
+
new_weights = weights[:]
if flags['normalize_s_given_t'] == 's':
# set weight to 0 for all models where target phrase is unseen (p(s|t)
- new_weights[0] = map(mul,interface.phrase_source[src],weights[0])
+ new_weights[i_e2f] = map(mul,interface.phrase_source[src],weights[i_e2f])
if flags['normalize-lexical_weights']:
- new_weights[1] = map(mul,interface.phrase_source[src],weights[1])
+ new_weights[i_e2f_lex] = map(mul,interface.phrase_source[src],weights[i_e2f_lex])
elif flags['normalize_s_given_t'] == 't':
# set weight to 0 for all models where target phrase is unseen (p(s|t)
- new_weights[0] = map(mul,interface.phrase_target[target],weights[0])
+ new_weights[i_e2f] = map(mul,interface.phrase_target[target],weights[i_e2f])
if flags['normalize-lexical_weights']:
- new_weights[1] = map(mul,interface.phrase_target[target],weights[1])
+ new_weights[i_e2f_lex] = map(mul,interface.phrase_target[target],weights[i_e2f_lex])
# set weight to 0 for all models where source phrase is unseen (p(t|s)
- new_weights[2] = map(mul,interface.phrase_source[src],weights[2])
+ new_weights[i_f2e] = map(mul,interface.phrase_source[src],weights[i_f2e])
if flags['normalize-lexical_weights']:
- new_weights[3] = map(mul,interface.phrase_source[src],weights[3])
+ new_weights[i_f2e_lex] = map(mul,interface.phrase_source[src],weights[i_f2e_lex])
- return normalize_weights(new_weights,mode)
+ return normalize_weights(new_weights,mode,flags)
def score_interpolate(weights,src,target,interface,flags,cache=False):
"""linear interpolation of probabilites (and other feature values)
if normalized is True, the probability mass for p(x|y) is redistributed to models with p(y) > 0
"""
-
+
model_values = interface.phrase_pairs[src][target][0]
scores = [0]*len(model_values)
@@ -1059,12 +1036,12 @@ def score_interpolate(weights,src,target,interface,flags,cache=False):
lts = 0
else:
- scores[1] = compute_lexicalweight(normalized_weights[1],e2f_alignment,interface.word_pairs_e2f,None,mode='interpolate')
- scores[3] = compute_lexicalweight(normalized_weights[3],f2e_alignment,interface.word_pairs_f2e,None,mode='interpolate')
+ scores[flags['i_e2f_lex']] = compute_lexicalweight(normalized_weights[flags['i_e2f_lex']],e2f_alignment,interface.word_pairs_e2f,None,mode='interpolate')
+ scores[flags['i_f2e_lex']] = compute_lexicalweight(normalized_weights[flags['i_f2e_lex']],f2e_alignment,interface.word_pairs_f2e,None,mode='interpolate')
for idx,prob in enumerate(model_values):
- if not ('recompute_lexweights' in flags and flags['recompute_lexweights'] and (idx == 1 or idx == 3)):
+ if not ('recompute_lexweights' in flags and flags['recompute_lexweights'] and (idx == flags['i_e2f_lex'] or idx == flags['i_f2e_lex'])):
scores[idx] = dot_product(prob,normalized_weights[idx])
return scores
@@ -1093,32 +1070,43 @@ def score_counts(weights,src,target,interface,flags,cache=False):
each count is multiplied by its weight; trivial case is weight 1 for each model, which corresponds to a concatentation
"""
+ i_e2f = flags['i_e2f']
+ i_e2f_lex = flags['i_e2f_lex']
+ i_f2e = flags['i_f2e']
+ i_f2e_lex = flags['i_f2e_lex']
+
+ # if we have non-default number of weights, assume that we might have to do a mix of count-based and interpolated scores.
+ if len(weights) == 4:
+ scores = [0]*len(weights)
+ else:
+ scores = score_interpolate(weights,src,target,interface,flags,cache=cache)
+
try:
- joined_count = dot_product(interface.phrase_pairs[src][target][0][0],weights[0])
- target_count = dot_product(interface.phrase_target[target],weights[0])
- pst = joined_count / target_count
+ joined_count = dot_product(interface.phrase_pairs[src][target][0][i_e2f],weights[i_e2f])
+ target_count = dot_product(interface.phrase_target[target],weights[i_e2f])
+ scores[i_e2f] = joined_count / target_count
except ZeroDivisionError:
- pst = 0
+ scores[i_e2f] = 0
try:
- joined_count = dot_product(interface.phrase_pairs[src][target][0][1],weights[2])
- source_count = dot_product(interface.phrase_source[src],weights[2])
- pts = joined_count / source_count
+ joined_count = dot_product(interface.phrase_pairs[src][target][0][i_f2e],weights[i_f2e])
+ source_count = dot_product(interface.phrase_source[src],weights[i_f2e])
+ scores[i_f2e] = joined_count / source_count
except ZeroDivisionError:
- pts = 0
+ scores[i_f2e] = 0
e2f_alignment,f2e_alignment = interface.get_word_alignments(src,target,cache=cache)
if not e2f_alignment or not f2e_alignment:
sys.stderr.write('Error: no word alignments found, but necessary for lexical weight computation.\n')
- lst = 0
- lts = 0
+ scores[i_e2f_lex] = 0
+ scores[i_f2e_lex] = 0
else:
- lst = compute_lexicalweight(weights[1],e2f_alignment,interface.word_pairs_e2f,interface.word_target,mode='counts',cache=cache)
- lts = compute_lexicalweight(weights[3],f2e_alignment,interface.word_pairs_f2e,interface.word_source,mode='counts',cache=cache)
+ scores[i_e2f_lex] = compute_lexicalweight(weights[i_e2f_lex],e2f_alignment,interface.word_pairs_e2f,interface.word_target,mode='counts',cache=cache)
+ scores[i_f2e_lex] = compute_lexicalweight(weights[i_f2e_lex],f2e_alignment,interface.word_pairs_f2e,interface.word_source,mode='counts',cache=cache)
- return [pst,lst,pts,lts]
+ return scores
def score_interpolate_reordering(weights,src,target,interface):
@@ -1174,7 +1162,7 @@ def compute_lexicalweight(weights,alignment,word_pairs,marginal,mode='counts',ca
return lex
-def normalize_weights(weights,mode):
+def normalize_weights(weights,mode,flags=None):
"""make sure that probability mass in linear interpolation is 1
for weighted counts, weight of first model is set to 1
"""
@@ -1203,7 +1191,7 @@ def normalize_weights(weights,mode):
except ZeroDivisionError:
sys.stderr.write('Error: Zero division in weight normalization. Are some of your weights zero? This might lead to undefined behaviour if a phrase pair is only seen in model with weight 0\n')
- elif mode == 'counts':
+ elif mode == 'counts_pure':
if type(weights[0]) == list:
@@ -1217,6 +1205,19 @@ def normalize_weights(weights,mode):
ratio = 1/weights[0]
new_weights = [weight * ratio for weight in weights]
+ # make sure that features other than the standard Moses features are always interpolated (since no count-based computation is defined)
+ elif mode == 'counts':
+
+ if type(weights[0]) == list:
+ norm_counts = normalize_weights(weights,'counts_pure')
+ new_weights = normalize_weights(weights,'interpolate')
+ for i in [flags['i_e2f'],flags['i_e2f_lex'],flags['i_f2e'],flags['i_f2e_lex']]:
+ new_weights[i] = norm_counts[i]
+ return new_weights
+
+ else:
+ return normalize_weights(weights,'counts_pure')
+
return new_weights
@@ -1236,7 +1237,7 @@ def handle_file(filename,action,fileobj=None,mode='r'):
if 'counts' in filename and os.path.exists(os.path.isdir(filename)):
sys.stderr.write('For a weighted counts combination, we need statistics that Moses doesn\'t write to disk by default.\n')
- sys.stderr.write('Apply train_model.patch to train_model.perl and repeat step 4 of Moses training for all models.\n')
+ sys.stderr.write('Repeat step 4 of Moses training for all models with the option -write-lexical-counts.\n')
exit()
@@ -1288,7 +1289,11 @@ class Combine_TMs():
'normalize_s_given_t':None,
'normalize-lexical_weights':True,
'add_origin_features':False,
- 'lowmem': False
+ 'lowmem': False,
+ 'i_e2f':0,
+ 'i_e2f_lex':1,
+ 'i_f2e':2,
+ 'i_f2e_lex':3
}
# each model needs a priority. See init docstring for more info
@@ -1296,7 +1301,17 @@ class Combine_TMs():
'map':2,
'supplementary':10}
- def __init__(self,models,weights=None,output_file=None,mode='interpolate',number_of_features=4,model_interface=Moses,reference_interface=Moses_Alignment,reference_file=None,lang_src=None,lang_target=None,output_lexical=None,**flags):
+ def __init__(self,models,weights=None,
+ output_file=None,
+ mode='interpolate',
+ number_of_features=4,
+ model_interface=Moses,
+ reference_interface=Moses_Alignment,
+ reference_file=None,
+ lang_src=None,
+ lang_target=None,
+ output_lexical=None,
+ **flags):
"""The whole configuration of the task is done during intialization. Afterwards, you only need to call your intended method(s).
You can change some of the class attributes afterwards (such as the weights, or the output file), but you should never change the models or mode after initialization.
See unit_test function for example configurations
@@ -1316,13 +1331,18 @@ class Combine_TMs():
output_lexical: If defined, also writes combined lexical tables. Writes to output_lexical.e2f and output_lexical.f2e, or output_lexical.counts.e2f in mode 'counts'.
mode: declares the basic mixture-model algorithm. there are currently three options:
- 'counts': weighted counts (requires some statistics that Moses doesn't produce. Apply train_model.patch to train_model.perl and repeat step 4 of Moses training to obtain them.)
+ 'counts': weighted counts (requires some statistics that Moses doesn't produce. Repeat step 4 of Moses training with the option -write-lexical-counts to obtain them.)
+ Only the standard Moses features are recomputed from weighted counts; additional features are linearly interpolated
+ (see number_of_features to allow more features, and i_e2f etc. if the standard features are in a non-standard position)
'interpolate': linear interpolation
'loglinear': loglinear interpolation (careful: this creates the intersection of phrase tables and is often of little use)
number_of_features: could be used to interpolate models with non-default Moses features. 4 features is currently still hardcoded in various places
(e.g. cross_entropy calculations, mode 'counts')
+ i_e2f,i_e2f_lex,i_f2e,i_f2e_lex: Index of the (Moses) phrase table features p(s|t), lex(s|t), p(t|s) and lex(t|s).
+ Relevant for mode 'counts', and if 'recompute_lexweights' is True in mode 'interpolate'. In mode 'counts', any additional features are combined through linear interpolation.
+
model_interface: class that handles reading phrase tables and lexical tables, and writing phrase tables. Currently only Moses is implemented.
default: Moses
@@ -1378,7 +1398,15 @@ class Combine_TMs():
self.lang_target = lang_target
self.loaded = defaultdict(int)
self.output_lexical = output_lexical
+
+ self.flags = copy.copy(self.flags)
+ self.flags.update(flags)
+ self.flags['i_e2f'] = int(self.flags['i_e2f'])
+ self.flags['i_e2f_lex'] = int(self.flags['i_e2f_lex'])
+ self.flags['i_f2e'] = int(self.flags['i_f2e'])
+ self.flags['i_f2e_lex'] = int(self.flags['i_f2e_lex'])
+
if reference_interface:
self.reference_interface = reference_interface(reference_file)
@@ -1395,22 +1423,18 @@ class Combine_TMs():
if mode == 'interpolate':
self.score = score_interpolate
- self.load_lines = self.model_interface.load_phrase_probabilities
elif mode == 'loglinear':
self.score = score_loglinear
- self.load_lines = self.model_interface.load_phrase_probabilities
elif mode == 'counts':
self.score = score_counts
- self.load_lines = self.model_interface.load_phrase_counts
-
- self.flags = copy.copy(self.flags)
- self.flags.update(flags)
def _sanity_checks(self,models,number_of_features,weights):
"""check if input arguments make sense (correct number of weights, valid model priorities etc.)
is only called on initialization. If you change weights afterwards, better know what you're doing.
"""
+
+ number_of_features = int(number_of_features)
for (model,priority) in models:
assert(priority in self._priorities)
@@ -1437,12 +1461,12 @@ class Combine_TMs():
sys.stderr.write('Warning: No weights defined: initializing with uniform weights\n')
- new_weights = normalize_weights(weights,self.mode)
+ new_weights = normalize_weights(weights,self.mode,self.flags)
if weights != new_weights:
if self.mode == 'interpolate' or self.mode == 'loglinear':
sys.stderr.write('Warning: weights should sum to 1 - ')
elif self.mode == 'counts':
- sys.stderr.write('Warning: normalizing weights so that first model has weight 1 - ')
+ sys.stderr.write('Warning: normalizing weights so that first model has weight 1 (for features that are recomputed from counts) - ')
sys.stderr.write('normalizing to: '+ str(new_weights) +'\n')
weights = new_weights
@@ -1472,7 +1496,7 @@ class Combine_TMs():
if 'pt-filtered' in data and not self.loaded['pt-filtered']:
models_prioritized = [(self.model_interface.open_table(model,'phrase-table'),priority,i) for (model,priority,i) in priority_sort_models(self.models)]
-
+
for model,priority,i in models_prioritized:
sys.stderr.write('Loading phrase table ' + str(i) + ' (only data relevant for reference set)')
j = 0
@@ -1481,7 +1505,7 @@ class Combine_TMs():
sys.stderr.write('...'+str(j))
j += 1
line = line.rstrip().split(b' ||| ')
- self.load_lines(line,priority,i,store='all',filter_by=self.reference_interface.word_pairs,filter_by_src=self.reference_interface.word_source,filter_by_target=self.reference_interface.word_target)
+ self.model_interface.load_phrase_features(line,priority,i,store='all',mode=self.mode,filter_by=self.reference_interface.word_pairs,filter_by_src=self.reference_interface.word_source,filter_by_target=self.reference_interface.word_target,flags=self.flags)
sys.stderr.write(' done\n')
self.loaded['pt-filtered'] = 1
@@ -1506,7 +1530,7 @@ class Combine_TMs():
sys.stderr.write('...'+str(j))
j += 1
line = line.rstrip().split(b' ||| ')
- self.load_lines(line,priority,i,store='target')
+ self.model_interface.load_phrase_features(line,priority,i,mode=self.mode,store='target',flags=self.flags)
sys.stderr.write(' done\n')
self.loaded['pt-target'] = 1
@@ -1554,8 +1578,8 @@ class Combine_TMs():
i = 0
sys.stderr.write('Incrementally loading and processing phrase tables...')
- for block in self.model_interface.traverse_incrementally('phrase-table',models,self.load_lines,store_flag,inverted=inverted,lowmem=self.flags['lowmem']):
-
+
+ for block in self.model_interface.traverse_incrementally('phrase-table',models,self.model_interface.load_phrase_features,store_flag,mode=self.mode,inverted=inverted,lowmem=self.flags['lowmem'],flags=self.flags):
for src in sorted(self.model_interface.phrase_pairs, key = lambda x: x + b' |'):
for target in sorted(self.model_interface.phrase_pairs[src], key = lambda x: x + b' |'):
@@ -1642,8 +1666,8 @@ class Combine_TMs():
i = 0
sys.stderr.write('Incrementally loading and processing phrase tables...')
- for block in self.model_interface.traverse_incrementally('reordering-table',models,self.model_interface.load_reordering_probabilities,'pairs',lowmem=self.flags['lowmem']):
-
+
+ for block in self.model_interface.traverse_incrementally('reordering-table',models,self.model_interface.load_reordering_probabilities,'pairs',mode=self.mode,lowmem=self.flags['lowmem'],flags=self.flags):
for src in sorted(self.model_interface.reordering_pairs):
for target in sorted(self.model_interface.reordering_pairs[src]):
if not i % 1000000:
@@ -1676,18 +1700,21 @@ class Combine_TMs():
results, (intersection,total_pairs,oov2) = cross_entropy(self.model_interface,self.reference_interface,self.weights,self.score,self.mode,self.flags)
padding = 90
+ num_features = self.model_interface.number_of_features
print('\nResults of model comparison\n')
print('{0:<{padding}}: {1}'.format('phrase pairs in reference (tokens)',total_pairs, padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in model intersection (tokens)',intersection, padding=padding))
print('{0:<{padding}}: {1}\n'.format('phrase pairs in model union (tokens)',total_pairs-oov2, padding=padding))
- for i,(cross_entropy_st,cross_entropy_lex_st,cross_entropy_ts,cross_entropy_lex_ts,other_translations,oov,ignored,n,total_pairs) in enumerate(results):
+ for i,data in enumerate(results):
+
+ cross_entropies = data[:num_features]
+ (other_translations,oov,ignored,n,total_pairs) = data[num_features:]
+
print('model ' +str(i))
- print('{0:<{padding}}: {1}'.format('cross-entropy p(s|t)', cross_entropy_st, padding=padding))
- print('{0:<{padding}}: {1}'.format('cross-entropy lex(s|t)', cross_entropy_lex_st, padding=padding))
- print('{0:<{padding}}: {1}'.format('cross-entropy p(t|s)', cross_entropy_ts, padding=padding))
- print('{0:<{padding}}: {1}'.format('cross-entropy lex(t|s)', cross_entropy_lex_ts, padding=padding))
+ for j in range(num_features):
+ print('{0:<{padding}}: {1}'.format('cross-entropy for feature {0}'.format(j), cross_entropies[j], padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in model (tokens)', n+ignored, padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in model, but not in intersection (tokens)', ignored, padding=padding))
print('{0:<{padding}}: {1}'.format('phrase pairs in union, but not in model (but source phrase is) (tokens)', other_translations, padding=padding))
@@ -1786,6 +1813,18 @@ def test():
Combiner = Combine_TMs([[os.path.join('test','model1'),'primary'],[os.path.join('test','model2'),'map']],output_file=os.path.join('test','phrase-table_test8'),mode='counts',reference_file='test/extract')
Combiner.combine_given_tuning_set()
+ # count-based combination of two non-default models, with fixed weights. Same as test 3, but with the standard features moved back
+ # command line: python tmcombine.py combine_given_weights test/model3 test/model4 -w "0.5,0.5;0.5,0.5;0.5,0.5;0.5,0.5;0.1,0.9;0.1,1;0.2,0.8;0.5,0.5" -o test/phrase-table_test9 -m counts --number_of_features 8 --i_e2f 4 --i_e2f_lex 5 --i_f2e 6 --i_f2e_lex 7 -r test/extract
+ sys.stderr.write('Regression test 9\n')
+ Combiner = Combine_TMs([[os.path.join('test','model3'),'primary'],[os.path.join('test','model4'),'primary']],[[0.5,0.5],[0.5,0.5],[0.5,0.5],[0.5,0.5],[0.1,0.9],[0.1,1],[0.2,0.8],[0.5,0.5]],os.path.join('test','phrase-table_test9'),mode='counts',number_of_features=8,i_e2f=4,i_e2f_lex=5,i_f2e=6,i_f2e_lex=7)
+ Combiner.combine_given_weights()
+
+ # count-based combination of two non-default models, with fixed weights. Same as test 5, but with the standard features moved back
+ # command line: python tmcombine.py combine_given_tuning_set test/model3 test/model4 -o test/phrase-table_test10 -m counts --number_of_features 8 --i_e2f 4 --i_e2f_lex 5 --i_f2e 6 --i_f2e_lex 7 -r test/extract
+ sys.stderr.write('Regression test 10\n')
+ Combiner = Combine_TMs([[os.path.join('test','model3'),'primary'],[os.path.join('test','model4'),'primary']],output_file=os.path.join('test','phrase-table_test10'),mode='counts',number_of_features=8,i_e2f=4,i_e2f_lex=5,i_f2e=6,i_f2e_lex=7,reference_file='test/extract')
+ Combiner.combine_given_tuning_set()
+
#convert weight vector passed as a command line argument
class to_list(argparse.Action):
@@ -1800,46 +1839,68 @@ class to_list(argparse.Action):
def parse_command_line():
parser = argparse.ArgumentParser(description='Combine translation models. Check DOCSTRING of the class Combine_TMs() and its methods for a more in-depth documentation and additional configuration options not available through the command line. The function test() shows examples.')
- parser.add_argument('action', metavar='ACTION', choices=["combine_given_weights","combine_given_tuning_set","combine_reordering_tables","compute_cross_entropy","return_best_cross_entropy","compare_cross_entropies"],
+ group1 = parser.add_argument_group('Main options')
+ group2 = parser.add_argument_group('More model combination options')
+
+ group1.add_argument('action', metavar='ACTION', choices=["combine_given_weights","combine_given_tuning_set","combine_reordering_tables","compute_cross_entropy","return_best_cross_entropy","compare_cross_entropies"],
help='What you want to do with the models. One of %(choices)s.')
- parser.add_argument('model', metavar='DIRECTORY', nargs='+',
+ group1.add_argument('model', metavar='DIRECTORY', nargs='+',
help='Model directory. Assumes default Moses structure (i.e. path to phrase table and lexical tables).')
- parser.add_argument('-w', '--weights', dest='weights', action=to_list,
+ group1.add_argument('-w', '--weights', dest='weights', action=to_list,
default=None,
help='weight vector. Format 1: single vector, one weight per model. Example: \"0.1,0.9\" ; format 2: one vector per feature, one weight per model: \"0.1,0.9;0.5,0.5;0.4,0.6;0.2,0.8\"')
- parser.add_argument('-m', '--mode', type=str,
+ group1.add_argument('-m', '--mode', type=str,
default="interpolate",
choices=["counts","interpolate","loglinear"],
help='basic mixture-model algorithm. Default: %(default)s. Note: depending on mode and additional configuration, additional statistics are needed. Check docstring documentation of Combine_TMs() for more info.')
- parser.add_argument('-r', '--reference', type=str,
+ group1.add_argument('-r', '--reference', type=str,
default=None,
help='File containing reference phrase pairs for cross-entropy calculation. Default interface expects \'path/model/extract.gz\' that is produced by training a model on the reference (i.e. development) corpus.')
- parser.add_argument('-o', '--output', type=str,
+ group1.add_argument('-o', '--output', type=str,
default="-",
help='Output file (phrase table). If not specified, model is written to standard output.')
- parser.add_argument('--output-lexical', type=str,
+ group1.add_argument('--output-lexical', type=str,
default=None,
help=('Not only create a combined phrase table, but also combined lexical tables. Writes to OUTPUT_LEXICAL.e2f and OUTPUT_LEXICAL.f2e, or OUTPUT_LEXICAL.counts.e2f in mode \'counts\'.'))
- parser.add_argument('--lowmem', action="store_true",
+ group1.add_argument('--lowmem', action="store_true",
help=('Low memory mode: requires two passes (and sorting in between) to combine a phrase table, but loads less data into memory. Only relevant for mode "counts" and some configurations of mode "interpolate".'))
- parser.add_argument('--normalized', action="store_true",
- help=('for each phrase pair x,y: ignore models with p(y)=0, and distribute probability mass among models with p(y)>0. (default: missing entries (x,y) are always interpreted as p(x|y)=0). Only relevant in mode "interpolate".'))
-
- parser.add_argument('--recompute_lexweights', action="store_true",
- help=('don\'t directly interpolate lexical weights, but interpolate word translation probabilities instead and recompute the lexical weights. Only relevant in mode "interpolate".'))
-
- parser.add_argument('--tempdir', type=str,
+ group1.add_argument('--tempdir', type=str,
default=None,
help=('Temporary directory in --lowmem mode.'))
+ group2.add_argument('--i_e2f', type=int,
+ default=0, metavar='N',
+ help=('Index of p(f|e) (relevant for mode counts if phrase table has custom feature order). (default: %(default)s)'))
+
+ group2.add_argument('--i_e2f_lex', type=int,
+ default=1, metavar='N',
+ help=('Index of lex(f|e) (relevant for mode counts or with option recompute_lexweights if phrase table has custom feature order). (default: %(default)s)'))
+
+ group2.add_argument('--i_f2e', type=int,
+ default=2, metavar='N',
+ help=('Index of p(e|f) (relevant for mode counts if phrase table has custom feature order). (default: %(default)s)'))
+
+ group2.add_argument('--i_f2e_lex', type=int,
+ default=3, metavar='N',
+ help=('Index of lex(e|f) (relevant for mode counts or with option recompute_lexweights if phrase table has custom feature order). (default: %(default)s)'))
+
+ group2.add_argument('--number_of_features', type=int,
+ default=4, metavar='N',
+ help=('Combine models with N + 1 features (last feature is constant phrase penalty). (default: %(default)s)'))
+
+ group2.add_argument('--normalized', action="store_true",
+ help=('for each phrase pair x,y: ignore models with p(y)=0, and distribute probability mass among models with p(y)>0. (default: missing entries (x,y) are always interpreted as p(x|y)=0). Only relevant in mode "interpolate".'))
+
+ group2.add_argument('--recompute_lexweights', action="store_true",
+ help=('don\'t directly interpolate lexical weights, but interpolate word translation probabilities instead and recompute the lexical weights. Only relevant in mode "interpolate".'))
return parser.parse_args()
@@ -1854,7 +1915,21 @@ if __name__ == "__main__":
else:
args = parse_command_line()
#initialize
- combiner = Combine_TMs([(m,'primary') for m in args.model],weights=args.weights,mode=args.mode,output_file=args.output,reference_file=args.reference,output_lexical=args.output_lexical,lowmem=args.lowmem,normalized=args.normalized,recompute_lexweights=args.recompute_lexweights,tempdir=args.tempdir)
+ combiner = Combine_TMs([(m,'primary') for m in args.model],
+ weights=args.weights,
+ mode=args.mode,
+ output_file=args.output,
+ reference_file=args.reference,
+ output_lexical=args.output_lexical,
+ lowmem=args.lowmem,
+ normalized=args.normalized,
+ recompute_lexweights=args.recompute_lexweights,
+ tempdir=args.tempdir,
+ number_of_features=args.number_of_features,
+ i_e2f=args.i_e2f,
+ i_e2f_lex=args.i_e2f_lex,
+ i_f2e=args.i_f2e,
+ i_f2e_lex=args.i_f2e_lex)
# execute right method
f_string = "combiner."+args.action+'()'
exec(f_string)
diff --git a/contrib/tmcombine/train_model.patch b/contrib/tmcombine/train_model.patch
deleted file mode 100644
index d422a1628..000000000
--- a/contrib/tmcombine/train_model.patch
+++ /dev/null
@@ -1,24 +0,0 @@
---- train-model.perl 2011-11-01 15:17:04.763230934 +0100
-+++ train-model.perl 2011-11-01 15:17:00.033229220 +0100
-@@ -1185,15 +1185,21 @@
-
- open(F2E,">$lexical_file.f2e") or die "ERROR: Can't write $lexical_file.f2e";
- open(E2F,">$lexical_file.e2f") or die "ERROR: Can't write $lexical_file.e2f";
-+ open(F2E2,">$lexical_file.counts.f2e") or die "ERROR: Can't write $lexical_file.counts.f2e";
-+ open(E2F2,">$lexical_file.counts.e2f") or die "ERROR: Can't write $lexical_file.counts.e2f";
-
- foreach my $f (keys %WORD_TRANSLATION) {
- foreach my $e (keys %{$WORD_TRANSLATION{$f}}) {
- printf F2E "%s %s %.7f\n",$e,$f,$WORD_TRANSLATION{$f}{$e}/$TOTAL_FOREIGN{$f};
- printf E2F "%s %s %.7f\n",$f,$e,$WORD_TRANSLATION{$f}{$e}/$TOTAL_ENGLISH{$e};
-+ printf F2E2 "%s %s %i %i\n",$e,$f,$WORD_TRANSLATION{$f}{$e},$TOTAL_FOREIGN{$f};
-+ printf E2F2 "%s %s %i %i\n",$f,$e,$WORD_TRANSLATION{$f}{$e},$TOTAL_ENGLISH{$e};
- }
- }
- close(E2F);
- close(F2E);
-+ close(E2F2);
-+ close(F2E2);
- print STDERR "Saved: $lexical_file.f2e and $lexical_file.e2f\n";
- }
-
diff --git a/contrib/web/bin/daemon.pl b/contrib/web/bin/daemon.pl
index 8e6a08739..acb63bda7 100755
--- a/contrib/web/bin/daemon.pl
+++ b/contrib/web/bin/daemon.pl
@@ -1,4 +1,5 @@
#!/usr/bin/perl -w
+use FindBin qw($Bin);
use warnings;
use strict;
$|++;
@@ -27,12 +28,12 @@ use IPC::Open2;
#------------------------------------------------------------------------------
# constants, global vars, config
-my $MOSES = '/local/herves/moses/moses-irst';
-my $MOSES_INI = '/local/herves/moses/fr-en/moses.ini.2';
+my $MOSES = "$Bin/../../../bin/moses";
-die "usage: daemon.pl <hostname> <port>" unless (@ARGV == 2);
+die "usage: daemon.pl <hostname> <port> <ini>" unless (@ARGV == 3);
my $LISTEN_HOST = shift;
my $LISTEN_PORT = shift;
+my $MOSES_INI = shift;
#------------------------------------------------------------------------------
# main
diff --git a/cruise-control/config.ems b/cruise-control/config.ems
index e64f68f43..e4b9592ba 100644
--- a/cruise-control/config.ems
+++ b/cruise-control/config.ems
@@ -8,6 +8,9 @@
#
working-dir = WORKDIR/ems_workdir
+# Giza and friends
+external-bin-dir = WORKDIR/giza-pp/bin/
+
# specification of the language pair
input-extension = fr
output-extension = en
diff --git a/cruise-control/test_all_new_commits.sh b/cruise-control/test_all_new_commits.sh
index 873691e22..93ef30cf1 100755
--- a/cruise-control/test_all_new_commits.sh
+++ b/cruise-control/test_all_new_commits.sh
@@ -44,7 +44,7 @@ GITREPO="$MCC_GITREPO"
# location of moses regression test data archive (assumes url at the moment)
REGTEST_ARCHIVE="$MCC_REGTEST_ARCHIVE"
[ -n "$REGTEST_ARCHIVE" ] \
- || REGTEST_ARCHIVE="http://www.statmt.org/moses/reg-testing/moses-reg-test-data-10.tgz"
+ || REGTEST_ARCHIVE="git://github.com/moses-smt/moses-regression-tests.git"
if [ ! -d "$WORKDIR" ]; then
mkdir "$WORKDIR" || die "Failed to create workdir $WORKDIR"
@@ -87,16 +87,24 @@ function run_single_test () {
err=""
cd regression-testing
- regtest_file=$(echo "$REGTEST_ARCHIVE" | sed 's/^.*\///')
+ #regtest_file=$(echo "$REGTEST_ARCHIVE" | sed 's/^.*\///')
# download data for regression tests if necessary
- if [ ! -f $regtest_file.ok ]; then
- wget $REGTEST_ARCHIVE &> /dev/null \
- || die "Failed to download data for regression tests"
- tar xzf $regtest_file
- touch $regtest_file.ok
+ regtest_dir=$PWD/moses-reg-test-data
+ if [ -e $regtest_dir ]; then
+ (cd $regtest_dir; git pull) &> /dev/null ||
+ die "Failed to update regression testing data"
+ else
+ git clone $REGTEST_ARCHIVE $regtest_dir &> /dev/null ||
+ die "Failed to clone regression testing data"
fi
- regtest_dir=$PWD/$(basename $regtest_file .tgz)
+ #if [ ! -f $regtest_file.ok ]; then
+ # wget $REGTEST_ARCHIVE &> /dev/null \
+ # || die "Failed to download data for regression tests"
+ # tar xzf $regtest_file
+ # touch $regtest_file.ok
+ #fi
+ #regtest_dir=$PWD/$(basename $regtest_file .tgz)
cd ..
diff --git a/jam-files/engine/debian/rules b/jam-files/engine/debian/rules
index 756052a3b..756052a3b 100644..100755
--- a/jam-files/engine/debian/rules
+++ b/jam-files/engine/debian/rules
diff --git a/jam-files/fail/Jamroot b/jam-files/fail/Jamroot
new file mode 100644
index 000000000..c3584d896
--- /dev/null
+++ b/jam-files/fail/Jamroot
@@ -0,0 +1,4 @@
+actions fail {
+ false
+}
+make fail : : fail ;
diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam
index 7b93cf36f..6beec3f94 100644
--- a/jam-files/sanity.jam
+++ b/jam-files/sanity.jam
@@ -3,6 +3,8 @@ import option ;
import os ;
import path ;
import project ;
+import build-system ;
+import version ;
#Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php
rule trim-nl ( str extras * ) {
@@ -13,8 +15,20 @@ rule _shell ( cmd : extras * ) {
return [ trim-nl [ SHELL $(cmd) : $(extras) ] ] ;
}
+rule shell_or_fail ( cmd ) {
+ local ret = [ SHELL $(cmd) : exit-status ] ;
+ if $(ret[2]) != 0 {
+ exit $(cmd) failed : 1 ;
+ }
+}
+
+cxxflags = [ os.environ "CXXFLAGS" ] ;
+cflags = [ os.environ "CFLAGS" ] ;
+ldflags = [ os.environ "LDFLAGS" ] ;
+
#Run g++ with empty main and these arguments to see if it passes.
rule test_flags ( flags * ) {
+ flags = $(cxxflags) $(ldflags) $(flags) ;
local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'int main() {}' -o /dev/null >/dev/null 2>/dev/null\"" ;
local ret = [ SHELL $(cmd) : exit-status ] ;
if --debug-configuration in [ modules.peek : ARGV ] {
@@ -28,6 +42,14 @@ rule test_flags ( flags * ) {
}
}
+rule test_header ( name ) {
+ return [ test_flags "-include $(name)" ] ;
+}
+
+rule test_library ( name ) {
+ return [ test_flags "-l$(name)" ] ;
+}
+
{
local cleaning = [ option.get "clean" : : yes ] ;
cleaning ?= [ option.get "clean-all" : no : yes ] ;
@@ -37,13 +59,25 @@ rule test_flags ( flags * ) {
constant CLEANING : $(cleaning) ;
}
+requirements = ;
+
+FORCE-STATIC = [ option.get "static" : : "yes" ] ;
+if $(FORCE-STATIC) {
+ requirements += <runtime-link>static ;
+}
+
#Determine if a library can be compiled statically.
rule auto-shared ( name : additional * ) {
additional ?= "" ;
if [ test_flags $(additional)" -static -l"$(name) ] {
return ;
} else {
- return "<link>shared" ;
+ if $(FORCE-STATIC) {
+ echo "Could not statically link against lib $(name). Your build will probably fail." ;
+ return ;
+ } else {
+ return "<link>shared" ;
+ }
}
}
@@ -60,6 +94,7 @@ if $(with-macports) {
}
else {
with-boost = [ option.get "with-boost" ] ;
+ with-boost ?= [ os.environ "BOOST_ROOT" ] ;
if $(with-boost) {
L-boost-search = -L$(with-boost)/lib" "-L$(with-boost)/lib64 ;
boost-search = <search>$(with-boost)/lib <search>$(with-boost)/lib64 ;
@@ -73,18 +108,16 @@ else {
}
}
-#Are we linking static binaries against shared boost?
-boost-auto-shared = [ auto-shared "boost_program_options" : $(L-boost-search) ] ;
#Convenience rule for boost libraries. Defines library boost_$(name).
-rule boost-lib ( name macro ) {
+rule boost-lib ( name macro : deps * ) {
#Link multi-threaded programs against the -mt version if available. Old
#versions of boost do not have -mt tagged versions of all libraries. Sadly,
#boost.jam does not handle this correctly.
- if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt" ] {
- lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name) ;
- lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt ;
+ if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt$(boost-lib-version)" ] {
+ lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name)$(boost-lib-version) : : <library>$(deps) ;
+ lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt$(boost-lib-version) : : <library>$(deps) ;
} else {
- lib inner_boost_$(name) : : $(boost-search) <name>boost_$(name) ;
+ lib inner_boost_$(name) : : $(boost-search) <name>boost_$(name)$(boost-lib-version) : : <library>$(deps) ;
}
alias boost_$(name) : inner_boost_$(name) : $(boost-auto-shared) : : <link>shared:<define>BOOST_$(macro) $(boost-include) ;
@@ -92,7 +125,7 @@ rule boost-lib ( name macro ) {
#Argument is e.g. 103600
rule boost ( min-version ) {
- local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_VERSION '\"" ;
+ local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ;
local boost-shell = [ SHELL "$(cmd)" : exit-status ] ;
if $(boost-shell[2]) != 0 && $(CLEANING) = no {
echo Failed to run "$(cmd)" ;
@@ -102,11 +135,24 @@ rule boost ( min-version ) {
if $(boost-version) < $(min-version) && $(CLEANING) = no {
exit You have Boost $(boost-version). This package requires Boost at least $(min-version) (and preferably newer). : 1 ;
}
+ # If matching version tags exist, use them.
+ boost-lib-version = [ MATCH "#define BOOST_LIB_VERSION \"([^\"]*)\"" : $(boost-shell[1]) ] ;
+ if [ test_flags $(L-boost-search)" -lboost_program_options-"$(boost-lib-version) ] {
+ boost-lib-version = "-"$(boost-lib-version) ;
+ } else {
+ boost-lib-version = "" ;
+ }
+
+ #Are we linking static binaries against shared boost?
+ boost-auto-shared = [ auto-shared "boost_program_options"$(boost-lib-version) : $(L-boost-search) ] ;
+
#See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define.
- boost-lib thread THREAD_DYN_DLL ;
+ boost-lib system SYSTEM_DYN_LINK ;
+ boost-lib thread THREAD_DYN_DLL : boost_system ;
boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ;
boost-lib unit_test_framework TEST_DYN_LINK ;
boost-lib iostreams IOSTREAMS_DYN_LINK ;
+ boost-lib filesystem FILE_SYSTEM_DYN_LINK : boost_system ;
}
#Link normally to a library, but sometimes static isn't installed so fall back to dynamic.
@@ -132,15 +178,10 @@ rule external-lib ( name : search-path * ) {
local ignored = @($(build-log):E=$(script)) ;
}
-requirements = ;
-{
- local cxxflags = [ os.environ "CXXFLAGS" ] ;
- local cflags = [ os.environ "CFLAGS" ] ;
- local ldflags = [ os.environ "LDFLAGS" ] ;
-
- #Boost jam's static clang for Linux is buggy.
- requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
+#Boost jam's static clang for Linux is buggy.
+requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
+if ! [ option.get "without-libsegfault" : : "yes" ] && ! $(FORCE-STATIC) {
#libSegFault prints a stack trace on segfault. Link against it if available.
if [ test_flags "-lSegFault" ] {
external-lib SegFault ;
@@ -158,12 +199,14 @@ if [ option.get "git" : : "yes" ] {
prefix = [ option.get "prefix" ] ;
if $(prefix) {
prefix = [ path.root $(prefix) [ path.pwd ] ] ;
+ prefix = $(prefix)$(GITTAG) ;
} else {
- prefix = $(TOP)/dist$(GITTAG) ;
+ prefix = $(TOP)$(GITTAG) ;
}
+
+bindir = [ option.get "bindir" : $(prefix)/bin ] ;
+libdir = [ option.get "libdir" : $(prefix)/lib ] ;
rule install-bin-libs ( deps * ) {
- local bindir = [ option.get "bindir" : $(prefix)/bin ] ;
- local libdir = [ option.get "libdir" : $(prefix)/lib ] ;
install prefix-bin : $(deps) : <location>$(bindir) <install-dependencies>on <install-type>EXE <link>shared:<dll-path>$(libdir) ;
install prefix-lib : $(deps) : <location>$(libdir) <install-dependencies>on <install-type>LIB <link>shared:<dll-path>$(libdir) ;
}
@@ -172,3 +215,49 @@ rule install-headers ( name : list * : source-root ? ) {
source-root ?= "." ;
install $(name) : $(list) : <location>$(includedir) <install-source-root>$(source-root) ;
}
+
+rule build-projects ( projects * ) {
+ for local p in $(projects) {
+ build-project $(p) ;
+ }
+}
+
+#Only one post build hook is allowed. Allow multiple.
+post-hooks = ;
+rule post-build ( ok ? ) {
+ for local r in $(post-hooks) {
+ $(r) $(ok) ;
+ }
+}
+IMPORT $(__name__) : post-build : : $(__name__).post-build ;
+build-system.set-post-build-hook $(__name__).post-build ;
+rule add-post-hook ( names * ) {
+ post-hooks += $(names) ;
+}
+
+import feature : feature ;
+feature options-to-write : : free ;
+import toolset : flags ;
+flags write-options OPTIONS-TO-WRITE <options-to-write> ;
+actions write-options {
+ echo "$(OPTIONS-TO-WRITE)" > $(<) ;
+}
+
+#Compare contents of file with current. If they're different, write to the
+#file. This file can then be used with <dependency>$(file) to force
+#recompilation.
+rule update-if-changed ( file current ) {
+ if ( ! [ path.exists $(file) ] ) || ( [ _shell "cat $(file)" ] != $(current) ) {
+ make $(file) : : $(__name__).write-options : <options-to-write>$(current) ;
+ always $(file) ;
+ }
+}
+
+if [ option.get "sanity-test" : : "yes" ] {
+ local current_version = [ modules.peek : JAM_VERSION ] ;
+ if ( $(current_version[0]) < 2000 && [ version.check-jam-version 3 1 16 ] ) || [ version.check-jam-version 2011 0 0 ] {
+ EXIT "Sane" : 0 ;
+ } else {
+ EXIT "Bad" : 1 ;
+ }
+}
diff --git a/kenlm b/kenlm
deleted file mode 120000
index 0e6a96204..000000000
--- a/kenlm
+++ /dev/null
@@ -1 +0,0 @@
-lm \ No newline at end of file
diff --git a/lm/Jamfile b/lm/Jamfile
index 68039a1b6..88455709b 100644
--- a/lm/Jamfile
+++ b/lm/Jamfile
@@ -1,4 +1,14 @@
-lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc virtual_interface.cc vocab.cc ../util//kenutil : <include>.. : : <include>.. <library>../util//kenutil ;
+# If you need higher order, change this option
+# Having this limit means that State can be
+# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
+# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
+max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
+if ( $(max-order) != 6 ) {
+ echo "Setting KenLM maximum n-gram order to $(max-order)" ;
+}
+max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
+
+lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc value_build.cc virtual_interface.cc vocab.cc ../util//kenutil : <include>.. $(max-order) : : <include>.. <library>../util//kenutil $(max-order) ;
import testing ;
@@ -7,6 +17,4 @@ run model_test.cc ../util//kenutil kenlm ..//boost_unit_test_framework : : test.
exe query : ngram_query.cc kenlm ../util//kenutil ;
exe build_binary : build_binary.cc kenlm ../util//kenutil ;
-
-install legacy : build_binary query
- : <location>$(TOP)/lm <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/lm <link>shared:<install-type>LIB ;
+exe kenlm_max_order : max_order.cc : $(max-order) ;
diff --git a/lm/binary_format.cc b/lm/binary_format.cc
index 4796f6d1b..a56e998ef 100644
--- a/lm/binary_format.cc
+++ b/lm/binary_format.cc
@@ -57,7 +57,7 @@ struct Sanity {
}
};
-const char *kModelNames[6] = {"hashed n-grams with probing", "hashed n-grams with sorted uniform find", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"};
+const char *kModelNames[6] = {"probing hash tables", "probing hash tables with rest costs", "trie", "trie with quantization", "trie with array-compressed pointers", "trie with quantization and array-compressed pointers"};
std::size_t TotalHeaderSize(unsigned char order) {
return ALIGN8(sizeof(Sanity) + sizeof(FixedWidthParameters) + sizeof(uint64_t) * order);
diff --git a/lm/build_binary.cc b/lm/build_binary.cc
index 8cbb69d0a..49901c9ea 100644
--- a/lm/build_binary.cc
+++ b/lm/build_binary.cc
@@ -25,7 +25,11 @@ void Usage(const char *name) {
"-i allows buggy models from IRSTLM by mapping positive log probability to 0.\n"
"-w mmap|after determines how writing is done.\n"
" mmap maps the binary file and writes to it. Default for trie.\n"
-" after allocates anonymous memory, builds, and writes. Default for probing.\n\n"
+" after allocates anonymous memory, builds, and writes. Default for probing.\n"
+"-r \"order1.arpa order2 order3 order4\" adds lower-order rest costs from these\n"
+" model files. order1.arpa must be an ARPA file. All others may be ARPA or\n"
+" the same data structure as being built. All files must have the same\n"
+" vocabulary. For probing, the unigrams must be in the same order.\n\n"
"type is either probing or trie. Default is probing.\n\n"
"probing uses a probing hash table. It is the fastest but uses the most memory.\n"
"-p sets the space multiplier and must be >1.0. The default is 1.5.\n\n"
@@ -66,16 +70,28 @@ uint8_t ParseBitCount(const char *from) {
return val;
}
+void ParseFileList(const char *from, std::vector<std::string> &to) {
+ to.clear();
+ while (true) {
+ const char *i;
+ for (i = from; *i && *i != ' '; ++i) {}
+ to.push_back(std::string(from, i - from));
+ if (!*i) break;
+ from = i + 1;
+ }
+}
+
void ShowSizes(const char *file, const lm::ngram::Config &config) {
std::vector<uint64_t> counts;
util::FilePiece f(file);
lm::ReadARPACounts(f, counts);
- std::size_t sizes[5];
+ std::size_t sizes[6];
sizes[0] = ProbingModel::Size(counts, config);
- sizes[1] = TrieModel::Size(counts, config);
- sizes[2] = QuantTrieModel::Size(counts, config);
- sizes[3] = ArrayTrieModel::Size(counts, config);
- sizes[4] = QuantArrayTrieModel::Size(counts, config);
+ sizes[1] = RestProbingModel::Size(counts, config);
+ sizes[2] = TrieModel::Size(counts, config);
+ sizes[3] = QuantTrieModel::Size(counts, config);
+ sizes[4] = ArrayTrieModel::Size(counts, config);
+ sizes[5] = QuantArrayTrieModel::Size(counts, config);
std::size_t max_length = *std::max_element(sizes, sizes + sizeof(sizes) / sizeof(size_t));
std::size_t min_length = *std::min_element(sizes, sizes + sizeof(sizes) / sizeof(size_t));
std::size_t divide;
@@ -99,10 +115,11 @@ void ShowSizes(const char *file, const lm::ngram::Config &config) {
for (long int i = 0; i < length - 2; ++i) std::cout << ' ';
std::cout << prefix << "B\n"
"probing " << std::setw(length) << (sizes[0] / divide) << " assuming -p " << config.probing_multiplier << "\n"
- "trie " << std::setw(length) << (sizes[1] / divide) << " without quantization\n"
- "trie " << std::setw(length) << (sizes[2] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n"
- "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n"
- "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n";
+ "probing " << std::setw(length) << (sizes[1] / divide) << " assuming -r models -p " << config.probing_multiplier << "\n"
+ "trie " << std::setw(length) << (sizes[2] / divide) << " without quantization\n"
+ "trie " << std::setw(length) << (sizes[3] / divide) << " assuming -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits << " quantization \n"
+ "trie " << std::setw(length) << (sizes[4] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " array pointer compression\n"
+ "trie " << std::setw(length) << (sizes[5] / divide) << " assuming -a " << (unsigned)config.pointer_bhiksha_bits << " -q " << (unsigned)config.prob_bits << " -b " << (unsigned)config.backoff_bits<< " array pointer compression and quantization\n";
}
void ProbingQuantizationUnsupported() {
@@ -118,10 +135,10 @@ int main(int argc, char *argv[]) {
using namespace lm::ngram;
try {
- bool quantize = false, set_backoff_bits = false, bhiksha = false, set_write_method = false;
+ bool quantize = false, set_backoff_bits = false, bhiksha = false, set_write_method = false, rest = false;
lm::ngram::Config config;
int opt;
- while ((opt = getopt(argc, argv, "q:b:a:u:p:t:m:w:si")) != -1) {
+ while ((opt = getopt(argc, argv, "q:b:a:u:p:t:m:w:sir:")) != -1) {
switch(opt) {
case 'q':
config.prob_bits = ParseBitCount(optarg);
@@ -164,6 +181,11 @@ int main(int argc, char *argv[]) {
case 'i':
config.positive_log_probability = lm::SILENT;
break;
+ case 'r':
+ rest = true;
+ ParseFileList(optarg, config.rest_lower_files);
+ config.rest_function = Config::REST_LOWER;
+ break;
default:
Usage(argv[0]);
}
@@ -174,35 +196,48 @@ int main(int argc, char *argv[]) {
}
if (optind + 1 == argc) {
ShowSizes(argv[optind], config);
- } else if (optind + 2 == argc) {
+ return 0;
+ }
+ const char *model_type;
+ const char *from_file;
+
+ if (optind + 2 == argc) {
+ model_type = "probing";
+ from_file = argv[optind];
config.write_mmap = argv[optind + 1];
- if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
- ProbingModel(argv[optind], config);
} else if (optind + 3 == argc) {
- const char *model_type = argv[optind];
- const char *from_file = argv[optind + 1];
+ model_type = argv[optind];
+ from_file = argv[optind + 1];
config.write_mmap = argv[optind + 2];
- if (!strcmp(model_type, "probing")) {
- if (!set_write_method) config.write_method = Config::WRITE_AFTER;
- if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
+ } else {
+ Usage(argv[0]);
+ }
+ if (!strcmp(model_type, "probing")) {
+ if (!set_write_method) config.write_method = Config::WRITE_AFTER;
+ if (quantize || set_backoff_bits) ProbingQuantizationUnsupported();
+ if (rest) {
+ RestProbingModel(from_file, config);
+ } else {
ProbingModel(from_file, config);
- } else if (!strcmp(model_type, "trie")) {
- if (!set_write_method) config.write_method = Config::WRITE_MMAP;
- if (quantize) {
- if (bhiksha) {
- QuantArrayTrieModel(from_file, config);
- } else {
- QuantTrieModel(from_file, config);
- }
+ }
+ } else if (!strcmp(model_type, "trie")) {
+ if (rest) {
+ std::cerr << "Rest + trie is not supported yet." << std::endl;
+ return 1;
+ }
+ if (!set_write_method) config.write_method = Config::WRITE_MMAP;
+ if (quantize) {
+ if (bhiksha) {
+ QuantArrayTrieModel(from_file, config);
} else {
- if (bhiksha) {
- ArrayTrieModel(from_file, config);
- } else {
- TrieModel(from_file, config);
- }
+ QuantTrieModel(from_file, config);
}
} else {
- Usage(argv[0]);
+ if (bhiksha) {
+ ArrayTrieModel(from_file, config);
+ } else {
+ TrieModel(from_file, config);
+ }
}
} else {
Usage(argv[0]);
diff --git a/lm/config.cc b/lm/config.cc
index dbe762b32..f9d988cab 100644
--- a/lm/config.cc
+++ b/lm/config.cc
@@ -19,6 +19,7 @@ Config::Config() :
write_mmap(NULL),
write_method(WRITE_AFTER),
include_vocab(true),
+ rest_function(REST_MAX),
prob_bits(8),
backoff_bits(8),
pointer_bhiksha_bits(22),
diff --git a/lm/config.hh b/lm/config.hh
index 01b756322..739cee9c1 100644
--- a/lm/config.hh
+++ b/lm/config.hh
@@ -1,11 +1,13 @@
#ifndef LM_CONFIG__
#define LM_CONFIG__
-#include <iosfwd>
-
#include "lm/lm_exception.hh"
#include "util/mmap.hh"
+#include <iosfwd>
+#include <string>
+#include <vector>
+
/* Configuration for ngram model. Separate header to reduce pollution. */
namespace lm {
@@ -63,23 +65,33 @@ struct Config {
const char *temporary_directory_prefix;
// Level of complaining to do when loading from ARPA instead of binary format.
- typedef enum {ALL, EXPENSIVE, NONE} ARPALoadComplain;
+ enum ARPALoadComplain {ALL, EXPENSIVE, NONE};
ARPALoadComplain arpa_complain;
// While loading an ARPA file, also write out this binary format file. Set
// to NULL to disable.
const char *write_mmap;
- typedef enum {
+ enum WriteMethod {
WRITE_MMAP, // Map the file directly.
WRITE_AFTER // Write after we're done.
- } WriteMethod;
+ };
WriteMethod write_method;
// Include the vocab in the binary file? Only effective if write_mmap != NULL.
bool include_vocab;
+ // Left rest options. Only used when the model includes rest costs.
+ enum RestFunction {
+ REST_MAX, // Maximum of any score to the left
+ REST_LOWER, // Use lower-order files given below.
+ };
+ RestFunction rest_function;
+ // Only used for REST_LOWER.
+ std::vector<std::string> rest_lower_files;
+
+
// Quantization options. Only effective for QuantTrieModel. One value is
// reserved for each of prob and backoff, so 2^bits - 1 buckets will be used
diff --git a/lm/left.hh b/lm/left.hh
index 308644228..751984c5e 100644
--- a/lm/left.hh
+++ b/lm/left.hh
@@ -38,8 +38,7 @@
#ifndef LM_LEFT__
#define LM_LEFT__
-#include "lm/max_order.hh"
-#include "lm/model.hh"
+#include "lm/state.hh"
#include "lm/return.hh"
#include "util/murmur_hash.hh"
@@ -49,72 +48,6 @@
namespace lm {
namespace ngram {
-struct Left {
- bool operator==(const Left &other) const {
- return
- (length == other.length) &&
- pointers[length - 1] == other.pointers[length - 1];
- }
-
- int Compare(const Left &other) const {
- if (length != other.length) return length < other.length ? -1 : 1;
- if (pointers[length - 1] > other.pointers[length - 1]) return 1;
- if (pointers[length - 1] < other.pointers[length - 1]) return -1;
- return 0;
- }
-
- bool operator<(const Left &other) const {
- if (length != other.length) return length < other.length;
- return pointers[length - 1] < other.pointers[length - 1];
- }
-
- void ZeroRemaining() {
- for (uint64_t * i = pointers + length; i < pointers + kMaxOrder - 1; ++i)
- *i = 0;
- }
-
- unsigned char length;
- uint64_t pointers[kMaxOrder - 1];
-};
-
-inline size_t hash_value(const Left &left) {
- return util::MurmurHashNative(&left.length, 1, left.length ? left.pointers[left.length - 1] : 0);
-}
-
-struct ChartState {
- bool operator==(const ChartState &other) {
- return (left == other.left) && (right == other.right) && (full == other.full);
- }
-
- int Compare(const ChartState &other) const {
- int lres = left.Compare(other.left);
- if (lres) return lres;
- int rres = right.Compare(other.right);
- if (rres) return rres;
- return (int)full - (int)other.full;
- }
-
- bool operator<(const ChartState &other) const {
- return Compare(other) == -1;
- }
-
- void ZeroRemaining() {
- left.ZeroRemaining();
- right.ZeroRemaining();
- }
-
- Left left;
- bool full;
- State right;
-};
-
-inline size_t hash_value(const ChartState &state) {
- size_t hashes[2];
- hashes[0] = hash_value(state.left);
- hashes[1] = hash_value(state.right);
- return util::MurmurHashNative(hashes, sizeof(size_t) * 2, state.full);
-}
-
template <class M> class RuleScore {
public:
explicit RuleScore(const M &model, ChartState &out) : model_(model), out_(out), left_done_(false), prob_(0.0) {
@@ -131,29 +64,30 @@ template <class M> class RuleScore {
void Terminal(WordIndex word) {
State copy(out_.right);
FullScoreReturn ret(model_.FullScore(copy, word, out_.right));
- prob_ += ret.prob;
- if (left_done_) return;
+ if (left_done_) { prob_ += ret.prob; return; }
if (ret.independent_left) {
+ prob_ += ret.prob;
left_done_ = true;
return;
}
out_.left.pointers[out_.left.length++] = ret.extend_left;
+ prob_ += ret.rest;
if (out_.right.length != copy.length + 1)
left_done_ = true;
}
// Faster version of NonTerminal for the case where the rule begins with a non-terminal.
- void BeginNonTerminal(const ChartState &in, float prob) {
+ void BeginNonTerminal(const ChartState &in, float prob = 0.0) {
prob_ = prob;
out_ = in;
- left_done_ = in.full;
+ left_done_ = in.left.full;
}
- void NonTerminal(const ChartState &in, float prob) {
+ void NonTerminal(const ChartState &in, float prob = 0.0) {
prob_ += prob;
if (!in.left.length) {
- if (in.full) {
+ if (in.left.full) {
for (const float *i = out_.right.backoff; i < out_.right.backoff + out_.right.length; ++i) prob_ += *i;
left_done_ = true;
out_.right = in.right;
@@ -163,17 +97,20 @@ template <class M> class RuleScore {
if (!out_.right.length) {
out_.right = in.right;
- if (left_done_) return;
+ if (left_done_) {
+ prob_ += model_.UnRest(in.left.pointers, in.left.pointers + in.left.length, 1);
+ return;
+ }
if (out_.left.length) {
left_done_ = true;
} else {
out_.left = in.left;
- left_done_ = in.full;
+ left_done_ = in.left.full;
}
return;
}
- float backoffs[kMaxOrder - 1], backoffs2[kMaxOrder - 1];
+ float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1];
float *back = backoffs, *back2 = backoffs2;
unsigned char next_use = out_.right.length;
@@ -186,7 +123,7 @@ template <class M> class RuleScore {
std::swap(back, back2);
}
- if (in.full) {
+ if (in.left.full) {
for (const float *i = back; i != back + next_use; ++i) prob_ += *i;
left_done_ = true;
out_.right = in.right;
@@ -213,10 +150,17 @@ template <class M> class RuleScore {
float Finish() {
// A N-1-gram might extend left and right but we should still set full to true because it's an N-1-gram.
- out_.full = left_done_ || (out_.left.length == model_.Order() - 1);
+ out_.left.full = left_done_ || (out_.left.length == model_.Order() - 1);
return prob_;
}
+ void Reset() {
+ prob_ = 0.0;
+ left_done_ = false;
+ out_.left.length = 0;
+ out_.right.length = 0;
+ }
+
private:
bool ExtendLeft(const ChartState &in, unsigned char &next_use, unsigned char extend_length, const float *back_in, float *back_out) {
ProcessRet(model_.ExtendLeft(
@@ -228,8 +172,9 @@ template <class M> class RuleScore {
if (next_use != out_.right.length) {
left_done_ = true;
if (!next_use) {
- out_.right = in.right;
// Early exit.
+ out_.right = in.right;
+ prob_ += model_.UnRest(in.left.pointers + extend_length, in.left.pointers + in.left.length, extend_length + 1);
return true;
}
}
@@ -238,13 +183,17 @@ template <class M> class RuleScore {
}
void ProcessRet(const FullScoreReturn &ret) {
- prob_ += ret.prob;
- if (left_done_) return;
+ if (left_done_) {
+ prob_ += ret.prob;
+ return;
+ }
if (ret.independent_left) {
+ prob_ += ret.prob;
left_done_ = true;
return;
}
out_.left.pointers[out_.left.length++] = ret.extend_left;
+ prob_ += ret.rest;
}
const M &model_;
diff --git a/lm/left_test.cc b/lm/left_test.cc
index c85e5efa8..b45614613 100644
--- a/lm/left_test.cc
+++ b/lm/left_test.cc
@@ -16,15 +16,18 @@ namespace {
#define Term(word) score.Terminal(m.GetVocabulary().Index(word));
#define VCheck(word, value) BOOST_CHECK_EQUAL(m.GetVocabulary().Index(word), value);
+// Apparently some Boost versions use templates and are pretty strict about types matching.
+#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
+
template <class M> void Short(const M &m) {
ChartState base;
{
RuleScore<M> score(m, base);
Term("more");
Term("loin");
- BOOST_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001);
}
- BOOST_CHECK(base.full);
+ BOOST_CHECK(base.left.full);
BOOST_CHECK_EQUAL(2, base.left.length);
BOOST_CHECK_EQUAL(1, base.right.length);
VCheck("loin", base.right.words[0]);
@@ -35,24 +38,24 @@ template <class M> void Short(const M &m) {
Term("little");
score.NonTerminal(base, -1.206319 - 0.3561665);
// p(little more loin | null context)
- BOOST_CHECK_CLOSE(-1.56538, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.56538, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(3, more_left.left.length);
BOOST_CHECK_EQUAL(1, more_left.right.length);
VCheck("loin", more_left.right.words[0]);
- BOOST_CHECK(more_left.full);
+ BOOST_CHECK(more_left.left.full);
ChartState shorter;
{
RuleScore<M> score(m, shorter);
Term("to");
score.NonTerminal(base, -1.206319 - 0.3561665);
- BOOST_CHECK_CLOSE(-0.30103 - 1.687872 - 1.206319 - 0.3561665, score.Finish(), 0.01);
+ SLOPPY_CHECK_CLOSE(-0.30103 - 1.687872 - 1.206319 - 0.3561665, score.Finish(), 0.01);
}
BOOST_CHECK_EQUAL(1, shorter.left.length);
BOOST_CHECK_EQUAL(1, shorter.right.length);
VCheck("loin", shorter.right.words[0]);
- BOOST_CHECK(shorter.full);
+ BOOST_CHECK(shorter.left.full);
}
template <class M> void Charge(const M &m) {
@@ -61,39 +64,39 @@ template <class M> void Charge(const M &m) {
RuleScore<M> score(m, base);
Term("on");
Term("more");
- BOOST_CHECK_CLOSE(-1.509559 -0.4771212 -1.206319, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.509559 -0.4771212 -1.206319, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(1, base.left.length);
BOOST_CHECK_EQUAL(1, base.right.length);
VCheck("more", base.right.words[0]);
- BOOST_CHECK(base.full);
+ BOOST_CHECK(base.left.full);
ChartState extend;
{
RuleScore<M> score(m, extend);
Term("looking");
score.NonTerminal(base, -1.509559 -0.4771212 -1.206319);
- BOOST_CHECK_CLOSE(-3.91039, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-3.91039, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(2, extend.left.length);
BOOST_CHECK_EQUAL(1, extend.right.length);
VCheck("more", extend.right.words[0]);
- BOOST_CHECK(extend.full);
+ BOOST_CHECK(extend.left.full);
ChartState tobos;
{
RuleScore<M> score(m, tobos);
score.BeginSentence();
score.NonTerminal(extend, -3.91039);
- BOOST_CHECK_CLOSE(-3.471169, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-3.471169, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(0, tobos.left.length);
BOOST_CHECK_EQUAL(1, tobos.right.length);
}
-template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &words) {
+template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
float ret = 0.0;
- State right = m.NullContextState();
+ State right = begin_sentence ? m.BeginSentenceState() : m.NullContextState();
for (std::vector<WordIndex>::const_iterator i = words.begin(); i != words.end(); ++i) {
State copy(right);
ret += m.Score(copy, *i, right);
@@ -101,12 +104,12 @@ template <class M> float LeftToRight(const M &m, const std::vector<WordIndex> &w
return ret;
}
-template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &words) {
+template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
float ret = 0.0;
ChartState state;
state.left.length = 0;
state.right.length = 0;
- state.full = false;
+ state.left.full = false;
for (std::vector<WordIndex>::const_reverse_iterator i = words.rbegin(); i != words.rend(); ++i) {
ChartState copy(state);
RuleScore<M> score(m, state);
@@ -114,10 +117,17 @@ template <class M> float RightToLeft(const M &m, const std::vector<WordIndex> &w
score.NonTerminal(copy, ret);
ret = score.Finish();
}
+ if (begin_sentence) {
+ ChartState copy(state);
+ RuleScore<M> score(m, state);
+ score.BeginSentence();
+ score.NonTerminal(copy, ret);
+ ret = score.Finish();
+ }
return ret;
}
-template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &words) {
+template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &words, bool begin_sentence = false) {
std::vector<std::pair<ChartState, float> > states(words.size());
for (unsigned int i = 0; i < words.size(); ++i) {
RuleScore<M> score(m, states[i].first);
@@ -137,7 +147,19 @@ template <class M> float TreeMiddle(const M &m, const std::vector<WordIndex> &wo
}
std::swap(states, upper);
}
- return states.empty() ? 0 : states.back().second;
+
+ if (states.empty()) return 0.0;
+
+ if (begin_sentence) {
+ ChartState ignored;
+ RuleScore<M> score(m, ignored);
+ score.BeginSentence();
+ score.NonTerminal(states.front().first, states.front().second);
+ return score.Finish();
+ } else {
+ return states.front().second;
+ }
+
}
template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vector<WordIndex> &out) {
@@ -148,16 +170,15 @@ template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vec
}
#define TEXT_TEST(str) \
-{ \
- std::vector<WordIndex> words; \
LookupVocab(m, str, words); \
- float expect = LeftToRight(m, words); \
- BOOST_CHECK_CLOSE(expect, RightToLeft(m, words), 0.001); \
- BOOST_CHECK_CLOSE(expect, TreeMiddle(m, words), 0.001); \
-}
+ expect = LeftToRight(m, words, rest); \
+ SLOPPY_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \
+ SLOPPY_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \
// Build sentences, or parts thereof, from right to left.
-template <class M> void GrowBig(const M &m) {
+template <class M> void GrowBig(const M &m, bool rest = false) {
+ std::vector<WordIndex> words;
+ float expect;
TEXT_TEST("in biarritz watching considering looking . on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
TEXT_TEST("on a little more loin also would consider higher to look good unknown the screening foo bar , unknown however unknown </s>");
TEXT_TEST("on a little more loin also would consider higher to look good");
@@ -171,25 +192,33 @@ template <class M> void GrowBig(const M &m) {
TEXT_TEST("consider higher");
}
+template <class M> void GrowSmall(const M &m, bool rest = false) {
+ std::vector<WordIndex> words;
+ float expect;
+ TEXT_TEST("in biarritz watching considering looking . </s>");
+ TEXT_TEST("in biarritz watching considering looking .");
+ TEXT_TEST("in biarritz");
+}
+
template <class M> void AlsoWouldConsiderHigher(const M &m) {
ChartState also;
{
RuleScore<M> score(m, also);
score.Terminal(m.GetVocabulary().Index("also"));
- BOOST_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
}
ChartState would;
{
RuleScore<M> score(m, would);
score.Terminal(m.GetVocabulary().Index("would"));
- BOOST_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
}
ChartState combine_also_would;
{
RuleScore<M> score(m, combine_also_would);
score.NonTerminal(also, -1.687872);
score.NonTerminal(would, -1.687872);
- BOOST_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(2, combine_also_would.right.length);
@@ -198,7 +227,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
RuleScore<M> score(m, also_would);
score.Terminal(m.GetVocabulary().Index("also"));
score.Terminal(m.GetVocabulary().Index("would"));
- BOOST_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(2, also_would.right.length);
@@ -206,11 +235,11 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
{
RuleScore<M> score(m, consider);
score.Terminal(m.GetVocabulary().Index("consider"));
- BOOST_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(1, consider.left.length);
BOOST_CHECK_EQUAL(1, consider.right.length);
- BOOST_CHECK(!consider.full);
+ BOOST_CHECK(!consider.left.full);
ChartState higher;
float higher_score;
@@ -219,45 +248,39 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
score.Terminal(m.GetVocabulary().Index("higher"));
higher_score = score.Finish();
}
- BOOST_CHECK_CLOSE(-1.509559, higher_score, 0.001);
+ SLOPPY_CHECK_CLOSE(-1.509559, higher_score, 0.001);
BOOST_CHECK_EQUAL(1, higher.left.length);
BOOST_CHECK_EQUAL(1, higher.right.length);
- BOOST_CHECK(!higher.full);
+ BOOST_CHECK(!higher.left.full);
VCheck("higher", higher.right.words[0]);
- BOOST_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001);
ChartState consider_higher;
{
RuleScore<M> score(m, consider_higher);
score.NonTerminal(consider, -1.687872);
score.NonTerminal(higher, higher_score);
- BOOST_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(2, consider_higher.left.length);
- BOOST_CHECK(!consider_higher.full);
+ BOOST_CHECK(!consider_higher.left.full);
ChartState full;
{
RuleScore<M> score(m, full);
score.NonTerminal(combine_also_would, -1.687872 - 2.0);
score.NonTerminal(consider_higher, -1.509559 - 1.687872 - 0.30103);
- BOOST_CHECK_CLOSE(-10.6879, score.Finish(), 0.001);
+ SLOPPY_CHECK_CLOSE(-10.6879, score.Finish(), 0.001);
}
BOOST_CHECK_EQUAL(4, full.right.length);
}
-template <class M> void GrowSmall(const M &m) {
- TEXT_TEST("in biarritz watching considering looking . </s>");
- TEXT_TEST("in biarritz watching considering looking .");
- TEXT_TEST("in biarritz");
-}
-
#define CHECK_SCORE(str, val) \
{ \
float got = val; \
std::vector<WordIndex> indices; \
LookupVocab(m, str, indices); \
- BOOST_CHECK_CLOSE(LeftToRight(m, indices), got, 0.001); \
+ SLOPPY_CHECK_CLOSE(LeftToRight(m, indices), got, 0.001); \
}
template <class M> void FullGrow(const M &m) {
@@ -315,7 +338,7 @@ template <class M> void FullGrow(const M &m) {
CHECK_SCORE("looking . </s>", l2_scores[1] = score.Finish());
}
BOOST_CHECK_EQUAL(l2[1].left.length, 1);
- BOOST_CHECK(l2[1].full);
+ BOOST_CHECK(l2[1].left.full);
ChartState top;
{
@@ -362,6 +385,13 @@ BOOST_AUTO_TEST_CASE(ArrayTrieAll) {
Everything<ArrayTrieModel>();
}
+BOOST_AUTO_TEST_CASE(RestProbing) {
+ Config config;
+ config.messages = NULL;
+ RestProbingModel m(FileLocation(), config);
+ GrowBig(m, true);
+}
+
} // namespace
} // namespace ngram
} // namespace lm
diff --git a/lm/max_order.cc b/lm/max_order.cc
new file mode 100644
index 000000000..6d4895bd4
--- /dev/null
+++ b/lm/max_order.cc
@@ -0,0 +1,5 @@
+#include <iostream>
+
+int main(int argc, char *argv[]) {
+ std::cerr << "KenLM was compiled with a maximum supported n-gram order set to " << KENLM_MAX_ORDER << "." << std::endl;
+}
diff --git a/lm/max_order.hh b/lm/max_order.hh
deleted file mode 100644
index 71cd23dd2..000000000
--- a/lm/max_order.hh
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef LM_MAX_ORDER__
-#define LM_MAX_ORDER__
-namespace lm {
-namespace ngram {
-// If you need higher order, change this and recompile.
-// Having this limit means that State can be
-// (kMaxOrder - 1) * sizeof(float) bytes instead of
-// sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
-const unsigned char kMaxOrder = 6;
-
-} // namespace ngram
-} // namespace lm
-
-#endif // LM_MAX_ORDER__
diff --git a/lm/model.cc b/lm/model.cc
index 478ebed1b..aace40df9 100644
--- a/lm/model.cc
+++ b/lm/model.cc
@@ -38,13 +38,17 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge
State begin_sentence = State();
begin_sentence.length = 1;
begin_sentence.words[0] = vocab_.BeginSentence();
- begin_sentence.backoff[0] = search_.unigram.Lookup(begin_sentence.words[0]).backoff;
+ typename Search::Node ignored_node;
+ bool ignored_independent_left;
+ uint64_t ignored_extend_left;
+ begin_sentence.backoff[0] = search_.LookupUnigram(begin_sentence.words[0], ignored_node, ignored_independent_left, ignored_extend_left).Backoff();
State null_context = State();
null_context.length = 0;
- P::Init(begin_sentence, null_context, vocab_, search_.MiddleEnd() - search_.MiddleBegin() + 2);
+ P::Init(begin_sentence, null_context, vocab_, search_.Order());
}
template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const Parameters &params, const Config &config, int fd) {
+ UTIL_THROW_IF(params.counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << params.counts.size() << ". Re-compile (use -a), passing a number at least this large to bjam's --max-kenlm-order flag.");
SetupMemory(start, params.counts, config);
vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab);
search_.LoadedBinary();
@@ -58,7 +62,7 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
// File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_.
ReadARPACounts(f, counts);
- if (counts.size() > kMaxOrder) UTIL_THROW(FormatLoadException, "This model has order " << counts.size() << ". Edit lm/max_order.hh, set kMaxOrder to at least this value, and recompile.");
+ UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << ". Re-compile (use -a), passing a number at least this large to bjam's --max-kenlm-order flag.");
if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model.");
if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0");
@@ -79,8 +83,8 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
if (!vocab_.SawUnk()) {
assert(config.unknown_missing != THROW_UP);
// Default probabilities for unknown.
- search_.unigram.Unknown().backoff = 0.0;
- search_.unigram.Unknown().prob = config.unknown_missing_logprob;
+ search_.UnknownUnigram().backoff = 0.0;
+ search_.UnknownUnigram().prob = config.unknown_missing_logprob;
}
FinishFile(config, kModelType, kVersion, counts, vocab_.UnkCountChangePadding(), backing_);
} catch (util::Exception &e) {
@@ -109,20 +113,22 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
// Add the backoff weights for n-grams of order start to (context_rend - context_rbegin).
unsigned char start = ret.ngram_length;
if (context_rend - context_rbegin < static_cast<std::ptrdiff_t>(start)) return ret;
+
+ bool independent_left;
+ uint64_t extend_left;
+ typename Search::Node node;
if (start <= 1) {
- ret.prob += search_.unigram.Lookup(*context_rbegin).backoff;
+ ret.prob += search_.LookupUnigram(*context_rbegin, node, independent_left, extend_left).Backoff();
start = 2;
- }
- typename Search::Node node;
- if (!search_.FastMakeNode(context_rbegin, context_rbegin + start - 1, node)) {
+ } else if (!search_.FastMakeNode(context_rbegin, context_rbegin + start - 1, node)) {
return ret;
}
- float backoff;
// i is the order of the backoff we're looking for.
- typename Search::MiddleIter mid_iter = search_.MiddleBegin() + start - 2;
- for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++mid_iter) {
- if (!search_.LookupMiddleNoProb(*mid_iter, *i, backoff, node)) break;
- ret.prob += backoff;
+ unsigned char order_minus_2 = start - 2;
+ for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++order_minus_2) {
+ typename Search::MiddlePointer p(search_.LookupMiddle(order_minus_2, *i, node, independent_left, extend_left));
+ if (!p.Found()) break;
+ ret.prob += p.Backoff();
}
return ret;
}
@@ -134,17 +140,20 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
out_state.length = 0;
return;
}
- FullScoreReturn ignored;
typename Search::Node node;
- search_.LookupUnigram(*context_rbegin, out_state.backoff[0], node, ignored);
+ bool independent_left;
+ uint64_t extend_left;
+ out_state.backoff[0] = search_.LookupUnigram(*context_rbegin, node, independent_left, extend_left).Backoff();
out_state.length = HasExtension(out_state.backoff[0]) ? 1 : 0;
float *backoff_out = out_state.backoff + 1;
- typename Search::MiddleIter mid(search_.MiddleBegin());
- for (const WordIndex *i = context_rbegin + 1; i < context_rend; ++i, ++backoff_out, ++mid) {
- if (!search_.LookupMiddleNoProb(*mid, *i, *backoff_out, node)) {
+ unsigned char order_minus_2 = 0;
+ for (const WordIndex *i = context_rbegin + 1; i < context_rend; ++i, ++backoff_out, ++order_minus_2) {
+ typename Search::MiddlePointer p(search_.LookupMiddle(order_minus_2, *i, node, independent_left, extend_left));
+ if (!p.Found()) {
std::copy(context_rbegin, context_rbegin + out_state.length, out_state.words);
return;
}
+ *backoff_out = p.Backoff();
if (HasExtension(*backoff_out)) out_state.length = i - context_rbegin + 1;
}
std::copy(context_rbegin, context_rbegin + out_state.length, out_state.words);
@@ -158,43 +167,29 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
float *backoff_out,
unsigned char &next_use) const {
FullScoreReturn ret;
- float subtract_me;
- typename Search::Node node(search_.Unpack(extend_pointer, extend_length, subtract_me));
- ret.prob = subtract_me;
- ret.ngram_length = extend_length;
- next_use = 0;
- // If this function is called, then it does depend on left words.
- ret.independent_left = false;
- ret.extend_left = extend_pointer;
- typename Search::MiddleIter mid_iter(search_.MiddleBegin() + extend_length - 1);
- const WordIndex *i = add_rbegin;
- for (; ; ++i, ++backoff_out, ++mid_iter) {
- if (i == add_rend) {
- // Ran out of words.
- for (const float *b = backoff_in + ret.ngram_length - extend_length; b < backoff_in + (add_rend - add_rbegin); ++b) ret.prob += *b;
- ret.prob -= subtract_me;
- return ret;
- }
- if (mid_iter == search_.MiddleEnd()) break;
- if (ret.independent_left || !search_.LookupMiddle(*mid_iter, *i, *backoff_out, node, ret)) {
- // Didn't match a word.
- ret.independent_left = true;
- for (const float *b = backoff_in + ret.ngram_length - extend_length; b < backoff_in + (add_rend - add_rbegin); ++b) ret.prob += *b;
- ret.prob -= subtract_me;
- return ret;
- }
- ret.ngram_length = mid_iter - search_.MiddleBegin() + 2;
- if (HasExtension(*backoff_out)) next_use = i - add_rbegin + 1;
- }
-
- if (ret.independent_left || !search_.LookupLongest(*i, ret.prob, node)) {
- // The last backoff weight, for Order() - 1.
- ret.prob += backoff_in[i - add_rbegin];
+ typename Search::Node node;
+ if (extend_length == 1) {
+ typename Search::UnigramPointer ptr(search_.LookupUnigram(static_cast<WordIndex>(extend_pointer), node, ret.independent_left, ret.extend_left));
+ ret.rest = ptr.Rest();
+ ret.prob = ptr.Prob();
+ assert(!ret.independent_left);
} else {
- ret.ngram_length = P::Order();
+ typename Search::MiddlePointer ptr(search_.Unpack(extend_pointer, extend_length, node));
+ ret.rest = ptr.Rest();
+ ret.prob = ptr.Prob();
+ ret.extend_left = extend_pointer;
+ // If this function is called, then it does depend on left words.
+ ret.independent_left = false;
}
- ret.independent_left = true;
+ float subtract_me = ret.rest;
+ ret.ngram_length = extend_length;
+ next_use = extend_length;
+ ResumeScore(add_rbegin, add_rend, extend_length - 1, node, backoff_out, next_use, ret);
+ next_use -= extend_length;
+ // Charge backoffs.
+ for (const float *b = backoff_in + ret.ngram_length - extend_length; b < backoff_in + (add_rend - add_rbegin); ++b) ret.prob += *b;
ret.prob -= subtract_me;
+ ret.rest -= subtract_me;
return ret;
}
@@ -215,66 +210,83 @@ void CopyRemainingHistory(const WordIndex *from, State &out_state) {
* new_word.
*/
template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search, VocabularyT>::ScoreExceptBackoff(
- const WordIndex *context_rbegin,
- const WordIndex *context_rend,
+ const WordIndex *const context_rbegin,
+ const WordIndex *const context_rend,
const WordIndex new_word,
State &out_state) const {
FullScoreReturn ret;
// ret.ngram_length contains the last known non-blank ngram length.
ret.ngram_length = 1;
- float *backoff_out(out_state.backoff);
typename Search::Node node;
- search_.LookupUnigram(new_word, *backoff_out, node, ret);
+ typename Search::UnigramPointer uni(search_.LookupUnigram(new_word, node, ret.independent_left, ret.extend_left));
+ out_state.backoff[0] = uni.Backoff();
+ ret.prob = uni.Prob();
+ ret.rest = uni.Rest();
+
// This is the length of the context that should be used for continuation to the right.
- out_state.length = HasExtension(*backoff_out) ? 1 : 0;
+ out_state.length = HasExtension(out_state.backoff[0]) ? 1 : 0;
// We'll write the word anyway since it will probably be used and does no harm being there.
out_state.words[0] = new_word;
if (context_rbegin == context_rend) return ret;
- ++backoff_out;
- // Ok start by looking up the bigram.
- const WordIndex *hist_iter = context_rbegin;
- typename Search::MiddleIter mid_iter(search_.MiddleBegin());
- for (; ; ++mid_iter, ++hist_iter, ++backoff_out) {
- if (hist_iter == context_rend) {
- // Ran out of history. Typically no backoff, but this could be a blank.
- CopyRemainingHistory(context_rbegin, out_state);
- // ret.prob was already set.
- return ret;
- }
+ ResumeScore(context_rbegin, context_rend, 0, node, out_state.backoff + 1, out_state.length, ret);
+ CopyRemainingHistory(context_rbegin, out_state);
+ return ret;
+}
- if (mid_iter == search_.MiddleEnd()) break;
+template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::ResumeScore(const WordIndex *hist_iter, const WordIndex *const context_rend, unsigned char order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const {
+ for (; ; ++order_minus_2, ++hist_iter, ++backoff_out) {
+ if (hist_iter == context_rend) return;
+ if (ret.independent_left) return;
+ if (order_minus_2 == P::Order() - 2) break;
- if (ret.independent_left || !search_.LookupMiddle(*mid_iter, *hist_iter, *backoff_out, node, ret)) {
- // Didn't find an ngram using hist_iter.
- CopyRemainingHistory(context_rbegin, out_state);
- // ret.prob was already set.
- ret.independent_left = true;
- return ret;
- }
- ret.ngram_length = hist_iter - context_rbegin + 2;
+ typename Search::MiddlePointer pointer(search_.LookupMiddle(order_minus_2, *hist_iter, node, ret.independent_left, ret.extend_left));
+ if (!pointer.Found()) return;
+ *backoff_out = pointer.Backoff();
+ ret.prob = pointer.Prob();
+ ret.rest = pointer.Rest();
+ ret.ngram_length = order_minus_2 + 2;
if (HasExtension(*backoff_out)) {
- out_state.length = ret.ngram_length;
+ next_use = ret.ngram_length;
}
}
-
- // It passed every lookup in search_.middle. All that's left is to check search_.longest.
- if (!ret.independent_left && search_.LookupLongest(*hist_iter, ret.prob, node)) {
- // It's an P::Order()-gram.
+ ret.independent_left = true;
+ typename Search::LongestPointer longest(search_.LookupLongest(*hist_iter, node));
+ if (longest.Found()) {
+ ret.prob = longest.Prob();
+ ret.rest = ret.prob;
// There is no blank in longest_.
ret.ngram_length = P::Order();
}
- // This handles (N-1)-grams and N-grams.
- CopyRemainingHistory(context_rbegin, out_state);
- ret.independent_left = true;
+}
+
+template <class Search, class VocabularyT> float GenericModel<Search, VocabularyT>::InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const {
+ float ret;
+ typename Search::Node node;
+ if (first_length == 1) {
+ if (pointers_begin >= pointers_end) return 0.0;
+ bool independent_left;
+ uint64_t extend_left;
+ typename Search::UnigramPointer ptr(search_.LookupUnigram(static_cast<WordIndex>(*pointers_begin), node, independent_left, extend_left));
+ ret = ptr.Prob() - ptr.Rest();
+ ++first_length;
+ ++pointers_begin;
+ } else {
+ ret = 0.0;
+ }
+ for (const uint64_t *i = pointers_begin; i < pointers_end; ++i, ++first_length) {
+ typename Search::MiddlePointer ptr(search_.Unpack(*i, first_length, node));
+ ret += ptr.Prob() - ptr.Rest();
+ }
return ret;
}
-template class GenericModel<ProbingHashedSearch, ProbingVocabulary>; // HASH_PROBING
-template class GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary>; // TRIE_SORTED
+template class GenericModel<HashedSearch<BackoffValue>, ProbingVocabulary>;
+template class GenericModel<HashedSearch<RestValue>, ProbingVocabulary>;
+template class GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary>;
template class GenericModel<trie::TrieSearch<DontQuantize, trie::ArrayBhiksha>, SortedVocabulary>;
-template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary>; // TRIE_SORTED_QUANT
+template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary>;
template class GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::ArrayBhiksha>, SortedVocabulary>;
} // namespace detail
diff --git a/lm/model.hh b/lm/model.hh
index 6ea62a789..6dee94196 100644
--- a/lm/model.hh
+++ b/lm/model.hh
@@ -5,10 +5,11 @@
#include "lm/binary_format.hh"
#include "lm/config.hh"
#include "lm/facade.hh"
-#include "lm/max_order.hh"
#include "lm/quantize.hh"
#include "lm/search_hashed.hh"
#include "lm/search_trie.hh"
+#include "lm/state.hh"
+#include "lm/value.hh"
#include "lm/vocab.hh"
#include "lm/weights.hh"
@@ -23,48 +24,6 @@ namespace util { class FilePiece; }
namespace lm {
namespace ngram {
-
-// This is a POD but if you want memcmp to return the same as operator==, call
-// ZeroRemaining first.
-class State {
- public:
- bool operator==(const State &other) const {
- if (length != other.length) return false;
- return !memcmp(words, other.words, length * sizeof(WordIndex));
- }
-
- // Three way comparison function.
- int Compare(const State &other) const {
- if (length != other.length) return length < other.length ? -1 : 1;
- return memcmp(words, other.words, length * sizeof(WordIndex));
- }
-
- bool operator<(const State &other) const {
- if (length != other.length) return length < other.length;
- return memcmp(words, other.words, length * sizeof(WordIndex)) < 0;
- }
-
- // Call this before using raw memcmp.
- void ZeroRemaining() {
- for (unsigned char i = length; i < kMaxOrder - 1; ++i) {
- words[i] = 0;
- backoff[i] = 0.0;
- }
- }
-
- unsigned char Length() const { return length; }
-
- // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
- // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
- WordIndex words[kMaxOrder - 1];
- float backoff[kMaxOrder - 1];
- unsigned char length;
-};
-
-inline size_t hash_value(const State &state) {
- return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length);
-}
-
namespace detail {
// Should return the same results as SRI.
@@ -119,8 +78,7 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
/* More efficient version of FullScore where a partial n-gram has already
* been scored.
- * NOTE: THE RETURNED .prob IS RELATIVE, NOT ABSOLUTE. So for example, if
- * the n-gram does not end up extending further left, then 0 is returned.
+ * NOTE: THE RETURNED .rest AND .prob ARE RELATIVE TO THE .rest RETURNED BEFORE.
*/
FullScoreReturn ExtendLeft(
// Additional context in reverse order. This will update add_rend to
@@ -136,12 +94,24 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
// Amount of additional content that should be considered by the next call.
unsigned char &next_use) const;
+ /* Return probabilities minus rest costs for an array of pointers. The
+ * first length should be the length of the n-gram to which pointers_begin
+ * points.
+ */
+ float UnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const {
+ // Compiler should optimize this if away.
+ return Search::kDifferentRest ? InternalUnRest(pointers_begin, pointers_end, first_length) : 0.0;
+ }
+
private:
friend void lm::ngram::LoadLM<>(const char *file, const Config &config, GenericModel<Search, VocabularyT> &to);
static void UpdateConfigFromBinary(int fd, const std::vector<uint64_t> &counts, Config &config);
- FullScoreReturn ScoreExceptBackoff(const WordIndex *context_rbegin, const WordIndex *context_rend, const WordIndex new_word, State &out_state) const;
+ FullScoreReturn ScoreExceptBackoff(const WordIndex *const context_rbegin, const WordIndex *const context_rend, const WordIndex new_word, State &out_state) const;
+
+ // Score bigrams and above. Do not include backoff.
+ void ResumeScore(const WordIndex *context_rbegin, const WordIndex *const context_rend, unsigned char starting_order_minus_2, typename Search::Node &node, float *backoff_out, unsigned char &next_use, FullScoreReturn &ret) const;
// Appears after Size in the cc file.
void SetupMemory(void *start, const std::vector<uint64_t> &counts, const Config &config);
@@ -150,32 +120,38 @@ template <class Search, class VocabularyT> class GenericModel : public base::Mod
void InitializeFromARPA(const char *file, const Config &config);
+ float InternalUnRest(const uint64_t *pointers_begin, const uint64_t *pointers_end, unsigned char first_length) const;
+
Backing &MutableBacking() { return backing_; }
Backing backing_;
VocabularyT vocab_;
- typedef typename Search::Middle Middle;
-
Search search_;
};
} // namespace detail
-// These must also be instantiated in the cc file.
-typedef ::lm::ngram::ProbingVocabulary Vocabulary;
-typedef detail::GenericModel<detail::ProbingHashedSearch, Vocabulary> ProbingModel; // HASH_PROBING
-// Default implementation. No real reason for it to be the default.
-typedef ProbingModel Model;
+// Instead of typedef, inherit. This allows the Model etc to be forward declared.
+// Oh the joys of C and C++.
+#define LM_COMMA() ,
+#define LM_NAME_MODEL(name, from)\
+class name : public from {\
+ public:\
+ name(const char *file, const Config &config = Config()) : from(file, config) {}\
+};
-// Smaller implementation.
-typedef ::lm::ngram::SortedVocabulary SortedVocabulary;
-typedef detail::GenericModel<trie::TrieSearch<DontQuantize, trie::DontBhiksha>, SortedVocabulary> TrieModel; // TRIE_SORTED
-typedef detail::GenericModel<trie::TrieSearch<DontQuantize, trie::ArrayBhiksha>, SortedVocabulary> ArrayTrieModel;
+LM_NAME_MODEL(ProbingModel, detail::GenericModel<detail::HashedSearch<BackoffValue> LM_COMMA() ProbingVocabulary>);
+LM_NAME_MODEL(RestProbingModel, detail::GenericModel<detail::HashedSearch<RestValue> LM_COMMA() ProbingVocabulary>);
+LM_NAME_MODEL(TrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
+LM_NAME_MODEL(ArrayTrieModel, detail::GenericModel<trie::TrieSearch<DontQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
+LM_NAME_MODEL(QuantTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::DontBhiksha> LM_COMMA() SortedVocabulary>);
+LM_NAME_MODEL(QuantArrayTrieModel, detail::GenericModel<trie::TrieSearch<SeparatelyQuantize LM_COMMA() trie::ArrayBhiksha> LM_COMMA() SortedVocabulary>);
-typedef detail::GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::DontBhiksha>, SortedVocabulary> QuantTrieModel; // QUANT_TRIE_SORTED
-typedef detail::GenericModel<trie::TrieSearch<SeparatelyQuantize, trie::ArrayBhiksha>, SortedVocabulary> QuantArrayTrieModel;
+// Default implementation. No real reason for it to be the default.
+typedef ::lm::ngram::ProbingVocabulary Vocabulary;
+typedef ProbingModel Model;
} // namespace ngram
} // namespace lm
diff --git a/lm/model_test.cc b/lm/model_test.cc
index 461704d43..32084b5b5 100644
--- a/lm/model_test.cc
+++ b/lm/model_test.cc
@@ -6,6 +6,9 @@
#include <boost/test/unit_test.hpp>
#include <boost/test/floating_point_comparison.hpp>
+// Apparently some Boost versions use templates and are pretty strict about types matching.
+#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
+
namespace lm {
namespace ngram {
@@ -30,7 +33,15 @@ const char *TestNoUnkLocation() {
return "test_nounk.arpa";
}
return boost::unit_test::framework::master_test_suite().argv[2];
+}
+template <class Model> State GetState(const Model &model, const char *word, const State &in) {
+ WordIndex context[in.length + 1];
+ context[0] = model.GetVocabulary().Index(word);
+ std::copy(in.words, in.words + in.length, context + 1);
+ State ret;
+ model.GetState(context, context + in.length + 1, ret);
+ return ret;
}
#define StartTest(word, ngram, score, indep_left) \
@@ -38,18 +49,11 @@ const char *TestNoUnkLocation() {
state, \
model.GetVocabulary().Index(word), \
out);\
- BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
+ SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
BOOST_CHECK_GE(std::min<unsigned char>(ngram, 5 - 1), out.length); \
BOOST_CHECK_EQUAL(indep_left, ret.independent_left); \
- {\
- WordIndex context[state.length + 1]; \
- context[0] = model.GetVocabulary().Index(word); \
- std::copy(state.words, state.words + state.length, context + 1); \
- State get_state; \
- model.GetState(context, context + state.length + 1, get_state); \
- BOOST_CHECK_EQUAL(out, get_state); \
- }
+ BOOST_CHECK_EQUAL(out, GetState(model, word, state));
#define AppendTest(word, ngram, score, indep_left) \
StartTest(word, ngram, score, indep_left) \
@@ -175,14 +179,14 @@ template <class M> void ExtendLeftTest(const M &model) {
State right;
FullScoreReturn little(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("little"), right));
const float kLittleProb = -1.285941;
- BOOST_CHECK_CLOSE(kLittleProb, little.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(kLittleProb, little.prob, 0.001);
unsigned char next_use;
float backoff_out[4];
FullScoreReturn extend_none(model.ExtendLeft(NULL, NULL, NULL, little.extend_left, 1, NULL, next_use));
BOOST_CHECK_EQUAL(0, next_use);
BOOST_CHECK_EQUAL(little.extend_left, extend_none.extend_left);
- BOOST_CHECK_CLOSE(0.0, extend_none.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(little.prob - little.rest, extend_none.prob, 0.001);
BOOST_CHECK_EQUAL(1, extend_none.ngram_length);
const WordIndex a = model.GetVocabulary().Index("a");
@@ -190,16 +194,16 @@ template <class M> void ExtendLeftTest(const M &model) {
// a little
FullScoreReturn extend_a(model.ExtendLeft(&a, &a + 1, &backoff_in, little.extend_left, 1, backoff_out, next_use));
BOOST_CHECK_EQUAL(1, next_use);
- BOOST_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
- BOOST_CHECK_CLOSE(-0.09132547 - kLittleProb, extend_a.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.09132547 - little.rest, extend_a.prob, 0.001);
BOOST_CHECK_EQUAL(2, extend_a.ngram_length);
BOOST_CHECK(!extend_a.independent_left);
const WordIndex on = model.GetVocabulary().Index("on");
FullScoreReturn extend_on(model.ExtendLeft(&on, &on + 1, &backoff_in, extend_a.extend_left, 2, backoff_out, next_use));
BOOST_CHECK_EQUAL(1, next_use);
- BOOST_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001);
- BOOST_CHECK_CLOSE(-0.0283603 - -0.09132547, extend_on.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.0283603 - (extend_a.rest + little.rest), extend_on.prob, 0.001);
BOOST_CHECK_EQUAL(3, extend_on.ngram_length);
BOOST_CHECK(!extend_on.independent_left);
@@ -207,9 +211,9 @@ template <class M> void ExtendLeftTest(const M &model) {
float backoff_in_arr[4];
FullScoreReturn extend_both(model.ExtendLeft(both, both + 2, backoff_in_arr, little.extend_left, 1, backoff_out, next_use));
BOOST_CHECK_EQUAL(2, next_use);
- BOOST_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
- BOOST_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001);
- BOOST_CHECK_CLOSE(-0.0283603 - kLittleProb, extend_both.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001);
+ SLOPPY_CHECK_CLOSE(-0.0283603 - little.rest, extend_both.prob, 0.001);
BOOST_CHECK_EQUAL(3, extend_both.ngram_length);
BOOST_CHECK(!extend_both.independent_left);
BOOST_CHECK_EQUAL(extend_on.extend_left, extend_both.extend_left);
@@ -217,12 +221,12 @@ template <class M> void ExtendLeftTest(const M &model) {
#define StatelessTest(word, provide, ngram, score) \
ret = model.FullScoreForgotState(indices + num_words - word, indices + num_words - word + provide, indices[num_words - word - 1], state); \
- BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
+ SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
model.GetState(indices + num_words - word, indices + num_words - word + provide, before); \
ret = model.FullScore(before, indices[num_words - word - 1], out); \
BOOST_CHECK(state == out); \
- BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
+ SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length);
template <class M> void Stateless(const M &model) {
@@ -237,7 +241,7 @@ template <class M> void Stateless(const M &model) {
State state, out, before;
ret = model.FullScoreForgotState(indices + num_words - 1, indices + num_words, indices[num_words - 2], state);
- BOOST_CHECK_CLOSE(-0.484652, ret.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(-0.484652, ret.prob, 0.001);
StatelessTest(1, 1, 2, -0.484652);
// looking
@@ -275,7 +279,7 @@ template <class M> void NoUnkCheck(const M &model) {
State state;
FullScoreReturn ret = model.FullScoreForgotState(&unk_index, &unk_index + 1, unk_index, state);
- BOOST_CHECK_CLOSE(-100.0, ret.prob, 0.001);
+ SLOPPY_CHECK_CLOSE(-100.0, ret.prob, 0.001);
}
template <class M> void Everything(const M &m) {
@@ -399,7 +403,10 @@ template <class ModelT> void BinaryTest() {
}
BOOST_AUTO_TEST_CASE(write_and_read_probing) {
- BinaryTest<Model>();
+ BinaryTest<ProbingModel>();
+}
+BOOST_AUTO_TEST_CASE(write_and_read_rest_probing) {
+ BinaryTest<RestProbingModel>();
}
BOOST_AUTO_TEST_CASE(write_and_read_trie) {
BinaryTest<TrieModel>();
@@ -414,6 +421,18 @@ BOOST_AUTO_TEST_CASE(write_and_read_quant_array_trie) {
BinaryTest<QuantArrayTrieModel>();
}
+BOOST_AUTO_TEST_CASE(rest_max) {
+ Config config;
+ config.arpa_complain = Config::NONE;
+ config.messages = NULL;
+
+ RestProbingModel model(TestLocation(), config);
+ State state, out;
+ FullScoreReturn ret(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("."), state));
+ SLOPPY_CHECK_CLOSE(-0.2705918, ret.rest, 0.001);
+ SLOPPY_CHECK_CLOSE(-0.01916512, model.FullScore(state, model.GetVocabulary().EndSentence(), out).rest, 0.001);
+}
+
} // namespace
} // namespace ngram
} // namespace lm
diff --git a/lm/model_type.hh b/lm/model_type.hh
index 5057ed251..8b35c793a 100644
--- a/lm/model_type.hh
+++ b/lm/model_type.hh
@@ -6,10 +6,17 @@ namespace ngram {
/* Not the best numbering system, but it grew this way for historical reasons
* and I want to preserve existing binary files. */
-typedef enum {HASH_PROBING=0, HASH_SORTED=1, TRIE_SORTED=2, QUANT_TRIE_SORTED=3, ARRAY_TRIE_SORTED=4, QUANT_ARRAY_TRIE_SORTED=5} ModelType;
+typedef enum {PROBING=0, REST_PROBING=1, TRIE=2, QUANT_TRIE=3, ARRAY_TRIE=4, QUANT_ARRAY_TRIE=5} ModelType;
-const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE_SORTED - TRIE_SORTED);
-const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE_SORTED - TRIE_SORTED);
+// Historical names.
+const ModelType HASH_PROBING = PROBING;
+const ModelType TRIE_SORTED = TRIE;
+const ModelType QUANT_TRIE_SORTED = QUANT_TRIE;
+const ModelType ARRAY_TRIE_SORTED = ARRAY_TRIE;
+const ModelType QUANT_ARRAY_TRIE_SORTED = QUANT_ARRAY_TRIE;
+
+const static ModelType kQuantAdd = static_cast<ModelType>(QUANT_TRIE - TRIE);
+const static ModelType kArrayAdd = static_cast<ModelType>(ARRAY_TRIE - TRIE);
} // namespace ngram
} // namespace lm
diff --git a/lm/ngram_query.cc b/lm/ngram_query.cc
index 8f7a0e1cc..49757d9aa 100644
--- a/lm/ngram_query.cc
+++ b/lm/ngram_query.cc
@@ -12,22 +12,24 @@ int main(int argc, char *argv[]) {
ModelType model_type;
if (RecognizeBinary(argv[1], model_type)) {
switch(model_type) {
- case HASH_PROBING:
+ case PROBING:
Query<lm::ngram::ProbingModel>(argv[1], sentence_context, std::cin, std::cout);
break;
- case TRIE_SORTED:
+ case REST_PROBING:
+ Query<lm::ngram::RestProbingModel>(argv[1], sentence_context, std::cin, std::cout);
+ break;
+ case TRIE:
Query<TrieModel>(argv[1], sentence_context, std::cin, std::cout);
break;
- case QUANT_TRIE_SORTED:
+ case QUANT_TRIE:
Query<QuantTrieModel>(argv[1], sentence_context, std::cin, std::cout);
break;
- case ARRAY_TRIE_SORTED:
+ case ARRAY_TRIE:
Query<ArrayTrieModel>(argv[1], sentence_context, std::cin, std::cout);
break;
- case QUANT_ARRAY_TRIE_SORTED:
+ case QUANT_ARRAY_TRIE:
Query<QuantArrayTrieModel>(argv[1], sentence_context, std::cin, std::cout);
break;
- case HASH_SORTED:
default:
std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
abort();
@@ -35,8 +37,8 @@ int main(int argc, char *argv[]) {
} else {
Query<ProbingModel>(argv[1], sentence_context, std::cin, std::cout);
}
-
- PrintUsage("Total time including destruction:\n");
+ std::cerr << "Total time including destruction:\n";
+ util::PrintUsage(std::cerr);
} catch (const std::exception &e) {
std::cerr << e.what() << std::endl;
return 1;
diff --git a/lm/ngram_query.hh b/lm/ngram_query.hh
index 4990df226..dfcda170e 100644
--- a/lm/ngram_query.hh
+++ b/lm/ngram_query.hh
@@ -3,51 +3,20 @@
#include "lm/enumerate_vocab.hh"
#include "lm/model.hh"
+#include "util/usage.hh"
#include <cstdlib>
-#include <fstream>
#include <iostream>
+#include <ostream>
+#include <istream>
#include <string>
-#include <ctype.h>
-#if !defined(_WIN32) && !defined(_WIN64)
-#include <sys/resource.h>
-#include <sys/time.h>
-#endif
-
namespace lm {
namespace ngram {
-#if !defined(_WIN32) && !defined(_WIN64)
-float FloatSec(const struct timeval &tv) {
- return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000000.0);
-}
-#endif
-
-void PrintUsage(const char *message) {
-#if !defined(_WIN32) && !defined(_WIN64)
- struct rusage usage;
- if (getrusage(RUSAGE_SELF, &usage)) {
- perror("getrusage");
- return;
- }
- std::cerr << message;
- std::cerr << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';
-
- // Linux doesn't set memory usage :-(.
- std::ifstream status("/proc/self/status", std::ios::in);
- std::string line;
- while (getline(status, line)) {
- if (!strncmp(line.c_str(), "VmRSS:\t", 7)) {
- std::cerr << "rss " << (line.c_str() + 7) << '\n';
- break;
- }
- }
-#endif
-}
-
template <class Model> void Query(const Model &model, bool sentence_context, std::istream &in_stream, std::ostream &out_stream) {
- PrintUsage("Loading statistics:\n");
+ std::cerr << "Loading statistics:\n";
+ util::PrintUsage(std::cerr);
typename Model::State state, out;
lm::FullScoreReturn ret;
std::string word;
@@ -84,13 +53,13 @@ template <class Model> void Query(const Model &model, bool sentence_context, std
out_stream << "</s>=" << model.GetVocabulary().EndSentence() << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
}
out_stream << "Total: " << total << " OOV: " << oov << '\n';
- }
- PrintUsage("After queries:\n");
+ }
+ std::cerr << "After queries:\n";
+ util::PrintUsage(std::cerr);
}
template <class M> void Query(const char *file, bool sentence_context, std::istream &in_stream, std::ostream &out_stream) {
Config config;
-// config.load_method = util::LAZY;
M model(file, config);
Query(model, sentence_context, in_stream, out_stream);
}
diff --git a/lm/quantize.cc b/lm/quantize.cc
index a8e0cb21c..b58c3f3f6 100644
--- a/lm/quantize.cc
+++ b/lm/quantize.cc
@@ -47,9 +47,7 @@ void SeparatelyQuantize::UpdateConfigFromBinary(int fd, const std::vector<uint64
util::AdvanceOrThrow(fd, -3);
}
-void SeparatelyQuantize::SetupMemory(void *start, const Config &config) {
- // Reserve 8 byte header for bit counts.
- start_ = reinterpret_cast<float*>(static_cast<uint8_t*>(start) + 8);
+void SeparatelyQuantize::SetupMemory(void *base, unsigned char order, const Config &config) {
prob_bits_ = config.prob_bits;
backoff_bits_ = config.backoff_bits;
// We need the reserved values.
@@ -57,25 +55,35 @@ void SeparatelyQuantize::SetupMemory(void *start, const Config &config) {
if (config.backoff_bits == 0) UTIL_THROW(ConfigException, "You can't quantize backoff to zero");
if (config.prob_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing probability supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.prob_bits) << " bits.");
if (config.backoff_bits > 25) UTIL_THROW(ConfigException, "For efficiency reasons, quantizing backoff supports at most 25 bits. Currently you have requested " << static_cast<unsigned>(config.backoff_bits) << " bits.");
+ // Reserve 8 byte header for bit counts.
+ actual_base_ = static_cast<uint8_t*>(base);
+ float *start = reinterpret_cast<float*>(actual_base_ + 8);
+ for (unsigned char i = 0; i < order - 2; ++i) {
+ tables_[i][0] = Bins(prob_bits_, start);
+ start += (1ULL << prob_bits_);
+ tables_[i][1] = Bins(backoff_bits_, start);
+ start += (1ULL << backoff_bits_);
+ }
+ longest_ = tables_[order - 2][0] = Bins(prob_bits_, start);
}
void SeparatelyQuantize::Train(uint8_t order, std::vector<float> &prob, std::vector<float> &backoff) {
TrainProb(order, prob);
// Backoff
- float *centers = start_ + TableStart(order) + ProbTableLength();
+ float *centers = tables_[order - 2][1].Populate();
*(centers++) = kNoExtensionBackoff;
*(centers++) = kExtensionBackoff;
MakeBins(backoff, centers, (1ULL << backoff_bits_) - 2);
}
void SeparatelyQuantize::TrainProb(uint8_t order, std::vector<float> &prob) {
- float *centers = start_ + TableStart(order);
+ float *centers = tables_[order - 2][0].Populate();
MakeBins(prob, centers, (1ULL << prob_bits_));
}
void SeparatelyQuantize::FinishedLoading(const Config &config) {
- uint8_t *actual_base = reinterpret_cast<uint8_t*>(start_) - 8;
+ uint8_t *actual_base = actual_base_;
*(actual_base++) = kSeparatelyQuantizeVersion; // version
*(actual_base++) = config.prob_bits;
*(actual_base++) = config.backoff_bits;
diff --git a/lm/quantize.hh b/lm/quantize.hh
index a81fe3aa2..36c427272 100644
--- a/lm/quantize.hh
+++ b/lm/quantize.hh
@@ -27,37 +27,60 @@ class DontQuantize {
static uint8_t MiddleBits(const Config &/*config*/) { return 63; }
static uint8_t LongestBits(const Config &/*config*/) { return 31; }
- struct Middle {
- void Write(void *base, uint64_t bit_offset, float prob, float backoff) const {
- util::WriteNonPositiveFloat31(base, bit_offset, prob);
- util::WriteFloat32(base, bit_offset + 31, backoff);
- }
- void Read(const void *base, uint64_t bit_offset, float &prob, float &backoff) const {
- prob = util::ReadNonPositiveFloat31(base, bit_offset);
- backoff = util::ReadFloat32(base, bit_offset + 31);
- }
- void ReadProb(const void *base, uint64_t bit_offset, float &prob) const {
- prob = util::ReadNonPositiveFloat31(base, bit_offset);
- }
- void ReadBackoff(const void *base, uint64_t bit_offset, float &backoff) const {
- backoff = util::ReadFloat32(base, bit_offset + 31);
- }
- uint8_t TotalBits() const { return 63; }
+ class MiddlePointer {
+ public:
+ MiddlePointer(const DontQuantize & /*quant*/, unsigned char /*order_minus_2*/, util::BitAddress address) : address_(address) {}
+
+ MiddlePointer() : address_(NULL, 0) {}
+
+ bool Found() const {
+ return address_.base != NULL;
+ }
+
+ float Prob() const {
+ return util::ReadNonPositiveFloat31(address_.base, address_.offset);
+ }
+
+ float Backoff() const {
+ return util::ReadFloat32(address_.base, address_.offset + 31);
+ }
+
+ float Rest() const { return Prob(); }
+
+ void Write(float prob, float backoff) {
+ util::WriteNonPositiveFloat31(address_.base, address_.offset, prob);
+ util::WriteFloat32(address_.base, address_.offset + 31, backoff);
+ }
+
+ private:
+ util::BitAddress address_;
};
- struct Longest {
- void Write(void *base, uint64_t bit_offset, float prob) const {
- util::WriteNonPositiveFloat31(base, bit_offset, prob);
- }
- void Read(const void *base, uint64_t bit_offset, float &prob) const {
- prob = util::ReadNonPositiveFloat31(base, bit_offset);
- }
- uint8_t TotalBits() const { return 31; }
+ class LongestPointer {
+ public:
+ explicit LongestPointer(const DontQuantize &/*quant*/, util::BitAddress address) : address_(address) {}
+
+ LongestPointer() : address_(NULL, 0) {}
+
+ bool Found() const {
+ return address_.base != NULL;
+ }
+
+ float Prob() const {
+ return util::ReadNonPositiveFloat31(address_.base, address_.offset);
+ }
+
+ void Write(float prob) {
+ util::WriteNonPositiveFloat31(address_.base, address_.offset, prob);
+ }
+
+ private:
+ util::BitAddress address_;
};
DontQuantize() {}
- void SetupMemory(void * /*start*/, const Config & /*config*/) {}
+ void SetupMemory(void * /*start*/, unsigned char /*order*/, const Config & /*config*/) {}
static const bool kTrain = false;
// These should never be called because kTrain is false.
@@ -65,9 +88,6 @@ class DontQuantize {
void TrainProb(uint8_t, std::vector<float> &/*prob*/) {}
void FinishedLoading(const Config &) {}
-
- Middle Mid(uint8_t /*order*/) const { return Middle(); }
- Longest Long(uint8_t /*order*/) const { return Longest(); }
};
class SeparatelyQuantize {
@@ -77,7 +97,9 @@ class SeparatelyQuantize {
// Sigh C++ default constructor
Bins() {}
- Bins(uint8_t bits, const float *const begin) : begin_(begin), end_(begin_ + (1ULL << bits)), bits_(bits), mask_((1ULL << bits) - 1) {}
+ Bins(uint8_t bits, float *begin) : begin_(begin), end_(begin_ + (1ULL << bits)), bits_(bits), mask_((1ULL << bits) - 1) {}
+
+ float *Populate() { return begin_; }
uint64_t EncodeProb(float value) const {
return Encode(value, 0);
@@ -98,13 +120,13 @@ class SeparatelyQuantize {
private:
uint64_t Encode(float value, size_t reserved) const {
- const float *above = std::lower_bound(begin_ + reserved, end_, value);
+ const float *above = std::lower_bound(static_cast<const float*>(begin_) + reserved, end_, value);
if (above == begin_ + reserved) return reserved;
if (above == end_) return end_ - begin_ - 1;
return above - begin_ - (value - *(above - 1) < *above - value);
}
- const float *begin_;
+ float *begin_;
const float *end_;
uint8_t bits_;
uint64_t mask_;
@@ -125,65 +147,61 @@ class SeparatelyQuantize {
static uint8_t MiddleBits(const Config &config) { return config.prob_bits + config.backoff_bits; }
static uint8_t LongestBits(const Config &config) { return config.prob_bits; }
- class Middle {
+ class MiddlePointer {
public:
- Middle(uint8_t prob_bits, const float *prob_begin, uint8_t backoff_bits, const float *backoff_begin) :
- total_bits_(prob_bits + backoff_bits), total_mask_((1ULL << total_bits_) - 1), prob_(prob_bits, prob_begin), backoff_(backoff_bits, backoff_begin) {}
+ MiddlePointer(const SeparatelyQuantize &quant, unsigned char order_minus_2, const util::BitAddress &address) : bins_(quant.GetTables(order_minus_2)), address_(address) {}
- void Write(void *base, uint64_t bit_offset, float prob, float backoff) const {
- util::WriteInt57(base, bit_offset, total_bits_,
- (prob_.EncodeProb(prob) << backoff_.Bits()) | backoff_.EncodeBackoff(backoff));
- }
+ MiddlePointer() : address_(NULL, 0) {}
- void ReadProb(const void *base, uint64_t bit_offset, float &prob) const {
- prob = prob_.Decode(util::ReadInt25(base, bit_offset + backoff_.Bits(), prob_.Bits(), prob_.Mask()));
- }
+ bool Found() const { return address_.base != NULL; }
- void Read(const void *base, uint64_t bit_offset, float &prob, float &backoff) const {
- uint64_t both = util::ReadInt57(base, bit_offset, total_bits_, total_mask_);
- prob = prob_.Decode(both >> backoff_.Bits());
- backoff = backoff_.Decode(both & backoff_.Mask());
+ float Prob() const {
+ return ProbBins().Decode(util::ReadInt25(address_.base, address_.offset + BackoffBins().Bits(), ProbBins().Bits(), ProbBins().Mask()));
}
- void ReadBackoff(const void *base, uint64_t bit_offset, float &backoff) const {
- backoff = backoff_.Decode(util::ReadInt25(base, bit_offset, backoff_.Bits(), backoff_.Mask()));
+ float Backoff() const {
+ return BackoffBins().Decode(util::ReadInt25(address_.base, address_.offset, BackoffBins().Bits(), BackoffBins().Mask()));
}
- uint8_t TotalBits() const {
- return total_bits_;
+ float Rest() const { return Prob(); }
+
+ void Write(float prob, float backoff) const {
+ util::WriteInt57(address_.base, address_.offset, ProbBins().Bits() + BackoffBins().Bits(),
+ (ProbBins().EncodeProb(prob) << BackoffBins().Bits()) | BackoffBins().EncodeBackoff(backoff));
}
private:
- const uint8_t total_bits_;
- const uint64_t total_mask_;
- const Bins prob_;
- const Bins backoff_;
+ const Bins &ProbBins() const { return bins_[0]; }
+ const Bins &BackoffBins() const { return bins_[1]; }
+ const Bins *bins_;
+
+ util::BitAddress address_;
};
- class Longest {
+ class LongestPointer {
public:
- // Sigh C++ default constructor
- Longest() {}
+ LongestPointer(const SeparatelyQuantize &quant, const util::BitAddress &address) : table_(&quant.LongestTable()), address_(address) {}
+
+ LongestPointer() : address_(NULL, 0) {}
- Longest(uint8_t prob_bits, const float *prob_begin) : prob_(prob_bits, prob_begin) {}
+ bool Found() const { return address_.base != NULL; }
- void Write(void *base, uint64_t bit_offset, float prob) const {
- util::WriteInt25(base, bit_offset, prob_.Bits(), prob_.EncodeProb(prob));
+ void Write(float prob) const {
+ util::WriteInt25(address_.base, address_.offset, table_->Bits(), table_->EncodeProb(prob));
}
- void Read(const void *base, uint64_t bit_offset, float &prob) const {
- prob = prob_.Decode(util::ReadInt25(base, bit_offset, prob_.Bits(), prob_.Mask()));
+ float Prob() const {
+ return table_->Decode(util::ReadInt25(address_.base, address_.offset, table_->Bits(), table_->Mask()));
}
- uint8_t TotalBits() const { return prob_.Bits(); }
-
private:
- Bins prob_;
+ const Bins *table_;
+ util::BitAddress address_;
};
SeparatelyQuantize() {}
- void SetupMemory(void *start, const Config &config);
+ void SetupMemory(void *start, unsigned char order, const Config &config);
static const bool kTrain = true;
// Assumes 0.0 is removed from backoff.
@@ -193,18 +211,17 @@ class SeparatelyQuantize {
void FinishedLoading(const Config &config);
- Middle Mid(uint8_t order) const {
- const float *table = start_ + TableStart(order);
- return Middle(prob_bits_, table, backoff_bits_, table + ProbTableLength());
- }
+ const Bins *GetTables(unsigned char order_minus_2) const { return tables_[order_minus_2]; }
- Longest Long(uint8_t order) const { return Longest(prob_bits_, start_ + TableStart(order)); }
+ const Bins &LongestTable() const { return longest_; }
private:
- size_t TableStart(uint8_t order) const { return ((1ULL << prob_bits_) + (1ULL << backoff_bits_)) * static_cast<uint64_t>(order - 2); }
- size_t ProbTableLength() const { return (1ULL << prob_bits_); }
+ Bins tables_[KENLM_MAX_ORDER - 1][2];
+
+ Bins longest_;
+
+ uint8_t *actual_base_;
- float *start_;
uint8_t prob_bits_, backoff_bits_;
};
diff --git a/lm/read_arpa.cc b/lm/read_arpa.cc
index be6565992..70727e4cb 100644
--- a/lm/read_arpa.cc
+++ b/lm/read_arpa.cc
@@ -11,6 +11,10 @@
#include <string.h>
#include <stdint.h>
+#ifdef WIN32
+#include <float.h>
+#endif
+
namespace lm {
// 1 for '\t', '\n', and ' '. This is stricter than isspace.
@@ -84,7 +88,7 @@ void ReadBackoff(util::FilePiece &in, Prob &/*weights*/) {
}
}
-void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
+void ReadBackoff(util::FilePiece &in, float &backoff) {
// Always make zero negative.
// Negative zero means that no (n+1)-gram has this n-gram as context.
// Therefore the hypothesis state can be shorter. Of course, many n-grams
@@ -92,16 +96,21 @@ void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
// back and set the backoff to positive zero in these cases.
switch (in.get()) {
case '\t':
- weights.backoff = in.ReadFloat();
- if (weights.backoff == ngram::kExtensionBackoff) weights.backoff = ngram::kNoExtensionBackoff;
+ backoff = in.ReadFloat();
+ if (backoff == ngram::kExtensionBackoff) backoff = ngram::kNoExtensionBackoff;
{
- int float_class = fpclassify(weights.backoff);
- UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << weights.backoff);
+#ifdef WIN32
+ int float_class = _fpclass(backoff);
+ UTIL_THROW_IF(float_class == _FPCLASS_SNAN || float_class == _FPCLASS_QNAN || float_class == _FPCLASS_NINF || float_class == _FPCLASS_PINF, FormatLoadException, "Bad backoff " << backoff);
+#else
+ int float_class = fpclassify(backoff);
+ UTIL_THROW_IF(float_class == FP_NAN || float_class == FP_INFINITE, FormatLoadException, "Bad backoff " << backoff);
+#endif
}
- UTIL_THROW_IF((in.get() != '\n'), FormatLoadException, "Expected newline after backoff");
+ UTIL_THROW_IF(in.get() != '\n', FormatLoadException, "Expected newline after backoff");
break;
case '\n':
- weights.backoff = ngram::kNoExtensionBackoff;
+ backoff = ngram::kNoExtensionBackoff;
break;
default:
UTIL_THROW(FormatLoadException, "Expected tab or newline for backoff");
diff --git a/lm/read_arpa.hh b/lm/read_arpa.hh
index 25648d3fb..234d130c2 100644
--- a/lm/read_arpa.hh
+++ b/lm/read_arpa.hh
@@ -10,15 +10,19 @@
#include <iosfwd>
#include <vector>
-#include <math.h>
-
namespace lm {
void ReadARPACounts(util::FilePiece &in, std::vector<uint64_t> &number);
void ReadNGramHeader(util::FilePiece &in, unsigned int length);
void ReadBackoff(util::FilePiece &in, Prob &weights);
-void ReadBackoff(util::FilePiece &in, ProbBackoff &weights);
+void ReadBackoff(util::FilePiece &in, float &backoff);
+inline void ReadBackoff(util::FilePiece &in, ProbBackoff &weights) {
+ ReadBackoff(in, weights.backoff);
+}
+inline void ReadBackoff(util::FilePiece &in, RestWeights &weights) {
+ ReadBackoff(in, weights.backoff);
+}
void ReadEnd(util::FilePiece &in);
@@ -31,27 +35,21 @@ class PositiveProbWarn {
explicit PositiveProbWarn(WarningAction action) : action_(action) {}
- float ReadProb(util::FilePiece &f) {
- float prob = f.ReadFloat();
- UTIL_THROW_IF(f.get() != '\t', FormatLoadException, "Expected tab after probability");
- UTIL_THROW_IF(isnan(prob), FormatLoadException, "NaN probability");
- if (prob > 0.0) {
- Warn(prob);
- prob = 0.0;
- }
- return prob;
- }
-
- private:
void Warn(float prob);
+ private:
WarningAction action_;
};
-template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
+template <class Voc, class Weights> void Read1Gram(util::FilePiece &f, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) {
try {
- float prob = warn.ReadProb(f);
- ProbBackoff &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))];
+ float prob = f.ReadFloat();
+ if (prob > 0.0) {
+ warn.Warn(prob);
+ prob = 0.0;
+ }
+ if (f.get() != '\t') UTIL_THROW(FormatLoadException, "Expected tab after probability");
+ Weights &value = unigrams[vocab.Insert(f.ReadDelimited(kARPASpaces))];
value.prob = prob;
ReadBackoff(f, value);
} catch(util::Exception &e) {
@@ -61,7 +59,7 @@ template <class Voc> void Read1Gram(util::FilePiece &f, Voc &vocab, ProbBackoff
}
// Return true if a positive log probability came out.
-template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, ProbBackoff *unigrams, PositiveProbWarn &warn) {
+template <class Voc, class Weights> void Read1Grams(util::FilePiece &f, std::size_t count, Voc &vocab, Weights *unigrams, PositiveProbWarn &warn) {
ReadNGramHeader(f, 1);
for (std::size_t i = 0; i < count; ++i) {
Read1Gram(f, vocab, unigrams, warn);
@@ -72,7 +70,11 @@ template <class Voc> void Read1Grams(util::FilePiece &f, std::size_t count, Voc
// Return true if a positive log probability came out.
template <class Voc, class Weights> void ReadNGram(util::FilePiece &f, const unsigned char n, const Voc &vocab, WordIndex *const reverse_indices, Weights &weights, PositiveProbWarn &warn) {
try {
- weights.prob = warn.ReadProb(f);
+ weights.prob = f.ReadFloat();
+ if (weights.prob > 0.0) {
+ warn.Warn(weights.prob);
+ weights.prob = 0.0;
+ }
for (WordIndex *vocab_out = reverse_indices + n - 1; vocab_out >= reverse_indices; --vocab_out) {
*vocab_out = vocab.Index(f.ReadDelimited(kARPASpaces));
}
diff --git a/lm/return.hh b/lm/return.hh
index 1b55091b2..622320ce1 100644
--- a/lm/return.hh
+++ b/lm/return.hh
@@ -33,6 +33,9 @@ struct FullScoreReturn {
*/
bool independent_left;
uint64_t extend_left; // Defined only if independent_left
+
+ // Rest cost for extension to the left.
+ float rest;
};
} // namespace lm
diff --git a/lm/search_hashed.cc b/lm/search_hashed.cc
index 1d6fb5be7..139423098 100644
--- a/lm/search_hashed.cc
+++ b/lm/search_hashed.cc
@@ -3,7 +3,9 @@
#include "lm/binary_format.hh"
#include "lm/blank.hh"
#include "lm/lm_exception.hh"
+#include "lm/model.hh"
#include "lm/read_arpa.hh"
+#include "lm/value.hh"
#include "lm/vocab.hh"
#include "util/bit_packing.hh"
@@ -14,6 +16,8 @@
namespace lm {
namespace ngram {
+class ProbingModel;
+
namespace {
/* These are passed to ReadNGrams so that n-grams with zero backoff that appear as context will still be used in state. */
@@ -37,9 +41,9 @@ template <class Middle> class ActivateLowerMiddle {
Middle &modify_;
};
-class ActivateUnigram {
+template <class Weights> class ActivateUnigram {
public:
- explicit ActivateUnigram(ProbBackoff *unigram) : modify_(unigram) {}
+ explicit ActivateUnigram(Weights *unigram) : modify_(unigram) {}
void operator()(const WordIndex *vocab_ids, const unsigned int /*n*/) {
// assert(n == 2);
@@ -47,43 +51,124 @@ class ActivateUnigram {
}
private:
- ProbBackoff *modify_;
+ Weights *modify_;
};
-template <class Middle> void FixSRI(int lower, float negative_lower_prob, unsigned int n, const uint64_t *keys, const WordIndex *vocab_ids, ProbBackoff *unigrams, std::vector<Middle> &middle) {
- ProbBackoff blank;
- blank.backoff = kNoExtensionBackoff;
- // Fix SRI's stupidity.
- // Note that negative_lower_prob is the negative of the probability (so it's currently >= 0). We still want the sign bit off to indicate left extension, so I just do -= on the backoffs.
- blank.prob = negative_lower_prob;
- // An entry was found at lower (order lower + 2).
- // We need to insert blanks starting at lower + 1 (order lower + 3).
- unsigned int fix = static_cast<unsigned int>(lower + 1);
- uint64_t backoff_hash = detail::CombineWordHash(static_cast<uint64_t>(vocab_ids[1]), vocab_ids[2]);
- if (fix == 0) {
- // Insert a missing bigram.
- blank.prob -= unigrams[vocab_ids[1]].backoff;
- SetExtension(unigrams[vocab_ids[1]].backoff);
- // Bigram including a unigram's backoff
- middle[0].Insert(detail::ProbBackoffEntry::Make(keys[0], blank));
- fix = 1;
- } else {
- for (unsigned int i = 3; i < fix + 2; ++i) backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[i]);
+// Find the lower order entry, inserting blanks along the way as necessary.
+template <class Value> void FindLower(
+ const std::vector<uint64_t> &keys,
+ typename Value::Weights &unigram,
+ std::vector<util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> > &middle,
+ std::vector<typename Value::Weights *> &between) {
+ typename util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
+ typename Value::ProbingEntry entry;
+ // Backoff will always be 0.0. We'll get the probability and rest in another pass.
+ entry.value.backoff = kNoExtensionBackoff;
+ // Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
+ for (int lower = keys.size() - 2; ; --lower) {
+ if (lower == -1) {
+ between.push_back(&unigram);
+ return;
+ }
+ entry.key = keys[lower];
+ bool found = middle[lower].FindOrInsert(entry, iter);
+ between.push_back(&iter->value);
+ if (found) return;
+ }
+}
+
+// Between usually has a single entry, the value to adjust. But sometimes SRI stupidly pruned entries so it has uninitialized blank values to be set here.
+template <class Added, class Build> void AdjustLower(
+ const Added &added,
+ const Build &build,
+ std::vector<typename Build::Value::Weights *> &between,
+ const unsigned int n,
+ const std::vector<WordIndex> &vocab_ids,
+ typename Build::Value::Weights *unigrams,
+ std::vector<util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash> > &middle) {
+ typedef typename Build::Value Value;
+ if (between.size() == 1) {
+ build.MarkExtends(*between.front(), added);
+ return;
+ }
+ typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
+ float prob = -fabs(between.back()->prob);
+ // Order of the n-gram on which probabilities are based.
+ unsigned char basis = n - between.size();
+ assert(basis != 0);
+ typename Build::Value::Weights **change = &between.back();
+ // Skip the basis.
+ --change;
+ if (basis == 1) {
+ // Hallucinate a bigram based on a unigram's backoff and a unigram probability.
+ float &backoff = unigrams[vocab_ids[1]].backoff;
+ SetExtension(backoff);
+ prob += backoff;
+ (*change)->prob = prob;
+ build.SetRest(&*vocab_ids.begin(), 2, **change);
+ basis = 2;
+ --change;
}
- // fix >= 1. Insert trigrams and above.
- for (; fix <= n - 3; ++fix) {
+ uint64_t backoff_hash = static_cast<uint64_t>(vocab_ids[1]);
+ for (unsigned char i = 2; i <= basis; ++i) {
+ backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[i]);
+ }
+ for (; basis < n - 1; ++basis, --change) {
typename Middle::MutableIterator gotit;
- if (middle[fix - 1].UnsafeMutableFind(backoff_hash, gotit)) {
+ if (middle[basis - 2].UnsafeMutableFind(backoff_hash, gotit)) {
float &backoff = gotit->value.backoff;
SetExtension(backoff);
- blank.prob -= backoff;
+ prob += backoff;
}
- middle[fix].Insert(detail::ProbBackoffEntry::Make(keys[fix], blank));
- backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[fix + 2]);
+ (*change)->prob = prob;
+ build.SetRest(&*vocab_ids.begin(), basis + 1, **change);
+ backoff_hash = detail::CombineWordHash(backoff_hash, vocab_ids[basis+1]);
+ }
+
+ typename std::vector<typename Value::Weights *>::const_iterator i(between.begin());
+ build.MarkExtends(**i, added);
+ const typename Value::Weights *longer = *i;
+ // Everything has probability but is not marked as extending.
+ for (++i; i != between.end(); ++i) {
+ build.MarkExtends(**i, *longer);
+ longer = *i;
}
}
-template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(util::FilePiece &f, const unsigned int n, const size_t count, const Voc &vocab, ProbBackoff *unigrams, std::vector<Middle> &middle, Activate activate, Store &store, PositiveProbWarn &warn) {
+// Continue marking lower entries even when they know that they extend left. This is used for upper/lower bounds.
+template <class Build> void MarkLower(
+ const std::vector<uint64_t> &keys,
+ const Build &build,
+ typename Build::Value::Weights &unigram,
+ std::vector<util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash> > &middle,
+ int start_order,
+ const typename Build::Value::Weights &longer) {
+ if (start_order == 0) return;
+ typename util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash>::MutableIterator iter;
+ // Hopefully the compiler will realize that if MarkExtends always returns false, it can simplify this code.
+ for (int even_lower = start_order - 2 /* index in middle */; ; --even_lower) {
+ if (even_lower == -1) {
+ build.MarkExtends(unigram, longer);
+ return;
+ }
+ middle[even_lower].UnsafeMutableFind(keys[even_lower], iter);
+ if (!build.MarkExtends(iter->value, longer)) return;
+ }
+}
+
+template <class Build, class Activate, class Store> void ReadNGrams(
+ util::FilePiece &f,
+ const unsigned int n,
+ const size_t count,
+ const ProbingVocabulary &vocab,
+ const Build &build,
+ typename Build::Value::Weights *unigrams,
+ std::vector<util::ProbingHashTable<typename Build::Value::ProbingEntry, util::IdentityHash> > &middle,
+ Activate activate,
+ Store &store,
+ PositiveProbWarn &warn) {
+ typedef typename Build::Value Value;
+ typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
assert(n >= 2);
ReadNGramHeader(f, n);
@@ -91,38 +176,25 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(
// vocab ids of words in reverse order.
std::vector<WordIndex> vocab_ids(n);
std::vector<uint64_t> keys(n-1);
- typename Store::Entry::Value value;
- typename Middle::MutableIterator found;
+ typename Store::Entry entry;
+ std::vector<typename Value::Weights *> between;
for (size_t i = 0; i < count; ++i) {
- ReadNGram(f, n, vocab, &*vocab_ids.begin(), value, warn);
+ ReadNGram(f, n, vocab, &*vocab_ids.begin(), entry.value, warn);
+ build.SetRest(&*vocab_ids.begin(), n, entry.value);
keys[0] = detail::CombineWordHash(static_cast<uint64_t>(vocab_ids.front()), vocab_ids[1]);
for (unsigned int h = 1; h < n - 1; ++h) {
keys[h] = detail::CombineWordHash(keys[h-1], vocab_ids[h+1]);
}
// Initially the sign bit is on, indicating it does not extend left. Most already have this but there might +0.0.
- util::SetSign(value.prob);
- store.Insert(Store::Entry::Make(keys[n-2], value));
- // Go back and find the longest right-aligned entry, informing it that it extends left. Normally this will match immediately, but sometimes SRI is dumb.
- int lower;
- util::FloatEnc fix_prob;
- for (lower = n - 3; ; --lower) {
- if (lower == -1) {
- fix_prob.f = unigrams[vocab_ids.front()].prob;
- fix_prob.i &= ~util::kSignBit;
- unigrams[vocab_ids.front()].prob = fix_prob.f;
- break;
- }
- if (middle[lower].UnsafeMutableFind(keys[lower], found)) {
- // Turn off sign bit to indicate that it extends left.
- fix_prob.f = found->value.prob;
- fix_prob.i &= ~util::kSignBit;
- found->value.prob = fix_prob.f;
- // We don't need to recurse further down because this entry already set the bits for lower entries.
- break;
- }
- }
- if (lower != static_cast<int>(n) - 3) FixSRI(lower, fix_prob.f, n, &*keys.begin(), &*vocab_ids.begin(), unigrams, middle);
+ util::SetSign(entry.value.prob);
+ entry.key = keys[n-2];
+
+ store.Insert(entry);
+ between.clear();
+ FindLower<Value>(keys, unigrams[vocab_ids.front()], middle, between);
+ AdjustLower<typename Store::Entry::Value, Build>(entry.value, build, between, n, vocab_ids, unigrams, middle);
+ if (Build::kMarkEvenLower) MarkLower<Build>(keys, build, unigrams[vocab_ids.front()], middle, n - between.size() - 1, *between.back());
activate(&*vocab_ids.begin(), n);
}
@@ -132,9 +204,9 @@ template <class Voc, class Store, class Middle, class Activate> void ReadNGrams(
} // namespace
namespace detail {
-template <class MiddleT, class LongestT> uint8_t *TemplateHashedSearch<MiddleT, LongestT>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
+template <class Value> uint8_t *HashedSearch<Value>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
std::size_t allocated = Unigram::Size(counts[0]);
- unigram = Unigram(start, allocated);
+ unigram_ = Unigram(start, counts[0], allocated);
start += allocated;
for (unsigned int n = 2; n < counts.size(); ++n) {
allocated = Middle::Size(counts[n - 1], config.probing_multiplier);
@@ -142,31 +214,63 @@ template <class MiddleT, class LongestT> uint8_t *TemplateHashedSearch<MiddleT,
start += allocated;
}
allocated = Longest::Size(counts.back(), config.probing_multiplier);
- longest = Longest(start, allocated);
+ longest_ = Longest(start, allocated);
start += allocated;
return start;
}
-template <class MiddleT, class LongestT> template <class Voc> void TemplateHashedSearch<MiddleT, LongestT>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, Voc &vocab, Backing &backing) {
+template <class Value> void HashedSearch<Value>::InitializeFromARPA(const char * /*file*/, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, Backing &backing) {
// TODO: fix sorted.
SetupMemory(GrowForSearch(config, vocab.UnkCountChangePadding(), Size(counts, config), backing), counts, config);
PositiveProbWarn warn(config.positive_log_probability);
-
- Read1Grams(f, counts[0], vocab, unigram.Raw(), warn);
+ Read1Grams(f, counts[0], vocab, unigram_.Raw(), warn);
CheckSpecials(config, vocab);
+ DispatchBuild(f, counts, config, vocab, warn);
+}
+
+template <> void HashedSearch<BackoffValue>::DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn) {
+ NoRestBuild build;
+ ApplyBuild(f, counts, config, vocab, warn, build);
+}
+
+template <> void HashedSearch<RestValue>::DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn) {
+ switch (config.rest_function) {
+ case Config::REST_MAX:
+ {
+ MaxRestBuild build;
+ ApplyBuild(f, counts, config, vocab, warn, build);
+ }
+ break;
+ case Config::REST_LOWER:
+ {
+ LowerRestBuild<ProbingModel> build(config, counts.size(), vocab);
+ ApplyBuild(f, counts, config, vocab, warn, build);
+ }
+ break;
+ }
+}
+
+template <class Value> template <class Build> void HashedSearch<Value>::ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build) {
+ for (WordIndex i = 0; i < counts[0]; ++i) {
+ build.SetRest(&i, (unsigned int)1, unigram_.Raw()[i]);
+ }
try {
if (counts.size() > 2) {
- ReadNGrams(f, 2, counts[1], vocab, unigram.Raw(), middle_, ActivateUnigram(unigram.Raw()), middle_[0], warn);
+ ReadNGrams<Build, ActivateUnigram<typename Value::Weights>, Middle>(
+ f, 2, counts[1], vocab, build, unigram_.Raw(), middle_, ActivateUnigram<typename Value::Weights>(unigram_.Raw()), middle_[0], warn);
}
for (unsigned int n = 3; n < counts.size(); ++n) {
- ReadNGrams(f, n, counts[n-1], vocab, unigram.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_[n-3]), middle_[n-2], warn);
+ ReadNGrams<Build, ActivateLowerMiddle<Middle>, Middle>(
+ f, n, counts[n-1], vocab, build, unigram_.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_[n-3]), middle_[n-2], warn);
}
if (counts.size() > 2) {
- ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, unigram.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_.back()), longest, warn);
+ ReadNGrams<Build, ActivateLowerMiddle<Middle>, Longest>(
+ f, counts.size(), counts[counts.size() - 1], vocab, build, unigram_.Raw(), middle_, ActivateLowerMiddle<Middle>(middle_.back()), longest_, warn);
} else {
- ReadNGrams(f, counts.size(), counts[counts.size() - 1], vocab, unigram.Raw(), middle_, ActivateUnigram(unigram.Raw()), longest, warn);
+ ReadNGrams<Build, ActivateUnigram<typename Value::Weights>, Longest>(
+ f, counts.size(), counts[counts.size() - 1], vocab, build, unigram_.Raw(), middle_, ActivateUnigram<typename Value::Weights>(unigram_.Raw()), longest_, warn);
}
} catch (util::ProbingSizeException &e) {
UTIL_THROW(util::ProbingSizeException, "Avoid pruning n-grams like \"bar baz quux\" when \"foo bar baz quux\" is still in the model. KenLM will work when this pruning happens, but the probing model assumes these events are rare enough that using blank space in the probing hash table will cover all of them. Increase probing_multiplier (-p to build_binary) to add more blank spaces.\n");
@@ -174,17 +278,16 @@ template <class MiddleT, class LongestT> template <class Voc> void TemplateHashe
ReadEnd(f);
}
-template <class MiddleT, class LongestT> void TemplateHashedSearch<MiddleT, LongestT>::LoadedBinary() {
- unigram.LoadedBinary();
+template <class Value> void HashedSearch<Value>::LoadedBinary() {
+ unigram_.LoadedBinary();
for (typename std::vector<Middle>::iterator i = middle_.begin(); i != middle_.end(); ++i) {
i->LoadedBinary();
}
- longest.LoadedBinary();
+ longest_.LoadedBinary();
}
-template class TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>;
-
-template void TemplateHashedSearch<ProbingHashedSearch::Middle, ProbingHashedSearch::Longest>::InitializeFromARPA(const char *, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &, ProbingVocabulary &vocab, Backing &backing);
+template class HashedSearch<BackoffValue>;
+template class HashedSearch<RestValue>;
} // namespace detail
} // namespace ngram
diff --git a/lm/search_hashed.hh b/lm/search_hashed.hh
index 4352c72dd..7e8c12206 100644
--- a/lm/search_hashed.hh
+++ b/lm/search_hashed.hh
@@ -19,6 +19,7 @@ namespace util { class FilePiece; }
namespace lm {
namespace ngram {
struct Backing;
+class ProbingVocabulary;
namespace detail {
inline uint64_t CombineWordHash(uint64_t current, const WordIndex next) {
@@ -26,54 +27,48 @@ inline uint64_t CombineWordHash(uint64_t current, const WordIndex next) {
return ret;
}
-struct HashedSearch {
- typedef uint64_t Node;
-
- class Unigram {
- public:
- Unigram() {}
-
- Unigram(void *start, std::size_t /*allocated*/) : unigram_(static_cast<ProbBackoff*>(start)) {}
-
- static std::size_t Size(uint64_t count) {
- return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate <unk>
- }
-
- const ProbBackoff &Lookup(WordIndex index) const { return unigram_[index]; }
+#pragma pack(push)
+#pragma pack(4)
+struct ProbEntry {
+ uint64_t key;
+ Prob value;
+ typedef uint64_t Key;
+ typedef Prob Value;
+ uint64_t GetKey() const {
+ return key;
+ }
+};
- ProbBackoff &Unknown() { return unigram_[0]; }
+#pragma pack(pop)
- void LoadedBinary() {}
+class LongestPointer {
+ public:
+ explicit LongestPointer(const float &to) : to_(&to) {}
- // For building.
- ProbBackoff *Raw() { return unigram_; }
+ LongestPointer() : to_(NULL) {}
- private:
- ProbBackoff *unigram_;
- };
+ bool Found() const {
+ return to_ != NULL;
+ }
- Unigram unigram;
+ float Prob() const {
+ return *to_;
+ }
- void LookupUnigram(WordIndex word, float &backoff, Node &next, FullScoreReturn &ret) const {
- const ProbBackoff &entry = unigram.Lookup(word);
- util::FloatEnc val;
- val.f = entry.prob;
- ret.independent_left = (val.i & util::kSignBit);
- ret.extend_left = static_cast<uint64_t>(word);
- val.i |= util::kSignBit;
- ret.prob = val.f;
- backoff = entry.backoff;
- next = static_cast<Node>(word);
- }
+ private:
+ const float *to_;
};
-template <class MiddleT, class LongestT> class TemplateHashedSearch : public HashedSearch {
+template <class Value> class HashedSearch {
public:
- typedef MiddleT Middle;
+ typedef uint64_t Node;
- typedef LongestT Longest;
- Longest longest;
+ typedef typename Value::ProbingProxy UnigramPointer;
+ typedef typename Value::ProbingProxy MiddlePointer;
+ typedef ::lm::ngram::detail::LongestPointer LongestPointer;
+ static const ModelType kModelType = Value::kProbingModelType;
+ static const bool kDifferentRest = Value::kDifferentRest;
static const unsigned int kVersion = 0;
// TODO: move probing_multiplier here with next binary file format update.
@@ -89,64 +84,55 @@ template <class MiddleT, class LongestT> class TemplateHashedSearch : public Has
uint8_t *SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config);
- template <class Voc> void InitializeFromARPA(const char *file, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, Voc &vocab, Backing &backing);
+ void InitializeFromARPA(const char *file, util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, ProbingVocabulary &vocab, Backing &backing);
- typedef typename std::vector<Middle>::const_iterator MiddleIter;
+ void LoadedBinary();
- MiddleIter MiddleBegin() const { return middle_.begin(); }
- MiddleIter MiddleEnd() const { return middle_.end(); }
+ unsigned char Order() const {
+ return middle_.size() + 2;
+ }
- Node Unpack(uint64_t extend_pointer, unsigned char extend_length, float &prob) const {
- util::FloatEnc val;
- if (extend_length == 1) {
- val.f = unigram.Lookup(static_cast<uint64_t>(extend_pointer)).prob;
- } else {
- typename Middle::ConstIterator found;
- if (!middle_[extend_length - 2].Find(extend_pointer, found)) {
- std::cerr << "Extend pointer " << extend_pointer << " should have been found for length " << (unsigned) extend_length << std::endl;
- abort();
- }
- val.f = found->value.prob;
- }
- val.i |= util::kSignBit;
- prob = val.f;
- return extend_pointer;
+ typename Value::Weights &UnknownUnigram() { return unigram_.Unknown(); }
+
+ UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &independent_left, uint64_t &extend_left) const {
+ extend_left = static_cast<uint64_t>(word);
+ next = extend_left;
+ UnigramPointer ret(unigram_.Lookup(word));
+ independent_left = ret.IndependentLeft();
+ return ret;
}
- bool LookupMiddle(const Middle &middle, WordIndex word, float &backoff, Node &node, FullScoreReturn &ret) const {
- node = CombineWordHash(node, word);
+#pragma GCC diagnostic ignored "-Wuninitialized"
+ MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
+ node = extend_pointer;
typename Middle::ConstIterator found;
- if (!middle.Find(node, found)) return false;
- util::FloatEnc enc;
- enc.f = found->value.prob;
- ret.independent_left = (enc.i & util::kSignBit);
- ret.extend_left = node;
- enc.i |= util::kSignBit;
- ret.prob = enc.f;
- backoff = found->value.backoff;
- return true;
+ bool got = middle_[extend_length - 2].Find(extend_pointer, found);
+ assert(got);
+ (void)got;
+ return MiddlePointer(found->value);
}
- void LoadedBinary();
-
- bool LookupMiddleNoProb(const Middle &middle, WordIndex word, float &backoff, Node &node) const {
+ MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_pointer) const {
node = CombineWordHash(node, word);
typename Middle::ConstIterator found;
- if (!middle.Find(node, found)) return false;
- backoff = found->value.backoff;
- return true;
+ if (!middle_[order_minus_2].Find(node, found)) {
+ independent_left = true;
+ return MiddlePointer();
+ }
+ extend_pointer = node;
+ MiddlePointer ret(found->value);
+ independent_left = ret.IndependentLeft();
+ return ret;
}
- bool LookupLongest(WordIndex word, float &prob, Node &node) const {
+ LongestPointer LookupLongest(WordIndex word, const Node &node) const {
// Sign bit is always on because longest n-grams do not extend left.
- node = CombineWordHash(node, word);
typename Longest::ConstIterator found;
- if (!longest.Find(node, found)) return false;
- prob = found->value.prob;
- return true;
+ if (!longest_.Find(CombineWordHash(node, word), found)) return LongestPointer();
+ return LongestPointer(found->value.prob);
}
- // Geenrate a node without necessarily checking that it actually exists.
+ // Generate a node without necessarily checking that it actually exists.
// Optionally return false if it's know to not exist.
bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
assert(begin != end);
@@ -158,55 +144,54 @@ template <class MiddleT, class LongestT> class TemplateHashedSearch : public Has
}
private:
- std::vector<Middle> middle_;
-};
+ // Interpret config's rest cost build policy and pass the right template argument to ApplyBuild.
+ void DispatchBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn);
-/* These look like perfect candidates for a template, right? Ancient gcc (4.1
- * on RedHat stale linux) doesn't pack templates correctly. ProbBackoffEntry
- * is a multiple of 8 bytes anyway. ProbEntry is 12 bytes so it's set to pack.
- */
-struct ProbBackoffEntry {
- uint64_t key;
- ProbBackoff value;
- typedef uint64_t Key;
- typedef ProbBackoff Value;
- uint64_t GetKey() const {
- return key;
- }
- static ProbBackoffEntry Make(uint64_t key, ProbBackoff value) {
- ProbBackoffEntry ret;
- ret.key = key;
- ret.value = value;
- return ret;
- }
-};
+ template <class Build> void ApplyBuild(util::FilePiece &f, const std::vector<uint64_t> &counts, const Config &config, const ProbingVocabulary &vocab, PositiveProbWarn &warn, const Build &build);
-#pragma pack(push)
-#pragma pack(4)
-struct ProbEntry {
- uint64_t key;
- Prob value;
- typedef uint64_t Key;
- typedef Prob Value;
- uint64_t GetKey() const {
- return key;
- }
- static ProbEntry Make(uint64_t key, Prob value) {
- ProbEntry ret;
- ret.key = key;
- ret.value = value;
- return ret;
- }
-};
+ class Unigram {
+ public:
+ Unigram() {}
-#pragma pack(pop)
+ Unigram(void *start, uint64_t count, std::size_t /*allocated*/) :
+ unigram_(static_cast<typename Value::Weights*>(start))
+#ifdef DEBUG
+ , count_(count)
+#endif
+ {}
+
+ static std::size_t Size(uint64_t count) {
+ return (count + 1) * sizeof(ProbBackoff); // +1 for hallucinate <unk>
+ }
+
+ const typename Value::Weights &Lookup(WordIndex index) const {
+#ifdef DEBUG
+ assert(index < count_);
+#endif
+ return unigram_[index];
+ }
+
+ typename Value::Weights &Unknown() { return unigram_[0]; }
+ void LoadedBinary() {}
-struct ProbingHashedSearch : public TemplateHashedSearch<
- util::ProbingHashTable<ProbBackoffEntry, util::IdentityHash>,
- util::ProbingHashTable<ProbEntry, util::IdentityHash> > {
+ // For building.
+ typename Value::Weights *Raw() { return unigram_; }
+
+ private:
+ typename Value::Weights *unigram_;
+#ifdef DEBUG
+ uint64_t count_;
+#endif
+ };
+
+ Unigram unigram_;
+
+ typedef util::ProbingHashTable<typename Value::ProbingEntry, util::IdentityHash> Middle;
+ std::vector<Middle> middle_;
- static const ModelType kModelType = HASH_PROBING;
+ typedef util::ProbingHashTable<ProbEntry, util::IdentityHash> Longest;
+ Longest longest_;
};
} // namespace detail
diff --git a/lm/search_trie.cc b/lm/search_trie.cc
index ffadfa944..9a3e96916 100644
--- a/lm/search_trie.cc
+++ b/lm/search_trie.cc
@@ -5,7 +5,6 @@
#include "lm/binary_format.hh"
#include "lm/blank.hh"
#include "lm/lm_exception.hh"
-#include "lm/max_order.hh"
#include "lm/quantize.hh"
#include "lm/trie.hh"
#include "lm/trie_sort.hh"
@@ -180,7 +179,7 @@ const float kBadProb = std::numeric_limits<float>::infinity();
class SRISucks {
public:
SRISucks() {
- for (BackoffMessages *i = messages_; i != messages_ + kMaxOrder - 1; ++i)
+ for (BackoffMessages *i = messages_; i != messages_ + KENLM_MAX_ORDER - 1; ++i)
i->Init(sizeof(ProbPointer) + sizeof(WordIndex) * (i - messages_ + 1));
}
@@ -196,7 +195,7 @@ class SRISucks {
}
void ObtainBackoffs(unsigned char total_order, FILE *unigram_file, RecordReader *reader) {
- for (unsigned char i = 0; i < kMaxOrder - 1; ++i) {
+ for (unsigned char i = 0; i < KENLM_MAX_ORDER - 1; ++i) {
it_[i] = values_[i].empty() ? NULL : &*values_[i].begin();
}
messages_[0].Apply(it_, unigram_file);
@@ -221,10 +220,10 @@ class SRISucks {
private:
// This used to be one array. Then I needed to separate it by order for quantization to work.
- std::vector<float> values_[kMaxOrder - 1];
- BackoffMessages messages_[kMaxOrder - 1];
+ std::vector<float> values_[KENLM_MAX_ORDER - 1];
+ BackoffMessages messages_[KENLM_MAX_ORDER - 1];
- float *it_[kMaxOrder - 1];
+ float *it_[KENLM_MAX_ORDER - 1];
};
class FindBlanks {
@@ -273,8 +272,9 @@ class FindBlanks {
// Phase to actually write n-grams to the trie.
template <class Quant, class Bhiksha> class WriteEntries {
public:
- WriteEntries(RecordReader *contexts, UnigramValue *unigrams, BitPackedMiddle<typename Quant::Middle, Bhiksha> *middle, BitPackedLongest<typename Quant::Longest> &longest, unsigned char order, SRISucks &sri) :
+ WriteEntries(RecordReader *contexts, const Quant &quant, UnigramValue *unigrams, BitPackedMiddle<Bhiksha> *middle, BitPackedLongest &longest, unsigned char order, SRISucks &sri) :
contexts_(contexts),
+ quant_(quant),
unigrams_(unigrams),
middle_(middle),
longest_(longest),
@@ -290,7 +290,7 @@ template <class Quant, class Bhiksha> class WriteEntries {
void MiddleBlank(const unsigned char order, const WordIndex *indices, unsigned char /*lower*/, float /*prob_base*/) {
ProbBackoff weights = sri_.GetBlank(order_, order, indices);
- middle_[order - 2].Insert(indices[order - 1], weights.prob, weights.backoff);
+ typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(indices[order - 1])).Write(weights.prob, weights.backoff);
}
void Middle(const unsigned char order, const void *data) {
@@ -301,21 +301,22 @@ template <class Quant, class Bhiksha> class WriteEntries {
SetExtension(weights.backoff);
++context;
}
- middle_[order - 2].Insert(words[order - 1], weights.prob, weights.backoff);
+ typename Quant::MiddlePointer(quant_, order - 2, middle_[order - 2].Insert(words[order - 1])).Write(weights.prob, weights.backoff);
}
void Longest(const void *data) {
const WordIndex *words = reinterpret_cast<const WordIndex*>(data);
- longest_.Insert(words[order_ - 1], reinterpret_cast<const Prob*>(words + order_)->prob);
+ typename Quant::LongestPointer(quant_, longest_.Insert(words[order_ - 1])).Write(reinterpret_cast<const Prob*>(words + order_)->prob);
}
void Cleanup() {}
private:
RecordReader *contexts_;
+ const Quant &quant_;
UnigramValue *const unigrams_;
- BitPackedMiddle<typename Quant::Middle, Bhiksha> *const middle_;
- BitPackedLongest<typename Quant::Longest> &longest_;
+ BitPackedMiddle<Bhiksha> *const middle_;
+ BitPackedLongest &longest_;
BitPacked &bigram_pack_;
const unsigned char order_;
SRISucks &sri_;
@@ -335,7 +336,7 @@ struct Gram {
template <class Doing> class BlankManager {
public:
BlankManager(unsigned char total_order, Doing &doing) : total_order_(total_order), been_length_(0), doing_(doing) {
- for (float *i = basis_; i != basis_ + kMaxOrder - 1; ++i) *i = kBadProb;
+ for (float *i = basis_; i != basis_ + KENLM_MAX_ORDER - 1; ++i) *i = kBadProb;
}
void Visit(const WordIndex *to, unsigned char length, float prob) {
@@ -371,16 +372,16 @@ template <class Doing> class BlankManager {
private:
const unsigned char total_order_;
- WordIndex been_[kMaxOrder];
+ WordIndex been_[KENLM_MAX_ORDER];
unsigned char been_length_;
- float basis_[kMaxOrder];
+ float basis_[KENLM_MAX_ORDER];
Doing &doing_;
};
template <class Doing> void RecursiveInsert(const unsigned char total_order, const WordIndex unigram_count, RecordReader *input, std::ostream *progress_out, const char *message, Doing &doing) {
- util::ErsatzProgress progress(progress_out, message, unigram_count + 1);
+ util::ErsatzProgress progress(unigram_count + 1, progress_out, message);
WordIndex unigram = 0;
std::priority_queue<Gram> grams;
grams.push(Gram(&unigram, 1));
@@ -468,8 +469,8 @@ void PopulateUnigramWeights(FILE *file, WordIndex unigram_count, RecordReader &c
} // namespace
template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing) {
- RecordReader inputs[kMaxOrder - 1];
- RecordReader contexts[kMaxOrder - 1];
+ RecordReader inputs[KENLM_MAX_ORDER - 1];
+ RecordReader contexts[KENLM_MAX_ORDER - 1];
for (unsigned char i = 2; i <= counts.size(); ++i) {
inputs[i-2].Init(files.Full(i), i * sizeof(WordIndex) + (i == counts.size() ? sizeof(Prob) : sizeof(ProbBackoff)));
@@ -502,7 +503,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve
inputs[i-2].Rewind();
}
if (Quant::kTrain) {
- util::ErsatzProgress progress(config.messages, "Quantizing", std::accumulate(counts.begin() + 1, counts.end(), 0));
+ util::ErsatzProgress progress(std::accumulate(counts.begin() + 1, counts.end(), 0), config.messages, "Quantizing");
for (unsigned char i = 2; i < counts.size(); ++i) {
TrainQuantizer(i, counts[i-1], sri.Values(i), inputs[i-2], progress, quant);
}
@@ -510,7 +511,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve
quant.FinishedLoading(config);
}
- UnigramValue *unigrams = out.unigram.Raw();
+ UnigramValue *unigrams = out.unigram_.Raw();
PopulateUnigramWeights(unigram_file.get(), counts[0], contexts[0], unigrams);
unigram_file.reset();
@@ -519,7 +520,7 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve
}
// Fill entries except unigram probabilities.
{
- WriteEntries<Quant, Bhiksha> writer(contexts, unigrams, out.middle_begin_, out.longest, counts.size(), sri);
+ WriteEntries<Quant, Bhiksha> writer(contexts, quant, unigrams, out.middle_begin_, out.longest_, counts.size(), sri);
RecursiveInsert(counts.size(), counts[0], inputs, config.messages, "Writing trie", writer);
}
@@ -544,14 +545,14 @@ template <class Quant, class Bhiksha> void BuildTrie(SortedFiles &files, std::ve
for (typename TrieSearch<Quant, Bhiksha>::Middle *i = out.middle_begin_; i != out.middle_end_ - 1; ++i) {
i->FinishedLoading((i+1)->InsertIndex(), config);
}
- (out.middle_end_ - 1)->FinishedLoading(out.longest.InsertIndex(), config);
+ (out.middle_end_ - 1)->FinishedLoading(out.longest_.InsertIndex(), config);
}
}
template <class Quant, class Bhiksha> uint8_t *TrieSearch<Quant, Bhiksha>::SetupMemory(uint8_t *start, const std::vector<uint64_t> &counts, const Config &config) {
- quant_.SetupMemory(start, config);
+ quant_.SetupMemory(start, counts.size(), config);
start += Quant::Size(counts.size(), config);
- unigram.Init(start);
+ unigram_.Init(start);
start += Unigram::Size(counts[0]);
FreeMiddles();
middle_begin_ = static_cast<Middle*>(malloc(sizeof(Middle) * (counts.size() - 2)));
@@ -565,23 +566,23 @@ template <class Quant, class Bhiksha> uint8_t *TrieSearch<Quant, Bhiksha>::Setup
for (unsigned char i = counts.size() - 1; i >= 2; --i) {
new (middle_begin_ + i - 2) Middle(
middle_starts[i-2],
- quant_.Mid(i),
+ quant_.MiddleBits(config),
counts[i-1],
counts[0],
counts[i],
- (i == counts.size() - 1) ? static_cast<const BitPacked&>(longest) : static_cast<const BitPacked &>(middle_begin_[i-1]),
+ (i == counts.size() - 1) ? static_cast<const BitPacked&>(longest_) : static_cast<const BitPacked &>(middle_begin_[i-1]),
config);
}
- longest.Init(start, quant_.Long(counts.size()), counts[0]);
+ longest_.Init(start, quant_.LongestBits(config), counts[0]);
return start + Longest::Size(Quant::LongestBits(config), counts.back(), counts[0]);
}
template <class Quant, class Bhiksha> void TrieSearch<Quant, Bhiksha>::LoadedBinary() {
- unigram.LoadedBinary();
+ unigram_.LoadedBinary();
for (Middle *i = middle_begin_; i != middle_end_; ++i) {
i->LoadedBinary();
}
- longest.LoadedBinary();
+ longest_.LoadedBinary();
}
template <class Quant, class Bhiksha> void TrieSearch<Quant, Bhiksha>::InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing) {
diff --git a/lm/search_trie.hh b/lm/search_trie.hh
index 5155ca020..10b22ab18 100644
--- a/lm/search_trie.hh
+++ b/lm/search_trie.hh
@@ -28,13 +28,11 @@ template <class Quant, class Bhiksha> class TrieSearch {
public:
typedef NodeRange Node;
- typedef ::lm::ngram::trie::Unigram Unigram;
- Unigram unigram;
-
- typedef trie::BitPackedMiddle<typename Quant::Middle, Bhiksha> Middle;
+ typedef ::lm::ngram::trie::UnigramPointer UnigramPointer;
+ typedef typename Quant::MiddlePointer MiddlePointer;
+ typedef typename Quant::LongestPointer LongestPointer;
- typedef trie::BitPackedLongest<typename Quant::Longest> Longest;
- Longest longest;
+ static const bool kDifferentRest = false;
static const ModelType kModelType = static_cast<ModelType>(TRIE_SORTED + Quant::kModelTypeAdd + Bhiksha::kModelTypeAdd);
@@ -62,55 +60,46 @@ template <class Quant, class Bhiksha> class TrieSearch {
void LoadedBinary();
- typedef const Middle *MiddleIter;
+ void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing);
- const Middle *MiddleBegin() const { return middle_begin_; }
- const Middle *MiddleEnd() const { return middle_end_; }
+ unsigned char Order() const {
+ return middle_end_ - middle_begin_ + 2;
+ }
- void InitializeFromARPA(const char *file, util::FilePiece &f, std::vector<uint64_t> &counts, const Config &config, SortedVocabulary &vocab, Backing &backing);
+ ProbBackoff &UnknownUnigram() { return unigram_.Unknown(); }
- void LookupUnigram(WordIndex word, float &backoff, Node &node, FullScoreReturn &ret) const {
- unigram.Find(word, ret.prob, backoff, node);
- ret.independent_left = (node.begin == node.end);
- ret.extend_left = static_cast<uint64_t>(word);
+ UnigramPointer LookupUnigram(WordIndex word, Node &next, bool &independent_left, uint64_t &extend_left) const {
+ extend_left = static_cast<uint64_t>(word);
+ UnigramPointer ret(unigram_.Find(word, next));
+ independent_left = (next.begin == next.end);
+ return ret;
}
- bool LookupMiddle(const Middle &mid, WordIndex word, float &backoff, Node &node, FullScoreReturn &ret) const {
- if (!mid.Find(word, ret.prob, backoff, node, ret.extend_left)) return false;
- ret.independent_left = (node.begin == node.end);
- return true;
+ MiddlePointer Unpack(uint64_t extend_pointer, unsigned char extend_length, Node &node) const {
+ return MiddlePointer(quant_, extend_length - 2, middle_begin_[extend_length - 2].ReadEntry(extend_pointer, node));
}
- bool LookupMiddleNoProb(const Middle &mid, WordIndex word, float &backoff, Node &node) const {
- return mid.FindNoProb(word, backoff, node);
+ MiddlePointer LookupMiddle(unsigned char order_minus_2, WordIndex word, Node &node, bool &independent_left, uint64_t &extend_left) const {
+ util::BitAddress address(middle_begin_[order_minus_2].Find(word, node, extend_left));
+ independent_left = (address.base == NULL) || (node.begin == node.end);
+ return MiddlePointer(quant_, order_minus_2, address);
}
- bool LookupLongest(WordIndex word, float &prob, const Node &node) const {
- return longest.Find(word, prob, node);
+ LongestPointer LookupLongest(WordIndex word, const Node &node) const {
+ return LongestPointer(quant_, longest_.Find(word, node));
}
bool FastMakeNode(const WordIndex *begin, const WordIndex *end, Node &node) const {
- // TODO: don't decode backoff.
assert(begin != end);
- FullScoreReturn ignored;
- float ignored_backoff;
- LookupUnigram(*begin, ignored_backoff, node, ignored);
+ bool independent_left;
+ uint64_t ignored;
+ LookupUnigram(*begin, node, independent_left, ignored);
for (const WordIndex *i = begin + 1; i < end; ++i) {
- if (!LookupMiddleNoProb(middle_begin_[i - begin - 1], *i, ignored_backoff, node)) return false;
+ if (independent_left || !LookupMiddle(i - begin - 1, *i, node, independent_left, ignored).Found()) return false;
}
return true;
}
- Node Unpack(uint64_t extend_pointer, unsigned char extend_length, float &prob) const {
- if (extend_length == 1) {
- float ignored;
- Node ret;
- unigram.Find(static_cast<WordIndex>(extend_pointer), prob, ignored, ret);
- return ret;
- }
- return middle_begin_[extend_length - 2].ReadEntry(extend_pointer, prob);
- }
-
private:
friend void BuildTrie<Quant, Bhiksha>(SortedFiles &files, std::vector<uint64_t> &counts, const Config &config, TrieSearch<Quant, Bhiksha> &out, Quant &quant, const SortedVocabulary &vocab, Backing &backing);
@@ -122,8 +111,16 @@ template <class Quant, class Bhiksha> class TrieSearch {
free(middle_begin_);
}
+ typedef trie::BitPackedMiddle<Bhiksha> Middle;
+
+ typedef trie::BitPackedLongest Longest;
+ Longest longest_;
+
Middle *middle_begin_, *middle_end_;
Quant quant_;
+
+ typedef ::lm::ngram::trie::Unigram Unigram;
+ Unigram unigram_;
};
} // namespace trie
diff --git a/lm/state.hh b/lm/state.hh
new file mode 100644
index 000000000..3dbf617bf
--- /dev/null
+++ b/lm/state.hh
@@ -0,0 +1,122 @@
+#ifndef LM_STATE__
+#define LM_STATE__
+
+#include "lm/word_index.hh"
+#include "util/murmur_hash.hh"
+
+#include <string.h>
+
+namespace lm {
+namespace ngram {
+
+// This is a POD but if you want memcmp to return the same as operator==, call
+// ZeroRemaining first.
+class State {
+ public:
+ bool operator==(const State &other) const {
+ if (length != other.length) return false;
+ return !memcmp(words, other.words, length * sizeof(WordIndex));
+ }
+
+ // Three way comparison function.
+ int Compare(const State &other) const {
+ if (length != other.length) return length < other.length ? -1 : 1;
+ return memcmp(words, other.words, length * sizeof(WordIndex));
+ }
+
+ bool operator<(const State &other) const {
+ if (length != other.length) return length < other.length;
+ return memcmp(words, other.words, length * sizeof(WordIndex)) < 0;
+ }
+
+ // Call this before using raw memcmp.
+ void ZeroRemaining() {
+ for (unsigned char i = length; i < KENLM_MAX_ORDER - 1; ++i) {
+ words[i] = 0;
+ backoff[i] = 0.0;
+ }
+ }
+
+ unsigned char Length() const { return length; }
+
+ // You shouldn't need to touch anything below this line, but the members are public so FullState will qualify as a POD.
+ // This order minimizes total size of the struct if WordIndex is 64 bit, float is 32 bit, and alignment of 64 bit integers is 64 bit.
+ WordIndex words[KENLM_MAX_ORDER - 1];
+ float backoff[KENLM_MAX_ORDER - 1];
+ unsigned char length;
+};
+
+inline uint64_t hash_value(const State &state, uint64_t seed = 0) {
+ return util::MurmurHashNative(state.words, sizeof(WordIndex) * state.length, seed);
+}
+
+struct Left {
+ bool operator==(const Left &other) const {
+ return
+ (length == other.length) &&
+ pointers[length - 1] == other.pointers[length - 1] &&
+ full == other.full;
+ }
+
+ int Compare(const Left &other) const {
+ if (length < other.length) return -1;
+ if (length > other.length) return 1;
+ if (pointers[length - 1] > other.pointers[length - 1]) return 1;
+ if (pointers[length - 1] < other.pointers[length - 1]) return -1;
+ return (int)full - (int)other.full;
+ }
+
+ bool operator<(const Left &other) const {
+ return Compare(other) == -1;
+ }
+
+ void ZeroRemaining() {
+ for (uint64_t * i = pointers + length; i < pointers + KENLM_MAX_ORDER - 1; ++i)
+ *i = 0;
+ }
+
+ uint64_t pointers[KENLM_MAX_ORDER - 1];
+ unsigned char length;
+ bool full;
+};
+
+inline uint64_t hash_value(const Left &left) {
+ unsigned char add[2];
+ add[0] = left.length;
+ add[1] = left.full;
+ return util::MurmurHashNative(add, 2, left.length ? left.pointers[left.length - 1] : 0);
+}
+
+struct ChartState {
+ bool operator==(const ChartState &other) {
+ return (right == other.right) && (left == other.left);
+ }
+
+ int Compare(const ChartState &other) const {
+ int lres = left.Compare(other.left);
+ if (lres) return lres;
+ return right.Compare(other.right);
+ }
+
+ bool operator<(const ChartState &other) const {
+ return Compare(other) == -1;
+ }
+
+ void ZeroRemaining() {
+ left.ZeroRemaining();
+ right.ZeroRemaining();
+ }
+
+ Left left;
+ State right;
+};
+
+inline uint64_t hash_value(const ChartState &state) {
+ return hash_value(state.right, hash_value(state.left));
+}
+
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_STATE__
diff --git a/lm/trie.cc b/lm/trie.cc
index 20075bb83..0f1ca574b 100644
--- a/lm/trie.cc
+++ b/lm/trie.cc
@@ -1,7 +1,6 @@
#include "lm/trie.hh"
#include "lm/bhiksha.hh"
-#include "lm/quantize.hh"
#include "util/bit_packing.hh"
#include "util/exception.hh"
#include "util/sorted_uniform.hh"
@@ -58,91 +57,71 @@ void BitPacked::BaseInit(void *base, uint64_t max_vocab, uint8_t remaining_bits)
max_vocab_ = max_vocab;
}
-template <class Quant, class Bhiksha> std::size_t BitPackedMiddle<Quant, Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) {
+template <class Bhiksha> std::size_t BitPackedMiddle<Bhiksha>::Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_ptr, const Config &config) {
return Bhiksha::Size(entries + 1, max_ptr, config) + BaseSize(entries, max_vocab, quant_bits + Bhiksha::InlineBits(entries + 1, max_ptr, config));
}
-template <class Quant, class Bhiksha> BitPackedMiddle<Quant, Bhiksha>::BitPackedMiddle(void *base, const Quant &quant, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config) :
+template <class Bhiksha> BitPackedMiddle<Bhiksha>::BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config) :
BitPacked(),
- quant_(quant),
+ quant_bits_(quant_bits),
// If the offset of the method changes, also change TrieSearch::UpdateConfigFromBinary.
bhiksha_(base, entries + 1, max_next, config),
next_source_(&next_source) {
if (entries + 1 >= (1ULL << 57) || (max_next >= (1ULL << 57))) UTIL_THROW(util::Exception, "Sorry, this does not support more than " << (1ULL << 57) << " n-grams of a particular order. Edit util/bit_packing.hh and fix the bit packing functions.");
- BaseInit(reinterpret_cast<uint8_t*>(base) + Bhiksha::Size(entries + 1, max_next, config), max_vocab, quant.TotalBits() + bhiksha_.InlineBits());
+ BaseInit(reinterpret_cast<uint8_t*>(base) + Bhiksha::Size(entries + 1, max_next, config), max_vocab, quant_bits_ + bhiksha_.InlineBits());
}
-template <class Quant, class Bhiksha> void BitPackedMiddle<Quant, Bhiksha>::Insert(WordIndex word, float prob, float backoff) {
+template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Insert(WordIndex word) {
assert(word <= word_mask_);
uint64_t at_pointer = insert_index_ * total_bits_;
util::WriteInt57(base_, at_pointer, word_bits_, word);
at_pointer += word_bits_;
- quant_.Write(base_, at_pointer, prob, backoff);
- at_pointer += quant_.TotalBits();
+ util::BitAddress ret(base_, at_pointer);
+ at_pointer += quant_bits_;
uint64_t next = next_source_->InsertIndex();
bhiksha_.WriteNext(base_, at_pointer, insert_index_, next);
-
++insert_index_;
+ return ret;
}
-template <class Quant, class Bhiksha> bool BitPackedMiddle<Quant, Bhiksha>::Find(WordIndex word, float &prob, float &backoff, NodeRange &range, uint64_t &pointer) const {
+template <class Bhiksha> util::BitAddress BitPackedMiddle<Bhiksha>::Find(WordIndex word, NodeRange &range, uint64_t &pointer) const {
uint64_t at_pointer;
if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) {
- return false;
+ return util::BitAddress(NULL, 0);
}
pointer = at_pointer;
at_pointer *= total_bits_;
at_pointer += word_bits_;
+ bhiksha_.ReadNext(base_, at_pointer + quant_bits_, pointer, total_bits_, range);
- quant_.Read(base_, at_pointer, prob, backoff);
- at_pointer += quant_.TotalBits();
-
- bhiksha_.ReadNext(base_, at_pointer, pointer, total_bits_, range);
-
- return true;
+ return util::BitAddress(base_, at_pointer);
}
-template <class Quant, class Bhiksha> bool BitPackedMiddle<Quant, Bhiksha>::FindNoProb(WordIndex word, float &backoff, NodeRange &range) const {
- uint64_t index;
- if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, index)) return false;
- uint64_t at_pointer = index * total_bits_;
- at_pointer += word_bits_;
- quant_.ReadBackoff(base_, at_pointer, backoff);
- at_pointer += quant_.TotalBits();
- bhiksha_.ReadNext(base_, at_pointer, index, total_bits_, range);
- return true;
-}
-
-template <class Quant, class Bhiksha> void BitPackedMiddle<Quant, Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) {
+template <class Bhiksha> void BitPackedMiddle<Bhiksha>::FinishedLoading(uint64_t next_end, const Config &config) {
uint64_t last_next_write = (insert_index_ + 1) * total_bits_ - bhiksha_.InlineBits();
bhiksha_.WriteNext(base_, last_next_write, insert_index_ + 1, next_end);
bhiksha_.FinishedLoading(config);
}
-template <class Quant> void BitPackedLongest<Quant>::Insert(WordIndex index, float prob) {
+util::BitAddress BitPackedLongest::Insert(WordIndex index) {
assert(index <= word_mask_);
uint64_t at_pointer = insert_index_ * total_bits_;
util::WriteInt57(base_, at_pointer, word_bits_, index);
at_pointer += word_bits_;
- quant_.Write(base_, at_pointer, prob);
++insert_index_;
+ return util::BitAddress(base_, at_pointer);
}
-template <class Quant> bool BitPackedLongest<Quant>::Find(WordIndex word, float &prob, const NodeRange &range) const {
+util::BitAddress BitPackedLongest::Find(WordIndex word, const NodeRange &range) const {
uint64_t at_pointer;
- if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) return false;
+ if (!FindBitPacked(base_, word_mask_, word_bits_, total_bits_, range.begin, range.end, max_vocab_, word, at_pointer)) return util::BitAddress(NULL, 0);
at_pointer = at_pointer * total_bits_ + word_bits_;
- quant_.Read(base_, at_pointer, prob);
- return true;
+ return util::BitAddress(base_, at_pointer);
}
-template class BitPackedMiddle<DontQuantize::Middle, DontBhiksha>;
-template class BitPackedMiddle<DontQuantize::Middle, ArrayBhiksha>;
-template class BitPackedMiddle<SeparatelyQuantize::Middle, DontBhiksha>;
-template class BitPackedMiddle<SeparatelyQuantize::Middle, ArrayBhiksha>;
-template class BitPackedLongest<DontQuantize::Longest>;
-template class BitPackedLongest<SeparatelyQuantize::Longest>;
+template class BitPackedMiddle<DontBhiksha>;
+template class BitPackedMiddle<ArrayBhiksha>;
} // namespace trie
} // namespace ngram
diff --git a/lm/trie.hh b/lm/trie.hh
index 8fcd995ec..034a14144 100644
--- a/lm/trie.hh
+++ b/lm/trie.hh
@@ -1,12 +1,13 @@
#ifndef LM_TRIE__
#define LM_TRIE__
-#include <stdint.h>
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/bit_packing.hh"
#include <cstddef>
-#include "lm/word_index.hh"
-#include "lm/weights.hh"
+#include <stdint.h>
namespace lm {
namespace ngram {
@@ -24,6 +25,22 @@ struct UnigramValue {
uint64_t Next() const { return next; }
};
+class UnigramPointer {
+ public:
+ explicit UnigramPointer(const ProbBackoff &to) : to_(&to) {}
+
+ UnigramPointer() : to_(NULL) {}
+
+ bool Found() const { return to_ != NULL; }
+
+ float Prob() const { return to_->prob; }
+ float Backoff() const { return to_->backoff; }
+ float Rest() const { return Prob(); }
+
+ private:
+ const ProbBackoff *to_;
+};
+
class Unigram {
public:
Unigram() {}
@@ -47,12 +64,11 @@ class Unigram {
void LoadedBinary() {}
- void Find(WordIndex word, float &prob, float &backoff, NodeRange &next) const {
+ UnigramPointer Find(WordIndex word, NodeRange &next) const {
UnigramValue *val = unigram_ + word;
- prob = val->weights.prob;
- backoff = val->weights.backoff;
next.begin = val->next;
next.end = (val+1)->next;
+ return UnigramPointer(val->weights);
}
private:
@@ -81,40 +97,36 @@ class BitPacked {
uint64_t insert_index_, max_vocab_;
};
-template <class Quant, class Bhiksha> class BitPackedMiddle : public BitPacked {
+template <class Bhiksha> class BitPackedMiddle : public BitPacked {
public:
static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const Config &config);
// next_source need not be initialized.
- BitPackedMiddle(void *base, const Quant &quant, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config);
+ BitPackedMiddle(void *base, uint8_t quant_bits, uint64_t entries, uint64_t max_vocab, uint64_t max_next, const BitPacked &next_source, const Config &config);
- void Insert(WordIndex word, float prob, float backoff);
+ util::BitAddress Insert(WordIndex word);
void FinishedLoading(uint64_t next_end, const Config &config);
void LoadedBinary() { bhiksha_.LoadedBinary(); }
- bool Find(WordIndex word, float &prob, float &backoff, NodeRange &range, uint64_t &pointer) const;
-
- bool FindNoProb(WordIndex word, float &backoff, NodeRange &range) const;
+ util::BitAddress Find(WordIndex word, NodeRange &range, uint64_t &pointer) const;
- NodeRange ReadEntry(uint64_t pointer, float &prob) {
+ util::BitAddress ReadEntry(uint64_t pointer, NodeRange &range) {
uint64_t addr = pointer * total_bits_;
addr += word_bits_;
- quant_.ReadProb(base_, addr, prob);
- NodeRange ret;
- bhiksha_.ReadNext(base_, addr + quant_.TotalBits(), pointer, total_bits_, ret);
- return ret;
+ bhiksha_.ReadNext(base_, addr + quant_bits_, pointer, total_bits_, range);
+ return util::BitAddress(base_, addr);
}
private:
- Quant quant_;
+ uint8_t quant_bits_;
Bhiksha bhiksha_;
const BitPacked *next_source_;
};
-template <class Quant> class BitPackedLongest : public BitPacked {
+class BitPackedLongest : public BitPacked {
public:
static std::size_t Size(uint8_t quant_bits, uint64_t entries, uint64_t max_vocab) {
return BaseSize(entries, max_vocab, quant_bits);
@@ -122,19 +134,18 @@ template <class Quant> class BitPackedLongest : public BitPacked {
BitPackedLongest() {}
- void Init(void *base, const Quant &quant, uint64_t max_vocab) {
- quant_ = quant;
- BaseInit(base, max_vocab, quant_.TotalBits());
+ void Init(void *base, uint8_t quant_bits, uint64_t max_vocab) {
+ BaseInit(base, max_vocab, quant_bits);
}
void LoadedBinary() {}
- void Insert(WordIndex word, float prob);
+ util::BitAddress Insert(WordIndex word);
- bool Find(WordIndex word, float &prob, const NodeRange &node) const;
+ util::BitAddress Find(WordIndex word, const NodeRange &node) const;
private:
- Quant quant_;
+ uint8_t quant_bits_;
};
} // namespace trie
diff --git a/lm/trie_sort.cc b/lm/trie_sort.cc
index b80fed02e..0d83221e2 100644
--- a/lm/trie_sort.cc
+++ b/lm/trie_sort.cc
@@ -148,13 +148,17 @@ template <class Combine> FILE *MergeSortedFiles(FILE *first_file, FILE *second_f
} // namespace
void RecordReader::Init(FILE *file, std::size_t entry_size) {
- rewind(file);
- file_ = file;
+ entry_size_ = entry_size;
data_.reset(malloc(entry_size));
UTIL_THROW_IF(!data_.get(), util::ErrnoException, "Failed to malloc read buffer");
- remains_ = true;
- entry_size_ = entry_size;
- ++*this;
+ file_ = file;
+ if (file) {
+ rewind(file);
+ remains_ = true;
+ ++*this;
+ } else {
+ remains_ = false;
+ }
}
void RecordReader::Overwrite(const void *start, std::size_t amount) {
@@ -169,9 +173,13 @@ void RecordReader::Overwrite(const void *start, std::size_t amount) {
}
void RecordReader::Rewind() {
- rewind(file_);
- remains_ = true;
- ++*this;
+ if (file_) {
+ rewind(file_);
+ remains_ = true;
+ ++*this;
+ } else {
+ remains_ = false;
+ }
}
SortedFiles::SortedFiles(const Config &config, util::FilePiece &f, std::vector<uint64_t> &counts, size_t buffer, const std::string &file_prefix, SortedVocabulary &vocab) {
diff --git a/lm/trie_sort.hh b/lm/trie_sort.hh
index 6ef17eb9f..c1be9bfc4 100644
--- a/lm/trie_sort.hh
+++ b/lm/trie_sort.hh
@@ -3,7 +3,6 @@
#ifndef LM_TRIE_SORT__
#define LM_TRIE_SORT__
-#include "lm/max_order.hh"
#include "lm/word_index.hh"
#include "util/file.hh"
@@ -107,7 +106,7 @@ class SortedFiles {
util::scoped_fd unigram_;
- util::scoped_FILE full_[kMaxOrder - 1], context_[kMaxOrder - 1];
+ util::scoped_FILE full_[KENLM_MAX_ORDER - 1], context_[KENLM_MAX_ORDER - 1];
};
} // namespace trie
diff --git a/lm/value.hh b/lm/value.hh
new file mode 100644
index 000000000..ba716713a
--- /dev/null
+++ b/lm/value.hh
@@ -0,0 +1,157 @@
+#ifndef LM_VALUE__
+#define LM_VALUE__
+
+#include "lm/model_type.hh"
+#include "lm/value_build.hh"
+#include "lm/weights.hh"
+#include "util/bit_packing.hh"
+
+#include <stdint.h>
+
+namespace lm {
+namespace ngram {
+
+// Template proxy for probing unigrams and middle.
+template <class Weights> class GenericProbingProxy {
+ public:
+ explicit GenericProbingProxy(const Weights &to) : to_(&to) {}
+
+ GenericProbingProxy() : to_(0) {}
+
+ bool Found() const { return to_ != 0; }
+
+ float Prob() const {
+ util::FloatEnc enc;
+ enc.f = to_->prob;
+ enc.i |= util::kSignBit;
+ return enc.f;
+ }
+
+ float Backoff() const { return to_->backoff; }
+
+ bool IndependentLeft() const {
+ util::FloatEnc enc;
+ enc.f = to_->prob;
+ return enc.i & util::kSignBit;
+ }
+
+ protected:
+ const Weights *to_;
+};
+
+// Basic proxy for trie unigrams.
+template <class Weights> class GenericTrieUnigramProxy {
+ public:
+ explicit GenericTrieUnigramProxy(const Weights &to) : to_(&to) {}
+
+ GenericTrieUnigramProxy() : to_(0) {}
+
+ bool Found() const { return to_ != 0; }
+ float Prob() const { return to_->prob; }
+ float Backoff() const { return to_->backoff; }
+ float Rest() const { return Prob(); }
+
+ protected:
+ const Weights *to_;
+};
+
+struct BackoffValue {
+ typedef ProbBackoff Weights;
+ static const ModelType kProbingModelType = PROBING;
+
+ class ProbingProxy : public GenericProbingProxy<Weights> {
+ public:
+ explicit ProbingProxy(const Weights &to) : GenericProbingProxy<Weights>(to) {}
+ ProbingProxy() {}
+ float Rest() const { return Prob(); }
+ };
+
+ class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
+ public:
+ explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
+ TrieUnigramProxy() {}
+ float Rest() const { return Prob(); }
+ };
+
+ struct ProbingEntry {
+ typedef uint64_t Key;
+ typedef Weights Value;
+ uint64_t key;
+ ProbBackoff value;
+ uint64_t GetKey() const { return key; }
+ };
+
+ struct TrieUnigramValue {
+ Weights weights;
+ uint64_t next;
+ uint64_t Next() const { return next; }
+ };
+
+ const static bool kDifferentRest = false;
+
+ template <class Model, class C> void Callback(const Config &, unsigned int, typename Model::Vocabulary &, C &callback) {
+ NoRestBuild build;
+ callback(build);
+ }
+};
+
+struct RestValue {
+ typedef RestWeights Weights;
+ static const ModelType kProbingModelType = REST_PROBING;
+
+ class ProbingProxy : public GenericProbingProxy<RestWeights> {
+ public:
+ explicit ProbingProxy(const Weights &to) : GenericProbingProxy<RestWeights>(to) {}
+ ProbingProxy() {}
+ float Rest() const { return to_->rest; }
+ };
+
+ class TrieUnigramProxy : public GenericTrieUnigramProxy<Weights> {
+ public:
+ explicit TrieUnigramProxy(const Weights &to) : GenericTrieUnigramProxy<Weights>(to) {}
+ TrieUnigramProxy() {}
+ float Rest() const { return to_->rest; }
+ };
+
+// gcc 4.1 doesn't properly back dependent types :-(.
+#pragma pack(push)
+#pragma pack(4)
+ struct ProbingEntry {
+ typedef uint64_t Key;
+ typedef Weights Value;
+ Key key;
+ Value value;
+ Key GetKey() const { return key; }
+ };
+
+ struct TrieUnigramValue {
+ Weights weights;
+ uint64_t next;
+ uint64_t Next() const { return next; }
+ };
+#pragma pack(pop)
+
+ const static bool kDifferentRest = true;
+
+ template <class Model, class C> void Callback(const Config &config, unsigned int order, typename Model::Vocabulary &vocab, C &callback) {
+ switch (config.rest_function) {
+ case Config::REST_MAX:
+ {
+ MaxRestBuild build;
+ callback(build);
+ }
+ break;
+ case Config::REST_LOWER:
+ {
+ LowerRestBuild<Model> build(config, order, vocab);
+ callback(build);
+ }
+ break;
+ }
+ }
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_VALUE__
diff --git a/lm/value_build.cc b/lm/value_build.cc
new file mode 100644
index 000000000..6124f8da9
--- /dev/null
+++ b/lm/value_build.cc
@@ -0,0 +1,58 @@
+#include "lm/value_build.hh"
+
+#include "lm/model.hh"
+#include "lm/read_arpa.hh"
+
+namespace lm {
+namespace ngram {
+
+template <class Model> LowerRestBuild<Model>::LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab) {
+ UTIL_THROW_IF(config.rest_lower_files.size() != order - 1, ConfigException, "This model has order " << order << " so there should be " << (order - 1) << " lower-order models for rest cost purposes.");
+ Config for_lower = config;
+ for_lower.rest_lower_files.clear();
+
+ // Unigram models aren't supported, so this is a custom loader.
+ // TODO: optimize the unigram loading?
+ {
+ util::FilePiece uni(config.rest_lower_files[0].c_str());
+ std::vector<uint64_t> number;
+ ReadARPACounts(uni, number);
+ UTIL_THROW_IF(number.size() != 1, FormatLoadException, "Expected the unigram model to have order 1, not " << number.size());
+ ReadNGramHeader(uni, 1);
+ unigrams_.resize(number[0]);
+ unigrams_[0] = config.unknown_missing_logprob;
+ PositiveProbWarn warn;
+ for (uint64_t i = 0; i < number[0]; ++i) {
+ WordIndex w;
+ Prob entry;
+ ReadNGram(uni, 1, vocab, &w, entry, warn);
+ unigrams_[w] = entry.prob;
+ }
+ }
+
+ try {
+ for (unsigned int i = 2; i < order; ++i) {
+ models_.push_back(new Model(config.rest_lower_files[i - 1].c_str(), for_lower));
+ UTIL_THROW_IF(models_.back()->Order() != i, FormatLoadException, "Lower order file " << config.rest_lower_files[i-1] << " should have order " << i);
+ }
+ } catch (...) {
+ for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
+ delete *i;
+ }
+ models_.clear();
+ throw;
+ }
+
+ // TODO: force/check same vocab.
+}
+
+template <class Model> LowerRestBuild<Model>::~LowerRestBuild() {
+ for (typename std::vector<const Model*>::const_iterator i = models_.begin(); i != models_.end(); ++i) {
+ delete *i;
+ }
+}
+
+template class LowerRestBuild<ProbingModel>;
+
+} // namespace ngram
+} // namespace lm
diff --git a/lm/value_build.hh b/lm/value_build.hh
new file mode 100644
index 000000000..461e6a5c9
--- /dev/null
+++ b/lm/value_build.hh
@@ -0,0 +1,97 @@
+#ifndef LM_VALUE_BUILD__
+#define LM_VALUE_BUILD__
+
+#include "lm/weights.hh"
+#include "lm/word_index.hh"
+#include "util/bit_packing.hh"
+
+#include <vector>
+
+namespace lm {
+namespace ngram {
+
+struct Config;
+struct BackoffValue;
+struct RestValue;
+
+class NoRestBuild {
+ public:
+ typedef BackoffValue Value;
+
+ NoRestBuild() {}
+
+ void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
+ void SetRest(const WordIndex *, unsigned int, const ProbBackoff &) const {}
+
+ template <class Second> bool MarkExtends(ProbBackoff &weights, const Second &) const {
+ util::UnsetSign(weights.prob);
+ return false;
+ }
+
+ // Probing doesn't need to go back to unigram.
+ const static bool kMarkEvenLower = false;
+};
+
+class MaxRestBuild {
+ public:
+ typedef RestValue Value;
+
+ MaxRestBuild() {}
+
+ void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
+ void SetRest(const WordIndex *, unsigned int, RestWeights &weights) const {
+ weights.rest = weights.prob;
+ util::SetSign(weights.rest);
+ }
+
+ bool MarkExtends(RestWeights &weights, const RestWeights &to) const {
+ util::UnsetSign(weights.prob);
+ if (weights.rest >= to.rest) return false;
+ weights.rest = to.rest;
+ return true;
+ }
+ bool MarkExtends(RestWeights &weights, const Prob &to) const {
+ util::UnsetSign(weights.prob);
+ if (weights.rest >= to.prob) return false;
+ weights.rest = to.prob;
+ return true;
+ }
+
+ // Probing does need to go back to unigram.
+ const static bool kMarkEvenLower = true;
+};
+
+template <class Model> class LowerRestBuild {
+ public:
+ typedef RestValue Value;
+
+ LowerRestBuild(const Config &config, unsigned int order, const typename Model::Vocabulary &vocab);
+
+ ~LowerRestBuild();
+
+ void SetRest(const WordIndex *, unsigned int, const Prob &/*prob*/) const {}
+ void SetRest(const WordIndex *vocab_ids, unsigned int n, RestWeights &weights) const {
+ typename Model::State ignored;
+ if (n == 1) {
+ weights.rest = unigrams_[*vocab_ids];
+ } else {
+ weights.rest = models_[n-2]->FullScoreForgotState(vocab_ids + 1, vocab_ids + n, *vocab_ids, ignored).prob;
+ }
+ }
+
+ template <class Second> bool MarkExtends(RestWeights &weights, const Second &) const {
+ util::UnsetSign(weights.prob);
+ return false;
+ }
+
+ const static bool kMarkEvenLower = false;
+
+ std::vector<float> unigrams_;
+
+ std::vector<const Model*> models_;
+};
+
+} // namespace ngram
+} // namespace lm
+
+#endif // LM_VALUE_BUILD__
diff --git a/lm/vocab.cc b/lm/vocab.cc
index 9fd698bbf..5de68f16e 100644
--- a/lm/vocab.cc
+++ b/lm/vocab.cc
@@ -196,7 +196,7 @@ WordIndex ProbingVocabulary::Insert(const StringPiece &str) {
}
}
-void ProbingVocabulary::FinishedLoading(ProbBackoff * /*reorder_vocab*/) {
+void ProbingVocabulary::InternalFinishedLoading() {
lookup_.FinishedInserting();
header_->bound = bound_;
header_->version = kProbingVocabularyVersion;
diff --git a/lm/vocab.hh b/lm/vocab.hh
index 343fc98a5..a25432f97 100644
--- a/lm/vocab.hh
+++ b/lm/vocab.hh
@@ -141,7 +141,9 @@ class ProbingVocabulary : public base::Vocabulary {
WordIndex Insert(const StringPiece &str);
- void FinishedLoading(ProbBackoff *reorder_vocab);
+ template <class Weights> void FinishedLoading(Weights * /*reorder_vocab*/) {
+ InternalFinishedLoading();
+ }
std::size_t UnkCountChangePadding() const { return 0; }
@@ -150,6 +152,8 @@ class ProbingVocabulary : public base::Vocabulary {
void LoadedBinary(bool have_words, int fd, EnumerateVocab *to);
private:
+ void InternalFinishedLoading();
+
typedef util::ProbingHashTable<ProbingVocabuaryEntry, util::IdentityHash> Lookup;
Lookup lookup_;
diff --git a/lm/weights.hh b/lm/weights.hh
index 1f38cf5e1..bd5d80342 100644
--- a/lm/weights.hh
+++ b/lm/weights.hh
@@ -12,6 +12,11 @@ struct ProbBackoff {
float prob;
float backoff;
};
+struct RestWeights {
+ float prob;
+ float backoff;
+ float rest;
+};
} // namespace lm
#endif // LM_WEIGHTS__
diff --git a/mert/BleuScorer.cpp b/mert/BleuScorer.cpp
index 39df32dc2..b9cae5a15 100644
--- a/mert/BleuScorer.cpp
+++ b/mert/BleuScorer.cpp
@@ -28,6 +28,10 @@ const char REFLEN_CLOSEST[] = "closest";
} // namespace
+namespace MosesTuning
+{
+
+
BleuScorer::BleuScorer(const string& config)
: StatisticsBasedScorer("BLEU", config),
m_ref_length_type(CLOSEST) {
@@ -164,7 +168,7 @@ void BleuScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
entry.set(stats);
}
-float BleuScorer::calculateScore(const vector<int>& comps) const
+statscore_t BleuScorer::calculateScore(const vector<int>& comps) const
{
CHECK(comps.size() == kBleuNgramOrder * 2 + 1);
@@ -337,3 +341,5 @@ float BleuScorer::sentenceLevelBleuPlusOne(const vector<float>& stats) {
//cerr << brevity << " -> " << exp(logbleu) << endl;
return exp(logbleu);
}
+
+}
diff --git a/mert/BleuScorer.h b/mert/BleuScorer.h
index d0da10bac..1927183a9 100644
--- a/mert/BleuScorer.h
+++ b/mert/BleuScorer.h
@@ -7,13 +7,16 @@
#include "Types.h"
#include "ScoreData.h"
-#include "Scorer.h"
+#include "StatisticsBasedScorer.h"
#include "ScopedVector.h"
+namespace MosesTuning
+{
+
const int kBleuNgramOrder = 4;
class NgramCounts;
-class Reference;
+class Reference;
using namespace std;
@@ -37,7 +40,7 @@ public:
virtual void setReferenceFiles(const std::vector<std::string>& referenceFiles);
virtual void prepareStats(std::size_t sid, const std::string& text, ScoreStats& entry);
- virtual float calculateScore(const std::vector<int>& comps) const;
+ virtual statscore_t calculateScore(const std::vector<int>& comps) const;
virtual std::size_t NumberOfScores() const { return 2 * kBleuNgramOrder + 1; }
int CalcReferenceLength(std::size_t sentence_id, std::size_t length);
@@ -85,4 +88,6 @@ float sentenceLevelBackgroundBleu(const std::vector<float>& sent, const std::vec
*/
float unsmoothedBleu(const std::vector<float>& stats);
+}
+
#endif // MERT_BLEU_SCORER_H_
diff --git a/mert/BleuScorerTest.cpp b/mert/BleuScorerTest.cpp
index 5960507e8..0d721422c 100644
--- a/mert/BleuScorerTest.cpp
+++ b/mert/BleuScorerTest.cpp
@@ -8,6 +8,8 @@
#include "Vocabulary.h"
#include "Util.h"
+using namespace MosesTuning;
+
namespace {
NgramCounts* g_counts = NULL;
diff --git a/mert/CderScorer.cpp b/mert/CderScorer.cpp
index 8a835f17d..cece29034 100644
--- a/mert/CderScorer.cpp
+++ b/mert/CderScorer.cpp
@@ -14,6 +14,10 @@ inline int CalcDistance(int word1, int word2) {
} // namespace
+namespace MosesTuning
+{
+
+
CderScorer::CderScorer(const string& config, bool allowed_long_jumps)
: StatisticsBasedScorer(allowed_long_jumps ? "CDER" : "WER", config),
m_allowed_long_jumps(allowed_long_jumps) {}
@@ -124,3 +128,5 @@ void CderScorer::computeCD(const sent_t& cand, const sent_t& ref,
delete row;
}
+
+}
diff --git a/mert/CderScorer.h b/mert/CderScorer.h
index 060243162..60b6ad125 100644
--- a/mert/CderScorer.h
+++ b/mert/CderScorer.h
@@ -4,7 +4,11 @@
#include <string>
#include <vector>
#include "Types.h"
-#include "Scorer.h"
+#include "StatisticsBasedScorer.h"
+
+namespace MosesTuning
+{
+
/**
* CderScorer class can compute both CDER and WER metric.
@@ -38,4 +42,6 @@ class CderScorer: public StatisticsBasedScorer {
CderScorer& operator=(const CderScorer&);
};
+}
+
#endif // MERT_CDER_SCORER_H_
diff --git a/mert/Data.cpp b/mert/Data.cpp
index 2a6bd5e92..3c0a03db0 100644
--- a/mert/Data.cpp
+++ b/mert/Data.cpp
@@ -19,6 +19,8 @@
using namespace std;
+namespace MosesTuning
+{
Data::Data(Scorer* scorer, const string& sparse_weights_file)
: m_scorer(scorer),
@@ -135,7 +137,7 @@ void Data::loadNBest(const string &file)
throw runtime_error("Unable to open: " + file);
ScoreStats scoreentry;
- string line, sentence_index, sentence, feature_str;
+ string line, sentence_index, sentence, feature_str, alignment;
while (getline(inp, line, '\n')) {
if (line.empty()) continue;
@@ -146,7 +148,24 @@ void Data::loadNBest(const string &file)
getNextPound(line, sentence, "|||"); // second field
getNextPound(line, feature_str, "|||"); // third field
+ if (line.length() > 0) {
+ string temp;
+ getNextPound(line, temp, "|||"); //fourth field sentence score
+ if (line.length() > 0) {
+ getNextPound(line, alignment, "|||"); //fifth field (if present) is either phrase or word alignment
+ if (line.length() > 0) {
+ getNextPound(line, alignment, "|||"); //sixth field (if present) is word alignment
+ }
+ }
+ }
+ //TODO check alignment exists if scorers need it
+
+ if (m_scorer->useAlignment()) {
+ sentence += "|||";
+ sentence += alignment;
+ }
m_scorer->prepareStats(sentence_index, sentence, scoreentry);
+
m_score_data->add(scoreentry, sentence_index);
// examine first line for name of features
@@ -260,3 +279,6 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
//cerr << endl;
}
}
+
+}
+
diff --git a/mert/Data.h b/mert/Data.h
index 0b971ab1f..528a396cd 100644
--- a/mert/Data.h
+++ b/mert/Data.h
@@ -16,6 +16,9 @@
#include "FeatureData.h"
#include "ScoreData.h"
+namespace MosesTuning
+{
+
class Scorer;
typedef boost::shared_ptr<ScoreData> ScoreDataHandle;
@@ -91,4 +94,6 @@ public:
const std::string& sentence_index);
};
+}
+
#endif // MERT_DATA_H_
diff --git a/mert/DataTest.cpp b/mert/DataTest.cpp
index e94d4ffe9..9856d29cd 100644
--- a/mert/DataTest.cpp
+++ b/mert/DataTest.cpp
@@ -7,6 +7,8 @@
#include <boost/scoped_ptr.hpp>
+using namespace MosesTuning;
+
//very basic test of sharding
BOOST_AUTO_TEST_CASE(shard_basic) {
boost::scoped_ptr<Scorer> scorer(ScorerFactory::getScorer("BLEU", ""));
diff --git a/mert/Fdstream.h b/mert/Fdstream.h
index 93f705dfe..6dbdb40a6 100644
--- a/mert/Fdstream.h
+++ b/mert/Fdstream.h
@@ -13,6 +13,9 @@
#define BUFFER_SIZE (32768)
+namespace MosesTuning
+{
+
class _fdstream
{
protected:
@@ -164,4 +167,6 @@ private:
#error "Not supported"
#endif
+}
+
#endif // _FDSTREAM_
diff --git a/mert/FeatureArray.cpp b/mert/FeatureArray.cpp
index 6fc12c5c0..0e3114bca 100644
--- a/mert/FeatureArray.cpp
+++ b/mert/FeatureArray.cpp
@@ -14,6 +14,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
FeatureArray::FeatureArray()
: m_index(""), m_num_features(0){}
@@ -154,3 +158,6 @@ bool FeatureArray::check_consistency() const
}
return true;
}
+
+}
+
diff --git a/mert/FeatureArray.h b/mert/FeatureArray.h
index 5ce5e1b1a..d50be0fa0 100644
--- a/mert/FeatureArray.h
+++ b/mert/FeatureArray.h
@@ -13,6 +13,10 @@
#include <iosfwd>
#include "FeatureStats.h"
+namespace MosesTuning
+{
+
+
const char FEATURES_TXT_BEGIN[] = "FEATURES_TXT_BEGIN_0";
const char FEATURES_TXT_END[] = "FEATURES_TXT_END_0";
const char FEATURES_BIN_BEGIN[] = "FEATURES_BIN_BEGIN_0";
@@ -77,4 +81,6 @@ public:
bool check_consistency() const;
};
+}
+
#endif // MERT_FEATURE_ARRAY_H_
diff --git a/mert/FeatureData.cpp b/mert/FeatureData.cpp
index 91e004734..e8c6d79ca 100644
--- a/mert/FeatureData.cpp
+++ b/mert/FeatureData.cpp
@@ -14,6 +14,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
FeatureData::FeatureData()
: m_num_features(0) {}
@@ -162,3 +166,6 @@ string FeatureData::ToString() const {
return res;
}
+
+}
+
diff --git a/mert/FeatureData.h b/mert/FeatureData.h
index 4a20c8379..4e33dd3c3 100644
--- a/mert/FeatureData.h
+++ b/mert/FeatureData.h
@@ -14,6 +14,10 @@
#include <stdexcept>
#include "FeatureArray.h"
+namespace MosesTuning
+{
+
+
class FeatureData
{
private:
@@ -130,4 +134,6 @@ public:
std::string ToString() const;
};
+}
+
#endif // MERT_FEATURE_DATA_H_
diff --git a/mert/FeatureDataIterator.cpp b/mert/FeatureDataIterator.cpp
index 7f72255d4..7ada05a61 100644
--- a/mert/FeatureDataIterator.cpp
+++ b/mert/FeatureDataIterator.cpp
@@ -29,6 +29,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
using namespace util;
+namespace MosesTuning
+{
+
+
int ParseInt(const StringPiece& str ) {
char* errIndex;
//could wrap?
@@ -130,3 +134,6 @@ bool FeatureDataIterator::equal(const FeatureDataIterator& rhs) const {
const vector<FeatureDataItem>& FeatureDataIterator::dereference() const {
return m_next;
}
+
+}
+
diff --git a/mert/FeatureDataIterator.h b/mert/FeatureDataIterator.h
index 9bc5f03f7..b8f138c49 100644
--- a/mert/FeatureDataIterator.h
+++ b/mert/FeatureDataIterator.h
@@ -37,6 +37,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureStats.h"
+namespace MosesTuning
+{
+
class FileFormatException : public util::Exception
{
@@ -91,4 +94,6 @@ class FeatureDataIterator :
std::vector<FeatureDataItem> m_next;
};
+}
+
#endif // MERT_FEATURE_DATA_ITERATOR_H_
diff --git a/mert/FeatureDataTest.cpp b/mert/FeatureDataTest.cpp
index ed70f7971..0f3d6a596 100644
--- a/mert/FeatureDataTest.cpp
+++ b/mert/FeatureDataTest.cpp
@@ -5,6 +5,8 @@
#include <sstream>
+using namespace MosesTuning;
+
namespace {
void CheckFeatureMap(const FeatureData* feature_data,
diff --git a/mert/FeatureStats.cpp b/mert/FeatureStats.cpp
index 834b63b4c..22f62e234 100644
--- a/mert/FeatureStats.cpp
+++ b/mert/FeatureStats.cpp
@@ -22,6 +22,10 @@ namespace {
const int kAvailableSize = 8;
} // namespace
+namespace MosesTuning
+{
+
+
SparseVector::name2id_t SparseVector::m_name_to_id;
SparseVector::id2name_t SparseVector::m_id_to_name;
@@ -294,7 +298,6 @@ ostream& operator<<(ostream& o, const FeatureStats& e)
return o;
}
-//ADEED_BY_TS
bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
size_t size = f1.size();
@@ -308,4 +311,5 @@ bool operator==(const FeatureStats& f1, const FeatureStats& f2) {
return true;
}
-//END_ADDED
+
+}
diff --git a/mert/FeatureStats.h b/mert/FeatureStats.h
index 2dc7883ce..883a89b97 100644
--- a/mert/FeatureStats.h
+++ b/mert/FeatureStats.h
@@ -16,6 +16,10 @@
#include <vector>
#include "Types.h"
+namespace MosesTuning
+{
+
+
// Minimal sparse vector
class SparseVector {
public:
@@ -121,8 +125,8 @@ public:
friend std::ostream& operator<<(std::ostream& o, const FeatureStats& e);
};
-//ADEED_BY_TS
bool operator==(const FeatureStats& f1, const FeatureStats& f2);
-//END_ADDED
+
+}
#endif // MERT_FEATURE_STATS_H_
diff --git a/mert/HypPackEnumerator.cpp b/mert/HypPackEnumerator.cpp
index ffbf3cfb5..776c02857 100644
--- a/mert/HypPackEnumerator.cpp
+++ b/mert/HypPackEnumerator.cpp
@@ -6,6 +6,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
StreamingHypPackEnumerator::StreamingHypPackEnumerator
(
vector<std::string> const& featureFiles,
@@ -40,6 +44,7 @@ size_t StreamingHypPackEnumerator::num_dense() const {
void StreamingHypPackEnumerator::prime(){
m_current_indexes.clear();
+ m_current_featureVectors.clear();
boost::unordered_set<FeatureDataItem> seen;
m_primed = true;
@@ -57,7 +62,7 @@ void StreamingHypPackEnumerator::prime(){
exit(1);
}
for (size_t j = 0; j < m_featureDataIters[i]->size(); ++j) {
- FeatureDataItem item = m_featureDataIters[i]->operator[](j);
+ const FeatureDataItem& item = m_featureDataIters[i]->operator[](j);
// Dedup
if(seen.find(item)==seen.end()) {
seen.insert(item);
@@ -73,6 +78,7 @@ void StreamingHypPackEnumerator::prime(){
}
// Store item for retrieval
m_current_indexes.push_back(pair<size_t,size_t>(i,j));
+ m_current_featureVectors.push_back(MiraFeatureVector(item));
}
}
}
@@ -103,6 +109,7 @@ void StreamingHypPackEnumerator::next(){
++m_scoreDataIters[i];
}
m_sentenceId++;
+ if(m_sentenceId % 100 == 0) cerr << ".";
if(!finished()) prime();
}
@@ -114,13 +121,12 @@ size_t StreamingHypPackEnumerator::cur_size(){
return m_current_indexes.size();
}
-const FeatureDataItem& StreamingHypPackEnumerator::featuresAt(size_t index){
+const MiraFeatureVector& StreamingHypPackEnumerator::featuresAt(size_t index){
if(!m_primed) {
cerr << "Querying features from an unprimed HypPackEnumerator" << endl;
exit(1);
}
- const pair<size_t,size_t>& pij = m_current_indexes[index];
- return m_featureDataIters[pij.first]->operator[](pij.second);
+ return m_current_featureVectors[index];
}
const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
@@ -132,6 +138,10 @@ const ScoreDataItem& StreamingHypPackEnumerator::scoresAt(size_t index) {
return m_scoreDataIters[pij.first]->operator[](pij.second);
}
+size_t StreamingHypPackEnumerator::cur_id() {
+ return m_sentenceId;
+}
+
/* --------- RandomAccessHypPackEnumerator ------------- */
RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> const& featureFiles,
@@ -141,7 +151,7 @@ RandomAccessHypPackEnumerator::RandomAccessHypPackEnumerator(vector<string> cons
StreamingHypPackEnumerator train(featureFiles,scoreFiles);
size_t index=0;
for(train.reset(); !train.finished(); train.next()) {
- m_features.push_back(vector<FeatureDataItem>());
+ m_features.push_back(vector<MiraFeatureVector>());
m_scores.push_back(vector<ScoreDataItem>());
for(size_t j=0;j<train.cur_size();j++) {
m_features.back().push_back(train.featuresAt(j));
@@ -174,16 +184,20 @@ size_t RandomAccessHypPackEnumerator::cur_size() {
assert(m_features[m_indexes[m_cur_index]].size()==m_scores[m_indexes[m_cur_index]].size());
return m_features[m_indexes[m_cur_index]].size();
}
-const FeatureDataItem& RandomAccessHypPackEnumerator::featuresAt(size_t i) {
+const MiraFeatureVector& RandomAccessHypPackEnumerator::featuresAt(size_t i) {
return m_features[m_indexes[m_cur_index]][i];
}
const ScoreDataItem& RandomAccessHypPackEnumerator::scoresAt(size_t i) {
return m_scores[m_indexes[m_cur_index]][i];
}
-
+size_t RandomAccessHypPackEnumerator::cur_id() {
+ return m_indexes[m_cur_index];
+}
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:
+
+}
diff --git a/mert/HypPackEnumerator.h b/mert/HypPackEnumerator.h
index d878c2625..690e53103 100644
--- a/mert/HypPackEnumerator.h
+++ b/mert/HypPackEnumerator.h
@@ -16,6 +16,11 @@
#include "FeatureDataIterator.h"
#include "ScoreDataIterator.h"
+#include "MiraFeatureVector.h"
+
+namespace MosesTuning
+{
+
// Start with these abstract classes
@@ -27,9 +32,10 @@ public:
virtual bool finished() = 0;
virtual void next() = 0;
+ virtual std::size_t cur_id() = 0;
virtual std::size_t cur_size() = 0;
virtual std::size_t num_dense() const = 0;
- virtual const FeatureDataItem& featuresAt(std::size_t i) = 0;
+ virtual const MiraFeatureVector& featuresAt(std::size_t i) = 0;
virtual const ScoreDataItem& scoresAt(std::size_t i) = 0;
};
@@ -46,8 +52,9 @@ public:
virtual bool finished();
virtual void next();
+ virtual std::size_t cur_id();
virtual std::size_t cur_size();
- virtual const FeatureDataItem& featuresAt(std::size_t i);
+ virtual const MiraFeatureVector& featuresAt(std::size_t i);
virtual const ScoreDataItem& scoresAt(std::size_t i);
private:
@@ -62,6 +69,7 @@ private:
std::vector<FeatureDataIterator> m_featureDataIters;
std::vector<ScoreDataIterator> m_scoreDataIters;
std::vector<std::pair<std::size_t,std::size_t> > m_current_indexes;
+ std::vector<MiraFeatureVector> m_current_featureVectors;
};
// Instantiation that reads into memory
@@ -79,8 +87,9 @@ public:
virtual bool finished();
virtual void next();
+ virtual std::size_t cur_id();
virtual std::size_t cur_size();
- virtual const FeatureDataItem& featuresAt(std::size_t i);
+ virtual const MiraFeatureVector& featuresAt(std::size_t i);
virtual const ScoreDataItem& scoresAt(std::size_t i);
private:
@@ -88,10 +97,12 @@ private:
std::size_t m_cur_index;
std::size_t m_num_dense;
std::vector<std::size_t> m_indexes;
- std::vector<std::vector<FeatureDataItem> > m_features;
+ std::vector<std::vector<MiraFeatureVector> > m_features;
std::vector<std::vector<ScoreDataItem> > m_scores;
};
+}
+
#endif // MERT_HYP_PACK_COLLECTION_H
// --Emacs trickery--
diff --git a/mert/InterpolatedScorer.cpp b/mert/InterpolatedScorer.cpp
index 9db147d91..b218acf14 100644
--- a/mert/InterpolatedScorer.cpp
+++ b/mert/InterpolatedScorer.cpp
@@ -4,6 +4,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
// TODO: This is too long. Consider creating a function for
// initialization such as Init().
InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
@@ -62,6 +66,17 @@ InterpolatedScorer::InterpolatedScorer(const string& name, const string& config)
cerr <<endl;
}
+bool InterpolatedScorer::useAlignment() const {
+ //cout << "InterpolatedScorer::useAlignment" << endl;
+ for (vector<Scorer*>::const_iterator itsc = m_scorers.begin(); itsc < m_scorers.end(); itsc++) {
+ if ((*itsc)->useAlignment()) {
+ //cout <<"InterpolatedScorer::useAlignment Returning true"<<endl;
+ return true;
+ }
+ }
+ return false;
+};
+
void InterpolatedScorer::setScoreData(ScoreData* data)
{
size_t last = 0;
@@ -152,11 +167,23 @@ void InterpolatedScorer::setReferenceFiles(const vector<string>& referenceFiles)
void InterpolatedScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
{
stringstream buff;
+ string align = text;
+ string sentence = "";
+ size_t alignmentData = text.find("|||");
+ //Get sentence and alignment parts
+ if(alignmentData != string::npos) {
+ getNextPound(align,sentence, "|||");
+ }
+
int i = 0;
- for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin();
- itsc != m_scorers.end(); ++itsc) {
+ for (ScopedVector<Scorer>::iterator itsc = m_scorers.begin(); itsc != m_scorers.end(); ++itsc) {
ScoreStats tempEntry;
- (*itsc)->prepareStats(sid, text, tempEntry);
+ if ((*itsc)->useAlignment()) {
+ (*itsc)->prepareStats(sid, text, tempEntry);
+ }
+ else {
+ (*itsc)->prepareStats(sid, sentence, tempEntry);
+ }
if (i > 0) buff << " ";
buff << tempEntry;
i++;
@@ -195,3 +222,6 @@ void InterpolatedScorer::setFilter(const string& filterCommand)
m_scorers[i]->setFilter(csplit[i]);
}
}
+
+}
+
diff --git a/mert/InterpolatedScorer.h b/mert/InterpolatedScorer.h
index 321b50d20..49c065d27 100644
--- a/mert/InterpolatedScorer.h
+++ b/mert/InterpolatedScorer.h
@@ -8,6 +8,10 @@
#include "Scorer.h"
#include "ScopedVector.h"
+namespace MosesTuning
+{
+
+
/**
* Class that includes other scorers eg.
* Interpolated HAMMING and BLEU scorer **/
@@ -42,6 +46,8 @@ public:
virtual void setFilter(const std::string& filterCommand);
+ bool useAlignment() const;
+
protected:
ScopedVector<Scorer> m_scorers;
@@ -52,4 +58,6 @@ protected:
std::vector<float> m_scorer_weights;
};
+}
+
#endif // MERT_INTERPOLATED_SCORER_H_
diff --git a/mert/Jamfile b/mert/Jamfile
index bf2a52b2a..828d5c367 100644
--- a/mert/Jamfile
+++ b/mert/Jamfile
@@ -45,6 +45,10 @@ CderScorer.cpp
MergeScorer.cpp
Vocabulary.cpp
PreProcessFilter.cpp
+SentenceLevelScorer.cpp
+Permutation.cpp
+PermutationScorer.cpp
+StatisticsBasedScorer.cpp
../util//kenutil m ..//z ;
exe mert : mert.cpp mert_lib bleu_lib ../moses/src//ThreadPool ;
@@ -74,3 +78,4 @@ unit-test vocabulary_test : VocabularyTest.cpp mert_lib ..//boost_unit_test_fram
install legacy : programs : <location>. ;
lib bleu_lib : BleuScorer.cpp mert_lib : : : <include>. ;
+
diff --git a/mert/MergeScorer.cpp b/mert/MergeScorer.cpp
index dbdd31cf7..3f26df7e7 100644
--- a/mert/MergeScorer.cpp
+++ b/mert/MergeScorer.cpp
@@ -14,6 +14,10 @@
using namespace std;
using namespace TERCpp;
+namespace MosesTuning
+{
+
+
MergeScorer::MergeScorer(const string& config)
: StatisticsBasedScorer("MERGE", config) {}
@@ -121,3 +125,6 @@ float MergeScorer::calculateScore(const std::vector< int >& comps) const
}
return result;
}
+
+}
+
diff --git a/mert/MergeScorer.h b/mert/MergeScorer.h
index 4031ad788..5800b3d2a 100644
--- a/mert/MergeScorer.h
+++ b/mert/MergeScorer.h
@@ -4,7 +4,11 @@
#include <string>
#include <vector>
-#include "Scorer.h"
+#include "StatisticsBasedScorer.h"
+
+namespace MosesTuning
+{
+
class PerScorer;
class ScoreStats;
@@ -33,4 +37,6 @@ protected:
MergeScorer& operator=(const MergeScorer&);
};
+}
+
#endif // MERT_MERGE_SCORER_H_
diff --git a/mert/MiraFeatureVector.cpp b/mert/MiraFeatureVector.cpp
index b72d29595..95805c295 100644
--- a/mert/MiraFeatureVector.cpp
+++ b/mert/MiraFeatureVector.cpp
@@ -1,15 +1,22 @@
#include <cmath>
+#include <iomanip>
#include "MiraFeatureVector.h"
using namespace std;
+namespace MosesTuning
+{
+
+
MiraFeatureVector::MiraFeatureVector(const FeatureDataItem& vec)
: m_dense(vec.dense)
{
vector<size_t> sparseFeats = vec.sparse.feats();
bool bFirst = true;
size_t lastFeat = 0;
+ m_sparseFeats.reserve(sparseFeats.size());
+ m_sparseVals.reserve(sparseFeats.size());
for(size_t i=0;i<sparseFeats.size();i++)
{
size_t feat = m_dense.size() + sparseFeats[i];
@@ -58,14 +65,14 @@ ValType MiraFeatureVector::val(size_t index) const {
if(index < m_dense.size())
return m_dense[index];
else
- return m_sparseVals[index];
+ return m_sparseVals[index-m_dense.size()];
}
size_t MiraFeatureVector::feat(size_t index) const {
if(index < m_dense.size())
return index;
else
- return m_sparseFeats[index];
+ return m_sparseFeats[index-m_dense.size()];
}
size_t MiraFeatureVector::size() const {
@@ -139,8 +146,20 @@ MiraFeatureVector operator-(const MiraFeatureVector& a, const MiraFeatureVector&
return MiraFeatureVector(dense,sparseFeats,sparseVals);
}
+ostream& operator<<(ostream& o, const MiraFeatureVector& e)
+{
+ for(size_t i=0;i<e.size();i++) {
+ if(i>0) o << " ";
+ o << e.feat(i) << ":" << e.val(i);
+ }
+ return o;
+}
+
// --Emacs trickery--
// Local Variables:
// mode:c++
// c-basic-offset:2
// End:
+
+}
+
diff --git a/mert/MiraFeatureVector.h b/mert/MiraFeatureVector.h
index 31dd025c3..60e765605 100644
--- a/mert/MiraFeatureVector.h
+++ b/mert/MiraFeatureVector.h
@@ -13,9 +13,14 @@
#define MERT_MIRA_FEATURE_VECTOR_H
#include <vector>
+#include <iostream>
#include "FeatureDataIterator.h"
+namespace MosesTuning
+{
+
+
typedef FeatureStatsType ValType;
class MiraFeatureVector {
@@ -34,12 +39,16 @@ public:
friend MiraFeatureVector operator-(const MiraFeatureVector& a,
const MiraFeatureVector& b);
+ friend std::ostream& operator<<(std::ostream& o, const MiraFeatureVector& e);
+
private:
std::vector<ValType> m_dense;
std::vector<std::size_t> m_sparseFeats;
std::vector<ValType> m_sparseVals;
};
+} // namespace
+
#endif // MERT_FEATURE_VECTOR_H
// --Emacs trickery--
@@ -47,3 +56,5 @@ private:
// mode:c++
// c-basic-offset:2
// End:
+
+
diff --git a/mert/MiraWeightVector.cpp b/mert/MiraWeightVector.cpp
index 7e17a2714..c6f0261dc 100644
--- a/mert/MiraWeightVector.cpp
+++ b/mert/MiraWeightVector.cpp
@@ -1,7 +1,13 @@
#include "MiraWeightVector.h"
+#include <cmath>
+
using namespace std;
+namespace MosesTuning
+{
+
+
/**
* Constructor, initializes to the zero vector
*/
@@ -113,6 +119,17 @@ AvgWeightVector::AvgWeightVector(const MiraWeightVector& wv)
:m_wv(wv)
{}
+ostream& operator<<(ostream& o, const MiraWeightVector& e)
+{
+ for(size_t i=0;i<e.m_weights.size();i++) {
+ if(abs(e.m_weights[i])>1e-8) {
+ if(i>0) o << " ";
+ cerr << i << ":" << e.m_weights[i];
+ }
+ }
+ return o;
+}
+
ValType AvgWeightVector::weight(size_t index) const
{
if(m_wv.m_numUpdates==0) return m_wv.weight(index);
@@ -143,3 +160,5 @@ size_t AvgWeightVector::size() const {
// mode:c++
// c-basic-offset:2
// End:
+}
+
diff --git a/mert/MiraWeightVector.h b/mert/MiraWeightVector.h
index 65b374625..30f8adfa4 100644
--- a/mert/MiraWeightVector.h
+++ b/mert/MiraWeightVector.h
@@ -11,9 +11,14 @@
#define MERT_MIRA_WEIGHT_VECTOR_H
#include <vector>
+#include <iostream>
#include "MiraFeatureVector.h"
+namespace MosesTuning
+{
+
+
class AvgWeightVector;
class MiraWeightVector {
@@ -59,6 +64,8 @@ public:
friend class AvgWeightVector;
+ friend std::ostream& operator<<(std::ostream& o, const MiraWeightVector& e);
+
private:
/**
* Updates a weight and lazily updates its total
@@ -102,3 +109,5 @@ private:
// mode:c++
// c-basic-offset:2
// End:
+
+}
diff --git a/mert/Ngram.h b/mert/Ngram.h
index 846604f3f..d3e8041a3 100644
--- a/mert/Ngram.h
+++ b/mert/Ngram.h
@@ -5,6 +5,9 @@
#include <map>
#include <string>
+namespace MosesTuning
+{
+
/** A simple STL-std::map based n-gram counts. Basically, we provide
* typical accessors and mutaors, but we intentionally does not allow
* erasing elements.
@@ -95,4 +98,6 @@ class NgramCounts {
std::map<Key, Value, NgramComparator> m_counts;
};
+}
+
#endif // MERT_NGRAM_H_
diff --git a/mert/NgramTest.cpp b/mert/NgramTest.cpp
index a07fca42c..e6218206f 100644
--- a/mert/NgramTest.cpp
+++ b/mert/NgramTest.cpp
@@ -3,6 +3,8 @@
#define BOOST_TEST_MODULE MertNgram
#include <boost/test/unit_test.hpp>
+using namespace MosesTuning;
+
BOOST_AUTO_TEST_CASE(ngram_basic) {
NgramCounts counts;
NgramCounts::Key key;
diff --git a/mert/Optimizer.cpp b/mert/Optimizer.cpp
index 39e9aac1b..e5f5854b2 100644
--- a/mert/Optimizer.cpp
+++ b/mert/Optimizer.cpp
@@ -33,6 +33,10 @@ inline float intersect(float m1, float b1, float m2, float b2)
} // namespace
+namespace MosesTuning
+{
+
+
Optimizer::Optimizer(unsigned Pd, const vector<unsigned>& i2O, const vector<bool>& pos, const vector<parameter_t>& start, unsigned int nrandom)
: m_scorer(NULL), m_feature_data(), m_num_random_directions(nrandom), m_positive(pos)
{
@@ -472,3 +476,5 @@ statscore_t RandomOptimizer::TrueRun(Point& P) const
P.SetScore(score);
return score;
}
+
+}
diff --git a/mert/Optimizer.h b/mert/Optimizer.h
index f196878f2..f81d59d96 100644
--- a/mert/Optimizer.h
+++ b/mert/Optimizer.h
@@ -10,6 +10,10 @@
static const float kMaxFloat = std::numeric_limits<float>::max();
+namespace MosesTuning
+{
+
+
class Point;
/**
@@ -109,4 +113,6 @@ public:
virtual statscore_t TrueRun(Point&) const;
};
+}
+
#endif // OPTIMIZER_H
diff --git a/mert/OptimizerFactory.cpp b/mert/OptimizerFactory.cpp
index 6cafd15b0..b33194f33 100644
--- a/mert/OptimizerFactory.cpp
+++ b/mert/OptimizerFactory.cpp
@@ -3,6 +3,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
vector<string> OptimizerFactory::m_type_names;
void OptimizerFactory::SetTypeNames()
@@ -65,3 +69,5 @@ Optimizer* OptimizerFactory::BuildOptimizer(unsigned dim,
return NULL;
}
}
+
+}
diff --git a/mert/OptimizerFactory.h b/mert/OptimizerFactory.h
index 7d5e02323..ae34bcb00 100644
--- a/mert/OptimizerFactory.h
+++ b/mert/OptimizerFactory.h
@@ -4,6 +4,10 @@
#include <vector>
#include "Types.h"
+namespace MosesTuning
+{
+
+
class Optimizer;
class OptimizerFactory
@@ -39,4 +43,7 @@ class OptimizerFactory
static std::vector<std::string> m_type_names;
};
+}
+
+
#endif // MERT_OPTIMIZER_FACTORY_H_
diff --git a/mert/OptimizerFactoryTest.cpp b/mert/OptimizerFactoryTest.cpp
index 684af711a..4d259c68d 100644
--- a/mert/OptimizerFactoryTest.cpp
+++ b/mert/OptimizerFactoryTest.cpp
@@ -5,6 +5,8 @@
#include <boost/test/unit_test.hpp>
#include <boost/scoped_ptr.hpp>
+using namespace MosesTuning;
+
namespace {
inline bool CheckBuildOptimizer(unsigned dim,
diff --git a/mert/PerScorer.cpp b/mert/PerScorer.cpp
index 67b633872..06b53436f 100644
--- a/mert/PerScorer.cpp
+++ b/mert/PerScorer.cpp
@@ -8,6 +8,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
PerScorer::PerScorer(const string& config)
: StatisticsBasedScorer("PER",config) {}
@@ -86,3 +90,6 @@ float PerScorer::calculateScore(const vector<int>& comps) const
return num/denom;
}
}
+
+}
+
diff --git a/mert/PerScorer.h b/mert/PerScorer.h
index 7f9351f1a..76ea9bfd7 100644
--- a/mert/PerScorer.h
+++ b/mert/PerScorer.h
@@ -5,7 +5,11 @@
#include <string>
#include <vector>
#include "Types.h"
-#include "Scorer.h"
+#include "StatisticsBasedScorer.h"
+
+namespace MosesTuning
+{
+
class ScoreStats;
@@ -36,4 +40,6 @@ private:
std::vector<std::multiset<int> > m_ref_tokens;
};
+}
+
#endif // MERT_PER_SCORER_H_
diff --git a/mert/Permutation.cpp b/mert/Permutation.cpp
new file mode 100644
index 000000000..5f3102f26
--- /dev/null
+++ b/mert/Permutation.cpp
@@ -0,0 +1,337 @@
+/*
+ * Permutation.cpp
+ * met - Minimum Error Training
+ *
+ * Created by Alexandra Birch 18/11/09.
+ *
+ */
+
+#include <fstream>
+#include <sstream>
+#include <math.h>
+#include "Permutation.h"
+#include "Util.h"
+
+using namespace std;
+
+namespace MosesTuning
+{
+
+
+Permutation::Permutation(const string &alignment, const int sourceLength, const int targetLength )
+{
+ if (sourceLength > 0) {
+ set(alignment, sourceLength);
+ }
+ m_targetLength = targetLength;
+}
+
+size_t Permutation::getLength() const
+{
+ return int(m_array.size());
+}
+void Permutation::dump() const
+{
+ int j=0;
+ for (vector<int>::const_iterator i = m_array.begin(); i !=m_array.end(); i++) {
+ cout << "(";
+ cout << j << ":" << *i ;
+ cout << "), ";
+ j++;
+ }
+ cout << endl;
+}
+
+
+//Sent alignment string
+//Eg: "0-1 0-0 1-2 3-0 4-5 6-7 "
+// Inidiviual word alignments which can be one-one,
+// or null aligned, or many-many. The format is sourcepos - targetpos
+//Its the output of the berkley aligner subtracting 1 from each number
+//sourceLength needed because last source words might not be aligned
+void Permutation::set(const string & alignment,const int sourceLength)
+{
+
+ //cout << "******** Permutation::set :" << alignment << ": len : " << sourceLength <<endl;
+
+ if(sourceLength <= 0) {
+ //not found
+ cerr << "Source sentence length not positive:"<< sourceLength << endl;
+ exit(0);
+ }
+
+ if (alignment.length() <= 0) {
+ //alignment empty - could happen but not good
+ cerr << "Alignment string empty:"<< alignment << endl;
+ }
+
+ //Tokenise on whitespace
+ string buf; // Have a buffer string
+ stringstream ss(alignment); // Insert the string into a stream
+ vector<string> tokens; // Create vector to hold our words
+ while (ss >> buf)
+ tokens.push_back(buf);
+
+ vector<int> tempPerm(sourceLength, -1);
+ //Set tempPerm to have one target position per source position
+ for (size_t i=0; i<tokens.size(); i++) {
+ string temp = tokens[i];
+ int posDelimeter = temp.find("-");
+ if(posDelimeter == int(string::npos)) {
+ cerr << "Delimiter not found - :"<< tokens[i] << endl;
+ exit(1);
+ }
+ int sourcePos = atoi((temp.substr(0, posDelimeter)).c_str());
+ int targetPos = atoi((temp.substr(posDelimeter+1)).c_str());
+ //cout << "SP:" << sourcePos << " TP:" << targetPos << endl;
+ if (sourcePos > sourceLength) {
+ cerr << "Source sentence length:" << sourceLength << " is smaller than alignment source position:" << sourcePos << endl;
+ cerr << "******** Permutation::set :" << alignment << ": len : " << sourceLength <<endl;
+ exit(1);
+ }
+ //If have multiple target pos aligned to one source,
+ // then ignore all but first alignment
+ if (tempPerm[sourcePos] == -1 || tempPerm[sourcePos] > targetPos) {
+ tempPerm[sourcePos] = targetPos;
+ }
+ }
+
+ //TODO
+ //Set final permutation in m_array
+ //Take care of: source - null
+ // multiple_source - one target
+ // unaligned target
+ // Input: 1-9 2-1 4-3 4-4 5-6 6-6 7-6 8-8
+ // Convert source: 1 2 3 4 5 6 7 8
+ // target: 9 1 -1 3 6 6 6 8 -> 8 1 2 3 4 5 6 7
+
+ // 1st step: Add null aligned source to previous alignment
+ // target: 9 1 -1 3 6 6 6 8 -> 9 1 1 3 6 6 6 8
+ int last=0;
+ m_array.assign(sourceLength, -1);
+ //get a searcheable index
+ multimap<int, int> invMap;
+ multimap<int, int>::iterator it;
+ //cout << " SourceP -> TargetP " << endl;
+ for (size_t i=0; i<tempPerm.size(); i++) {
+ if (tempPerm[i] == -1) {
+ tempPerm[i] = last;
+ } else {
+ last = tempPerm[i];
+ }
+ //cout << i << " -> " << tempPerm[i] << endl;
+ //Key is target pos, value is source pos
+ invMap.insert(pair<int,int>(tempPerm[i],int(i)));
+ }
+
+
+
+ // 2nd step: Get target into index of multimap and sort
+ // Convert source: 1 2 3 4 5 6 7 8
+ // target: 9 1 0 3 6 6 6 8 -> 0 1 3 6 6 6 8 9
+ // source: 3 2 4 5 6 7 8 1
+ int i=0;
+ //cout << " TargetP => SourceP : TargetIndex " << endl;
+ for ( it=invMap.begin() ; it != invMap.end(); it++ ) {
+ //cout << (*it).first << " => " << (*it).second << " : " << i << endl;
+ //find source position
+ m_array[(*it).second] = i;
+ i++;
+ }
+
+ bool ok = checkValidPermutation(m_array);
+ //dump();
+ if (!ok) {
+ throw runtime_error(" Created invalid permutation");
+ }
+}
+
+//Static
+vector<int> Permutation::invert(const vector<int> & inVector)
+{
+ vector<int> outVector(inVector.size());
+ for (size_t i=0; i<inVector.size(); i++) {
+ outVector[inVector[i]] = int(i);
+ }
+ return outVector;
+}
+
+//Static
+//Permutations start at 0
+bool Permutation::checkValidPermutation(vector<int> const & inVector)
+{
+ vector<int> test(inVector.size(),-1);
+ for (size_t i=0; i< inVector.size(); i++) {
+ //No multiple entries of same value allowed
+ if (test[inVector[i]] > -1) {
+ cerr << "Permutation error: multiple entries of same value\n" << endl;
+ return false;
+ }
+ test[inVector[i]] ++;
+ }
+ for (size_t i=0; i<inVector.size(); i++) {
+ //No holes allowed
+ if (test[inVector[i]] == -1) {
+ cerr << "Permutation error: missing values\n" << endl;
+ return false;
+ }
+ }
+ return true;
+}
+
+
+//TODO default to HAMMING
+//Note: it returns the distance that is not normalised
+float Permutation::distance(const Permutation &permCompare, const distanceMetric_t &type) const
+{
+ float score=0;
+
+ //bool debug= (verboselevel()>3); // TODO: fix verboselevel()
+ bool debug=false;
+ if (debug) {
+ cout << "*****Permutation::distance" <<endl;
+ cout << "Hypo:" << endl;
+ dump();
+ cout << "Ref: " << endl;
+ permCompare.dump();
+ }
+
+ if (type == HAMMING_DISTANCE) {
+ score = calculateHamming(permCompare);
+ } else if (type == KENDALL_DISTANCE) {
+ score = calculateKendall(permCompare);
+ } else {
+ throw runtime_error("Distance type not valid");
+ }
+
+ float brevityPenalty = 1.0 - (float) permCompare.getTargetLength()/getTargetLength() ;//reflength divided by trans length
+ if (brevityPenalty < 0.0) {
+ score = score * exp(brevityPenalty);
+ }
+
+ if (debug) {
+ cout << "Distance type:" << type << endl;
+ cout << "Score: "<< score << endl;
+ }
+ return score;
+}
+
+
+float Permutation::calculateHamming(const Permutation & compare) const
+{
+ float score=0;
+ vector<int> compareArray = compare.getArray();
+ if (getLength() != compare.getLength()) {
+ cerr << "1stperm: " << getLength() << " 2ndperm: " << compare.getLength() << endl;
+ throw runtime_error("Length of permutations not equal");
+ }
+ if (getLength() == 0) {
+ cerr << "Empty permutation" << endl;
+ return 0;
+ }
+ for (size_t i=0; i<getLength(); i++) {
+ if (m_array[i] != compareArray[i]) {
+ score++;
+ }
+
+ }
+ score = 1 - (score / getLength());
+ return score;
+}
+
+float Permutation::calculateKendall(const Permutation & compare) const
+{
+ float score=0;
+ vector<int> compareArray = compare.getArray();
+ if (getLength() != compare.getLength()) {
+ cerr << "1stperm: " << getLength() << " 2ndperm: " << compare.getLength() << endl;
+ throw runtime_error("Length of permutations not equal");
+ }
+ if (getLength() == 0) {
+ cerr << "Empty permutation" << endl;
+ return 0;
+ }
+ if (getLength() == 1) {
+ cerr << "One-word sentence. Kendall score = 1" << endl;
+ return 1;
+ }
+ for (size_t i=0; i<getLength(); i++) {
+ for (size_t j=0; j<getLength(); j++) {
+ if ((m_array[i] < m_array[j]) && (compareArray[i] > compareArray[j])) {
+ score++;
+ }
+ }
+ }
+ score = (score / ((getLength()*getLength() - getLength()) /2 ) );
+ //Adjusted Kendall's tau correlates better with human judgements
+ score = sqrt (score);
+ score = 1 - score;
+
+ return score;
+}
+
+vector<int> Permutation::getArray() const
+{
+ vector<int> ret = m_array;
+ return ret;
+}
+
+//Static
+//This function is called with test which is
+// the 5th field in moses nbest output when called with -include-alignment-in-n-best
+//eg. 0=0 1-2=1-2 3=3 4=4 5=5 6=6 7-9=7-8 10=9 11-13=10-11 (source-target)
+string Permutation::convertMosesToStandard(string const & alignment)
+{
+ if (alignment.length() == 0) {
+ cerr << "Alignment input string empty" << endl;
+ }
+ string working = alignment;
+ string out;
+
+ stringstream oss;
+ while (working.length() > 0) {
+ string align;
+ getNextPound(working,align," ");
+
+ //If found an alignment
+ if (align.length() > 0) {
+ size_t posDelimeter = align.find("=");
+ if(posDelimeter== string::npos) {
+ cerr << "Delimiter not found = :"<< align << endl;
+ exit(0);
+ }
+ int firstSourcePos,lastSourcePos,firstTargetPos,lastTargetPos;
+ string sourcePoss = align.substr(0, posDelimeter);
+ string targetPoss = align.substr(posDelimeter+1);
+ posDelimeter = sourcePoss.find("-");
+ if(posDelimeter < string::npos) {
+ firstSourcePos = atoi((sourcePoss.substr(0, posDelimeter)).c_str());
+ lastSourcePos = atoi((sourcePoss.substr(posDelimeter+1)).c_str());
+ } else {
+ firstSourcePos = atoi(sourcePoss.c_str());
+ lastSourcePos = firstSourcePos;
+ }
+ posDelimeter = targetPoss.find("-");
+ if(posDelimeter < string::npos) {
+ firstTargetPos = atoi((targetPoss.substr(0, posDelimeter)).c_str());
+ lastTargetPos = atoi((targetPoss.substr(posDelimeter+1)).c_str());
+ } else {
+ firstTargetPos = atoi(targetPoss.c_str());
+ lastTargetPos = firstTargetPos;
+ }
+ for (int i = firstSourcePos; i <= lastSourcePos; i++) {
+ for (int j = firstTargetPos; j <= lastTargetPos; j++) {
+ oss << i << "-" << j << " ";
+ }
+ }
+
+ } //else case where two spaces ?
+ }
+ out = oss.str();
+ //cout << "ConverttoStandard: " << out << endl;
+
+ return out;
+}
+
+}
+
diff --git a/mert/Permutation.h b/mert/Permutation.h
new file mode 100644
index 000000000..2c47487b6
--- /dev/null
+++ b/mert/Permutation.h
@@ -0,0 +1,71 @@
+/*
+ * Permutation.h
+ * met - Minimum Error Training
+ *
+ * Created by Alexandra Birch 18 Nov 2009.
+ *
+ */
+
+#ifndef PERMUTATION_H
+#define PERMUTATION_H
+
+
+#include <limits>
+#include <vector>
+#include <iostream>
+#include <fstream>
+
+#include "Util.h"
+
+namespace MosesTuning
+{
+
+
+class Permutation
+{
+
+public:
+ //Can be HAMMING_DISTANCE or KENDALLS_DISTANCE
+ Permutation(const std::string &alignment = std::string(), const int sourceLength = 0, const int targetLength = 0 );
+
+ ~Permutation() {};
+
+ inline void clear() {
+ m_array.clear();
+ }
+ inline size_t size() {
+ return m_array.size();
+ }
+
+
+ void set(const std::string &alignment,const int sourceLength);
+
+ float distance(const Permutation &permCompare, const distanceMetric_t &strategy = HAMMING_DISTANCE) const;
+
+ //Const
+ void dump() const;
+ size_t getLength() const;
+ std::vector<int> getArray() const;
+ int getTargetLength() const {
+ return m_targetLength;
+ }
+
+
+ //Static
+ static std::string convertMosesToStandard(std::string const & alignment);
+ static std::vector<int> invert(std::vector<int> const & inVector);
+ static bool checkValidPermutation(std::vector<int> const & inVector);
+
+protected:
+ std::vector<int> m_array;
+ int m_targetLength;
+ float calculateHamming(const Permutation & compare) const;
+ float calculateKendall(const Permutation & compare) const;
+
+private:
+};
+
+
+}
+
+#endif
diff --git a/mert/PermutationScorer.cpp b/mert/PermutationScorer.cpp
new file mode 100644
index 000000000..c6588eec7
--- /dev/null
+++ b/mert/PermutationScorer.cpp
@@ -0,0 +1,245 @@
+#include <cassert>
+#include "PermutationScorer.h"
+
+using namespace std;
+
+namespace MosesTuning
+{
+
+
+const int PermutationScorer::SCORE_PRECISION = 5;
+const int PermutationScorer::SCORE_MULTFACT = 100000; // 100000=10^SCORE_PRECISION
+
+PermutationScorer::PermutationScorer(const string &distanceMetric, const string &config)
+ :StatisticsBasedScorer(distanceMetric,config)
+{
+ //configure regularisation
+
+ static string KEY_REFCHOICE = "refchoice";
+ static string REFCHOICE_AVERAGE = "average";
+ static string REFCHOICE_CLOSEST = "closest";
+
+ string refchoice = getConfig(KEY_REFCHOICE,REFCHOICE_CLOSEST);
+ if (refchoice == REFCHOICE_AVERAGE) {
+ m_refChoiceStrategy = REFERENCE_CHOICE_AVERAGE;
+ } else if (refchoice == REFCHOICE_CLOSEST) {
+ m_refChoiceStrategy = REFERENCE_CHOICE_CLOSEST;
+ } else {
+ throw runtime_error("Unknown reference choice strategy: " + refchoice);
+ }
+ cerr << "Using reference choice strategy: " << refchoice << endl;
+
+ if (distanceMetric.compare("HAMMING") == 0) {
+ m_distanceMetric = HAMMING_DISTANCE;
+ } else if (distanceMetric.compare("KENDALL") == 0) {
+ m_distanceMetric = KENDALL_DISTANCE;
+ }
+ cerr << "Using permutation distance metric: " << distanceMetric << endl;
+
+ //Get reference alignments from scconfig refalign option
+ static string KEY_ALIGNMENT_FILES = "refalign";
+ string refalign = getConfig(KEY_ALIGNMENT_FILES,"");
+ //cout << refalign << endl;
+ if (refalign.length() > 0) {
+ string substring;
+ while (!refalign.empty()) {
+ getNextPound(refalign, substring, "+");
+ m_referenceAlignments.push_back(substring);
+ }
+ }
+
+ //Get length of source sentences read in from scconfig source option
+ // this is essential for extractor but unneccesary for mert executable
+ static string KEY_SOURCE_FILE = "source";
+ string sourceFile = getConfig(KEY_SOURCE_FILE,"");
+ if (sourceFile.length() > 0) {
+ cerr << "Loading source sentence lengths from " << sourceFile << endl;
+ ifstream sourcein(sourceFile.c_str());
+ if (!sourcein) {
+ throw runtime_error("Unable to open: " + sourceFile);
+ }
+ string line;
+ while (getline(sourcein,line)) {
+ size_t wordNumber = 0;
+ string word;
+ while(!line.empty()) {
+ getNextPound(line, word, " ");
+ wordNumber++;
+ }
+ m_sourceLengths.push_back(wordNumber);
+ }
+ sourcein.close();
+ }
+}
+
+void PermutationScorer::setReferenceFiles(const vector<string>& referenceFiles)
+{
+ cout << "*******setReferenceFiles" << endl;
+ //make sure reference data is clear
+ m_referencePerms.clear();
+
+ vector< vector< int> > targetLengths;
+ //Just getting target length from reference text file
+ for (size_t i = 0; i < referenceFiles.size(); ++i) {
+ vector <int> lengths;
+ cout << "Loading reference from " << referenceFiles[i] << endl;
+ ifstream refin(referenceFiles[i].c_str());
+ if (!refin) {
+ cerr << "Unable to open: " << referenceFiles[i] << endl;
+ throw runtime_error("Unable to open alignment file");
+ }
+ string line;
+ while (getline(refin,line)) {
+ int count = getNumberWords(line);
+ lengths.push_back(count);
+ }
+ targetLengths.push_back(lengths);
+ }
+
+ //load reference data
+ //NOTE ignoring normal reference file, only using previously saved alignment reference files
+ for (size_t i = 0; i < m_referenceAlignments.size(); ++i) {
+ vector<Permutation> referencePerms;
+ cout << "Loading reference from " << m_referenceAlignments[i] << endl;
+ ifstream refin(m_referenceAlignments[i].c_str());
+ if (!refin) {
+ cerr << "Unable to open: " << m_referenceAlignments[i] << endl;
+ throw runtime_error("Unable to open alignment file");
+ }
+ string line;
+ size_t sid = 0; //sentence counter
+ while (getline(refin,line)) {
+ //cout << line << endl;
+
+ //Line needs to be of the format: 0-0 1-1 1-2 etc source-target
+ Permutation perm(line, m_sourceLengths[sid],targetLengths[i][sid]);
+ //perm.dump();
+ referencePerms.push_back(perm);
+ //check the source sentence length is the same for previous file
+ if (perm.getLength() != m_sourceLengths[sid]) {
+ cerr << "Permutation Length: " << perm.getLength() << endl;
+ cerr << "Source length: " << m_sourceLengths[sid] << " for sid " << sid << endl;
+ throw runtime_error("Source sentence lengths not the same: ");
+ }
+
+ sid++;
+ }
+ m_referencePerms.push_back(referencePerms);
+ }
+}
+
+int PermutationScorer::getNumberWords (const string& text) const
+{
+ int count = 0;
+ string line = trimStr(text);
+ if (line.length()>0) {
+ int pos = line.find(" ");
+ while (pos!=int(string::npos)) {
+ count++;
+ pos = line.find(" ",pos+1);
+ }
+ count++;
+ }
+ return count;
+}
+
+
+void PermutationScorer::prepareStats(size_t sid, const string& text, ScoreStats& entry)
+{
+ //bool debug= (verboselevel()>3); // TODO: fix verboselevel()
+ bool debug=false;
+ if (debug) {
+ cout << "*******prepareStats" ;
+ cout << text << endl;
+ cout << sid << endl;
+ cout << "Reference0align:" << endl;
+ m_referencePerms[0][sid].dump();
+ }
+
+ string sentence = "";
+ string align = text;
+ size_t alignmentData = text.find("|||");
+ //Get sentence and alignment parts
+ if(alignmentData != string::npos) {
+ getNextPound(align,sentence, "|||");
+ } else {
+ align = text;
+ }
+ int translationLength = getNumberWords(sentence);
+
+
+ //A vector of Permutations for each sentence
+ vector< vector<Permutation> > nBestPerms;
+ float distanceValue;
+
+ //need to create permutations for each nbest line
+ //here we check if the alignments extracted from the nbest are phrase-based or word-based, in which case no conversion is needed
+ bool isWordAlignment=true;
+ string alignCopy = align;
+ string align1;
+ getNextPound(alignCopy,align1," ");
+ if (align1.length() > 0) {
+ size_t phraseDelimeter = align1.find("=");
+ if(phraseDelimeter!= string::npos)
+ isWordAlignment=false;
+ }
+ string standardFormat = align;
+ if(!isWordAlignment)
+ standardFormat= Permutation::convertMosesToStandard(align);
+
+ if (debug) {
+ cerr << "Nbest alignment: " << align << endl;
+ cerr << "-->std alignment: " << standardFormat << endl;
+ }
+
+ Permutation perm(standardFormat, m_sourceLengths[sid],translationLength);
+ //perm.dump();
+
+ if (m_refChoiceStrategy == REFERENCE_CHOICE_AVERAGE) {
+ float total = 0;
+ for (size_t i = 0; i < m_referencePerms.size(); ++i) {
+ float dist = perm.distance(m_referencePerms[i][sid], m_distanceMetric);
+ total += dist;
+ //cout << "Ref number: " << i << " distance: " << dist << endl;
+ }
+ float mean = (float)total/m_referencePerms.size();
+ //cout << "MultRef strategy AVERAGE: total " << total << " mean " << mean << " number " << m_referencePerms.size() << endl;
+ distanceValue = mean;
+ } else if (m_refChoiceStrategy == REFERENCE_CHOICE_CLOSEST) {
+ float max_val = 0;
+
+ for (size_t i = 0; i < m_referencePerms.size(); ++i) {
+ //look for the closest reference
+ float value = perm.distance(m_referencePerms[i][sid], m_distanceMetric);
+ //cout << "Ref number: " << i << " distance: " << value << endl;
+ if (value > max_val) {
+ max_val = value;
+ }
+ }
+ distanceValue = max_val;
+ //cout << "MultRef strategy CLOSEST: max_val " << distanceValue << endl;
+ } else {
+ throw runtime_error("Unsupported reflength strategy");
+ }
+
+ //SCOREROUT eg: 0.04546
+ distanceValue*=SCORE_MULTFACT; //SCOREROUT eg: 4546 to transform float into integer
+ ostringstream tempStream;
+ tempStream.precision(SCORE_PRECISION);
+ tempStream << distanceValue << " 1"; //use for final normalization over the amount of test sentences
+ string str = tempStream.str();
+ entry.set(str);
+
+ //cout << tempStream.str();
+}
+
+//Will just be final score
+statscore_t PermutationScorer::calculateScore(const vector<int>& comps) const
+{
+ //cerr << "*******PermutationScorer::calculateScore" ;
+ //cerr << " " << comps[0]/comps[1] << endl;
+ return (((statscore_t) comps[0]) / comps[1]) / SCORE_MULTFACT;
+}
+
+}
+
diff --git a/mert/PermutationScorer.h b/mert/PermutationScorer.h
new file mode 100644
index 000000000..4d5c144ce
--- /dev/null
+++ b/mert/PermutationScorer.h
@@ -0,0 +1,75 @@
+#ifndef __PERMUTATIONSCORER_H__
+#define __PERMUTATIONSCORER_H__
+
+#include <algorithm>
+#include <cmath>
+#include <iostream>
+#include <iterator>
+#include <set>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+#include <limits.h>
+#include "Types.h"
+#include "ScoreData.h"
+#include "Scorer.h"
+#include "Permutation.h"
+#include "StatisticsBasedScorer.h"
+
+namespace MosesTuning
+{
+
+/**
+ * Permutation
+ **/
+class PermutationScorer: public StatisticsBasedScorer
+{
+
+public:
+ PermutationScorer(const std::string &distanceMetric = "HAMMING",
+ const std::string &config = std::string());
+ void setReferenceFiles(const std::vector<std::string>& referenceFiles);
+ void prepareStats(size_t sid, const std::string& text, ScoreStats& entry);
+ static const int SCORE_PRECISION;
+ static const int SCORE_MULTFACT;
+
+ size_t NumberOfScores() const {
+ //cerr << "PermutationScorer number of scores: 1" << endl;
+ //return 1;
+
+ //cerr << "PermutationScorer number of scores: 2" << endl;
+ //the second it is just a counter for the normalization of the amount of test sentences
+ return 2;
+ };
+ bool useAlignment() const {
+ //cout << "PermutationScorer::useAlignment returning true" << endl;
+ return true;
+ };
+
+
+protected:
+ statscore_t calculateScore(const std::vector<int>& scores) const;
+ PermutationScorer(const PermutationScorer&);
+ ~PermutationScorer() {};
+ PermutationScorer& operator=(const PermutationScorer&);
+ int getNumberWords (const std::string & line) const;
+
+ distanceMetricReferenceChoice_t m_refChoiceStrategy;
+ distanceMetric_t m_distanceMetric;
+
+ // data extracted from reference files
+ // A vector of permutations for each reference file
+ std::vector< std::vector<Permutation> > m_referencePerms;
+ std::vector<size_t> m_sourceLengths;
+ std::vector<std::string> m_referenceAlignments;
+
+private:
+};
+//TODO need to read in floats for scores - necessary for selecting mean reference strategy and for BLEU?
+
+}
+
+#endif //__PERMUTATIONSCORER_H
+
+
diff --git a/mert/Point.cpp b/mert/Point.cpp
index 299e2b4d0..5c446aa8b 100644
--- a/mert/Point.cpp
+++ b/mert/Point.cpp
@@ -8,6 +8,9 @@
using namespace std;
+namespace MosesTuning
+{
+
vector<unsigned> Point::m_opt_indices;
unsigned Point::m_dim = 0;
@@ -156,3 +159,6 @@ void Point::GetAllWeights(vector<parameter_t>& w) const
}
}
}
+
+}
+
diff --git a/mert/Point.h b/mert/Point.h
index 4be1219fe..92cb832dd 100644
--- a/mert/Point.h
+++ b/mert/Point.h
@@ -6,6 +6,10 @@
#include <vector>
#include "Types.h"
+namespace MosesTuning
+{
+
+
class FeatureStats;
class Optimizer;
@@ -100,4 +104,6 @@ public:
void SetScore(statscore_t score) { m_score = score; }
};
+}
+
#endif // MERT_POINT_H
diff --git a/mert/PointTest.cpp b/mert/PointTest.cpp
index d7d6b031c..df270dec9 100644
--- a/mert/PointTest.cpp
+++ b/mert/PointTest.cpp
@@ -7,6 +7,7 @@
#include "Util.h"
using namespace std;
+using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(point_operators) {
const unsigned int dim = 5;
diff --git a/mert/PreProcessFilter.cpp b/mert/PreProcessFilter.cpp
index d1b822729..da26177f7 100644
--- a/mert/PreProcessFilter.cpp
+++ b/mert/PreProcessFilter.cpp
@@ -16,6 +16,10 @@ using namespace std;
#define CHILD_STDERR_READ pipefds_error[0]
#define CHILD_STDERR_WRITE pipefds_error[1]
+namespace MosesTuning
+{
+
+
// Child exec error signal
void exec_failed (int sig)
{
@@ -135,3 +139,6 @@ PreProcessFilter::~PreProcessFilter()
delete m_toFilter;
delete m_fromFilter;
}
+
+}
+
diff --git a/mert/PreProcessFilter.h b/mert/PreProcessFilter.h
index 27c8d35ff..25e627f6d 100644
--- a/mert/PreProcessFilter.h
+++ b/mert/PreProcessFilter.h
@@ -3,6 +3,10 @@
#include <string>
+namespace MosesTuning
+{
+
+
class ofdstream;
class ifdstream;
@@ -22,4 +26,6 @@ private:
ifdstream* m_fromFilter;
};
+}
+
#endif // MERT_PREPROCESSFILTER_H_
diff --git a/mert/Reference.h b/mert/Reference.h
index 53b715dff..1d6869a12 100644
--- a/mert/Reference.h
+++ b/mert/Reference.h
@@ -7,6 +7,10 @@
#include "Ngram.h"
+namespace MosesTuning
+{
+
+
/**
* Reference class represents reference translations for an output
* translation used in calculating BLEU score.
@@ -79,4 +83,7 @@ inline int Reference::CalcShortest() const {
return *std::min_element(m_length.begin(), m_length.end());
}
+}
+
+
#endif // MERT_REFERENCE_H_
diff --git a/mert/ReferenceTest.cpp b/mert/ReferenceTest.cpp
index 454768195..ad76de1f7 100644
--- a/mert/ReferenceTest.cpp
+++ b/mert/ReferenceTest.cpp
@@ -3,6 +3,8 @@
#define BOOST_TEST_MODULE MertReference
#include <boost/test/unit_test.hpp>
+using namespace MosesTuning;
+
BOOST_AUTO_TEST_CASE(refernece_count) {
Reference ref;
BOOST_CHECK(ref.get_counts() != NULL);
diff --git a/mert/ScopedVector.h b/mert/ScopedVector.h
index 0b7eda13a..c87f07071 100644
--- a/mert/ScopedVector.h
+++ b/mert/ScopedVector.h
@@ -3,6 +3,9 @@
#include <vector>
+namespace MosesTuning
+{
+
template <class T>
class ScopedVector {
public:
@@ -51,4 +54,6 @@ class ScopedVector {
void operator=(const ScopedVector<T>&);
};
+}
+
#endif // MERT_SCOPED_VECTOR_H_
diff --git a/mert/ScoreArray.cpp b/mert/ScoreArray.cpp
index 83fa96ef0..da626f627 100644
--- a/mert/ScoreArray.cpp
+++ b/mert/ScoreArray.cpp
@@ -12,6 +12,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
ScoreArray::ScoreArray()
: m_num_scores(0), m_index("") {}
@@ -160,3 +164,6 @@ bool ScoreArray::check_consistency() const
}
return true;
}
+
+}
+
diff --git a/mert/ScoreArray.h b/mert/ScoreArray.h
index 64d019daf..12b9d7c70 100644
--- a/mert/ScoreArray.h
+++ b/mert/ScoreArray.h
@@ -15,6 +15,9 @@
#include "ScoreStats.h"
+namespace MosesTuning
+{
+
const char SCORES_TXT_BEGIN[] = "SCORES_TXT_BEGIN_0";
const char SCORES_TXT_END[] = "SCORES_TXT_END_0";
const char SCORES_BIN_BEGIN[] = "SCORES_BIN_BEGIN_0";
@@ -83,4 +86,6 @@ public:
bool check_consistency() const;
};
+}
+
#endif // MERT_SCORE_ARRAY_H_
diff --git a/mert/ScoreData.cpp b/mert/ScoreData.cpp
index 4fc4cf2c7..7eb14b4ea 100644
--- a/mert/ScoreData.cpp
+++ b/mert/ScoreData.cpp
@@ -16,6 +16,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
ScoreData::ScoreData(Scorer* scorer) :
m_scorer(scorer)
{
@@ -133,3 +137,6 @@ void ScoreData::setIndex()
j++;
}
}
+
+}
+
diff --git a/mert/ScoreData.h b/mert/ScoreData.h
index 5113bcee9..f4acd9c18 100644
--- a/mert/ScoreData.h
+++ b/mert/ScoreData.h
@@ -16,6 +16,10 @@
#include "ScoreArray.h"
#include "ScoreStats.h"
+namespace MosesTuning
+{
+
+
class Scorer;
class ScoreData
@@ -105,4 +109,6 @@ public:
}
};
+}
+
#endif // MERT_SCORE_DATA_H_
diff --git a/mert/ScoreDataIterator.cpp b/mert/ScoreDataIterator.cpp
index 4cac63c54..6efcf5bc3 100644
--- a/mert/ScoreDataIterator.cpp
+++ b/mert/ScoreDataIterator.cpp
@@ -26,6 +26,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
using namespace util;
+namespace MosesTuning
+{
+
+
ScoreDataIterator::ScoreDataIterator() {}
ScoreDataIterator::ScoreDataIterator(const string& filename) {
@@ -87,3 +91,5 @@ const vector<ScoreDataItem>& ScoreDataIterator::dereference() const {
return m_next;
}
+}
+
diff --git a/mert/ScoreDataIterator.h b/mert/ScoreDataIterator.h
index 910e92165..92480def2 100644
--- a/mert/ScoreDataIterator.h
+++ b/mert/ScoreDataIterator.h
@@ -34,6 +34,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "FeatureDataIterator.h"
+namespace MosesTuning
+{
+
+
typedef std::vector<float> ScoreDataItem;
class ScoreDataIterator :
@@ -62,4 +66,7 @@ class ScoreDataIterator :
std::vector<ScoreDataItem> m_next;
};
+}
+
+
#endif // MERT_SCORE_DATA_ITERATOR_H_
diff --git a/mert/ScoreStats.cpp b/mert/ScoreStats.cpp
index bcc61391b..20e707005 100644
--- a/mert/ScoreStats.cpp
+++ b/mert/ScoreStats.cpp
@@ -17,6 +17,10 @@ namespace {
const int kAvailableSize = 8;
} // namespace
+namespace MosesTuning
+{
+
+
ScoreStats::ScoreStats()
: m_available_size(kAvailableSize), m_entries(0),
m_array(new ScoreStatsType[m_available_size]) {}
@@ -136,7 +140,6 @@ ostream& operator<<(ostream& o, const ScoreStats& e)
return o;
}
-//ADDED_BY_TS
bool operator==(const ScoreStats& s1, const ScoreStats& s2) {
size_t size = s1.size();
@@ -150,4 +153,5 @@ bool operator==(const ScoreStats& s1, const ScoreStats& s2) {
return true;
}
-//END_ADDED
+
+} \ No newline at end of file
diff --git a/mert/ScoreStats.h b/mert/ScoreStats.h
index 2b7756a3b..4088b655e 100644
--- a/mert/ScoreStats.h
+++ b/mert/ScoreStats.h
@@ -16,6 +16,10 @@
#include "Types.h"
+namespace MosesTuning
+{
+
+
class ScoreStats
{
private:
@@ -90,8 +94,8 @@ public:
friend std::ostream& operator<<(std::ostream& o, const ScoreStats& e);
};
-//ADDED_BY_TS
bool operator==(const ScoreStats& s1, const ScoreStats& s2);
-//END_ADDED
+
+}
#endif // MERT_SCORE_STATS_H_
diff --git a/mert/Scorer.cpp b/mert/Scorer.cpp
index cbb99cfdb..b8688eafc 100644
--- a/mert/Scorer.cpp
+++ b/mert/Scorer.cpp
@@ -8,35 +8,9 @@
using namespace std;
-namespace {
-
-//regularisation strategies
-inline float score_min(const statscores_t& scores, size_t start, size_t end)
+namespace MosesTuning
{
- float min = numeric_limits<float>::max();
- for (size_t i = start; i < end; ++i) {
- if (scores[i] < min) {
- min = scores[i];
- }
- }
- return min;
-}
-
-inline float score_average(const statscores_t& scores, size_t start, size_t end)
-{
- if ((end - start) < 1) {
- // this shouldn't happen
- return 0;
- }
- float total = 0;
- for (size_t j = start; j < end; ++j) {
- total += scores[j];
- }
-
- return total / (end - start);
-}
-
-} // namespace
+
Scorer::Scorer(const string& name, const string& config)
: m_name(name),
@@ -160,110 +134,11 @@ string Scorer::applyFilter(const string& sentence) const
}
}
-
-StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
- : Scorer(name,config) {
- //configure regularisation
- static string KEY_TYPE = "regtype";
- static string KEY_WINDOW = "regwin";
- static string KEY_CASE = "case";
- static string TYPE_NONE = "none";
- static string TYPE_AVERAGE = "average";
- static string TYPE_MINIMUM = "min";
- static string TRUE = "true";
- static string FALSE = "false";
-
- string type = getConfig(KEY_TYPE,TYPE_NONE);
- if (type == TYPE_NONE) {
- m_regularization_type = NONE;
- } else if (type == TYPE_AVERAGE) {
- m_regularization_type = AVERAGE;
- } else if (type == TYPE_MINIMUM) {
- m_regularization_type = MINIMUM;
- } else {
- throw runtime_error("Unknown scorer regularisation strategy: " + type);
- }
- // cerr << "Using scorer regularisation strategy: " << type << endl;
-
- const string& window = getConfig(KEY_WINDOW, "0");
- m_regularization_window = atoi(window.c_str());
- // cerr << "Using scorer regularisation window: " << m_regularization_window << endl;
-
- const string& preserve_case = getConfig(KEY_CASE,TRUE);
- if (preserve_case == TRUE) {
- m_enable_preserve_case = true;
- } else if (preserve_case == FALSE) {
- m_enable_preserve_case = false;
- }
- // cerr << "Using case preservation: " << m_enable_preserve_case << endl;
+float Scorer::score(const candidates_t& candidates) const {
+ diffs_t diffs;
+ statscores_t scores;
+ score(candidates, diffs, scores);
+ return scores[0];
}
-void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
- statscores_t& scores) const
-{
- if (!m_score_data) {
- throw runtime_error("Score data not loaded");
- }
- // calculate the score for the candidates
- if (m_score_data->size() == 0) {
- throw runtime_error("Score data is empty");
- }
- if (candidates.size() == 0) {
- throw runtime_error("No candidates supplied");
- }
- int numCounts = m_score_data->get(0,candidates[0]).size();
- vector<int> totals(numCounts);
- for (size_t i = 0; i < candidates.size(); ++i) {
- ScoreStats stats = m_score_data->get(i,candidates[i]);
- if (stats.size() != totals.size()) {
- stringstream msg;
- msg << "Statistics for (" << "," << candidates[i] << ") have incorrect "
- << "number of fields. Found: " << stats.size() << " Expected: "
- << totals.size();
- throw runtime_error(msg.str());
- }
- for (size_t k = 0; k < totals.size(); ++k) {
- totals[k] += stats.get(k);
- }
- }
- scores.push_back(calculateScore(totals));
-
- candidates_t last_candidates(candidates);
- // apply each of the diffs, and get new scores
- for (size_t i = 0; i < diffs.size(); ++i) {
- for (size_t j = 0; j < diffs[i].size(); ++j) {
- size_t sid = diffs[i][j].first;
- size_t nid = diffs[i][j].second;
- size_t last_nid = last_candidates[sid];
- for (size_t k = 0; k < totals.size(); ++k) {
- int diff = m_score_data->get(sid,nid).get(k)
- - m_score_data->get(sid,last_nid).get(k);
- totals[k] += diff;
- }
- last_candidates[sid] = nid;
- }
- scores.push_back(calculateScore(totals));
- }
-
- // Regularisation. This can either be none, or the min or average as described in
- // Cer, Jurafsky and Manning at WMT08.
- if (m_regularization_type == NONE || m_regularization_window <= 0) {
- // no regularisation
- return;
- }
-
- // window size specifies the +/- in each direction
- statscores_t raw_scores(scores); // copy scores
- for (size_t i = 0; i < scores.size(); ++i) {
- size_t start = 0;
- if (i >= m_regularization_window) {
- start = i - m_regularization_window;
- }
- const size_t end = min(scores.size(), i + m_regularization_window + 1);
- if (m_regularization_type == AVERAGE) {
- scores[i] = score_average(raw_scores,start,end);
- } else {
- scores[i] = score_min(raw_scores,start,end);
- }
- }
}
diff --git a/mert/Scorer.h b/mert/Scorer.h
index 0ed9ced7d..3027cdcc4 100644
--- a/mert/Scorer.h
+++ b/mert/Scorer.h
@@ -6,18 +6,24 @@
#include <stdexcept>
#include <string>
#include <vector>
+#include <limits>
#include "Types.h"
#include "ScoreData.h"
-class PreProcessFilter;
-class ScoreStats;
-
namespace mert {
class Vocabulary;
} // namespace mert
+namespace MosesTuning
+{
+
+class PreProcessFilter;
+class ScoreStats;
+
+enum ScorerRegularisationStrategy {REG_NONE, REG_AVERAGE, REG_MINIMUM};
+
/**
* Superclass of all scorers and dummy implementation.
*
@@ -77,12 +83,7 @@ class Scorer
* Calculate the score of the sentences corresponding to the list of candidate
* indices. Each index indicates the 1-best choice from the n-best list.
*/
- float score(const candidates_t& candidates) const {
- diffs_t diffs;
- statscores_t scores;
- score(candidates, diffs, scores);
- return scores[0];
- }
+ float score(const candidates_t& candidates) const;
const std::string& getName() const {
return m_name;
@@ -103,6 +104,15 @@ class Scorer
}
/**
+ * The scorer returns if it uses the reference alignment data
+ * for permutation distance scores
+ **/
+ virtual bool useAlignment() const {
+ //cout << "Scorer::useAlignment returning false " << endl;
+ return false;
+ };
+
+ /**
* Set the factors, which should be used for this metric
*/
virtual void setFactors(const std::string& factors);
@@ -165,34 +175,36 @@ class Scorer
};
-/**
- * Abstract base class for Scorers that work by adding statistics across all
- * outout sentences, then apply some formula, e.g., BLEU, PER.
- */
-class StatisticsBasedScorer : public Scorer
-{
- public:
- StatisticsBasedScorer(const std::string& name, const std::string& config);
- virtual ~StatisticsBasedScorer() {}
- virtual void score(const candidates_t& candidates, const diffs_t& diffs,
- statscores_t& scores) const;
-
- protected:
-
- enum RegularisationType {
- NONE,
- AVERAGE,
- MINIMUM
- };
-
- /**
- * Calculate the actual score.
- */
- virtual statscore_t calculateScore(const std::vector<int>& totals) const = 0;
+namespace {
+
+ //regularisation strategies
+ inline float score_min(const statscores_t& scores, size_t start, size_t end)
+ {
+ float min = std::numeric_limits<float>::max();
+ for (size_t i = start; i < end; ++i) {
+ if (scores[i] < min) {
+ min = scores[i];
+ }
+ }
+ return min;
+ }
+
+ inline float score_average(const statscores_t& scores, size_t start, size_t end)
+ {
+ if ((end - start) < 1) {
+ // this shouldn't happen
+ return 0;
+ }
+ float total = 0;
+ for (size_t j = start; j < end; ++j) {
+ total += scores[j];
+ }
+
+ return total / (end - start);
+ }
+
+} // namespace
- // regularisation
- RegularisationType m_regularization_type;
- std::size_t m_regularization_window;
-};
+}
#endif // MERT_SCORER_H_
diff --git a/mert/ScorerFactory.cpp b/mert/ScorerFactory.cpp
index 5da75273d..dbd95c1ea 100644
--- a/mert/ScorerFactory.cpp
+++ b/mert/ScorerFactory.cpp
@@ -9,9 +9,14 @@
#include "MergeScorer.h"
#include "InterpolatedScorer.h"
#include "SemposScorer.h"
+#include "PermutationScorer.h"
using namespace std;
+namespace MosesTuning
+{
+
+
vector<string> ScorerFactory::getTypes() {
vector<string> types;
types.push_back(string("BLEU"));
@@ -21,6 +26,7 @@ vector<string> ScorerFactory::getTypes() {
types.push_back(string("WER"));
types.push_back(string("MERGE"));
types.push_back(string("SEMPOS"));
+ types.push_back(string("LRSCORE"));
return types;
}
@@ -40,6 +46,8 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
return new SemposScorer(config);
} else if (type == "MERGE") {
return new MergeScorer(config);
+ } else if ((type == "HAMMING") || (type == "KENDALL")) {
+ return (PermutationScorer*) new PermutationScorer(type, config);
} else {
if (type.find(',') != string::npos) {
return new InterpolatedScorer(type, config);
@@ -49,3 +57,6 @@ Scorer* ScorerFactory::getScorer(const string& type, const string& config) {
}
}
}
+
+}
+
diff --git a/mert/ScorerFactory.h b/mert/ScorerFactory.h
index 6752817ef..e8b33d87c 100644
--- a/mert/ScorerFactory.h
+++ b/mert/ScorerFactory.h
@@ -4,6 +4,10 @@
#include <vector>
#include <string>
+namespace MosesTuning
+{
+
+
class Scorer;
class ScorerFactory
@@ -18,4 +22,6 @@ private:
~ScorerFactory() {}
};
+}
+
#endif // MERT_SCORER_FACTORY_H_
diff --git a/mert/SemposOverlapping.cpp b/mert/SemposOverlapping.cpp
index f27f188f7..ffcabaab2 100644
--- a/mert/SemposOverlapping.cpp
+++ b/mert/SemposOverlapping.cpp
@@ -8,10 +8,14 @@ using namespace std;
namespace {
-SemposOverlapping* g_overlapping = NULL;
+MosesTuning::SemposOverlapping* g_overlapping = NULL;
} // namespace
+namespace MosesTuning
+{
+
+
SemposOverlapping* SemposOverlappingFactory::GetOverlapping(const string& str, const SemposScorer* sempos) {
if (str == "cap-micro") {
return new CapMicroOverlapping(sempos);
@@ -107,3 +111,5 @@ float CapMacroOverlapping::calculateScore(const vector<int>& stats) const
if (n == 0) return 1;
return sum / n;
}
+
+}
diff --git a/mert/SemposOverlapping.h b/mert/SemposOverlapping.h
index e16ffe7bb..3b5a99f7f 100644
--- a/mert/SemposOverlapping.h
+++ b/mert/SemposOverlapping.h
@@ -7,6 +7,10 @@
#include <utility>
#include <vector>
+namespace MosesTuning
+{
+
+
class SemposScorer;
// TODO: need comments about this number.
@@ -87,4 +91,6 @@ public:
const SemposScorer* semposScorer;
};
+}
+
#endif // MERT_SEMPOSOVERLAPPING_H_
diff --git a/mert/SemposScorer.cpp b/mert/SemposScorer.cpp
index 7f4b3cc14..8dd1fc8ee 100644
--- a/mert/SemposScorer.cpp
+++ b/mert/SemposScorer.cpp
@@ -10,6 +10,10 @@
using namespace std;
+namespace MosesTuning
+{
+
+
SemposScorer::SemposScorer(const string& config)
: StatisticsBasedScorer("SEMPOS", config),
m_ovr(SemposOverlappingFactory::GetOverlapping(getConfig("overlapping", "cap-micro"),this)),
@@ -178,3 +182,6 @@ void SemposScorer::loadWeights(const string& weightsfile)
}
}
+
+}
+
diff --git a/mert/SemposScorer.h b/mert/SemposScorer.h
index 98c55fc41..bde064349 100644
--- a/mert/SemposScorer.h
+++ b/mert/SemposScorer.h
@@ -15,6 +15,11 @@
// However, currently SemposScorer uses a bunch of typedefs, which are
// used in SemposScorer as well as inherited SemposOverlapping classes.
#include "SemposOverlapping.h"
+#include "StatisticsBasedScorer.h"
+
+namespace MosesTuning
+{
+
/**
* This class represents sempos based metrics.
@@ -61,4 +66,6 @@ private:
SemposScorer& operator=(const SemposScorer&);
};
+}
+
#endif // MERT_SEMPOSSCORER_H_
diff --git a/mert/SentenceLevelScorer.cpp b/mert/SentenceLevelScorer.cpp
new file mode 100644
index 000000000..df3bbb5c1
--- /dev/null
+++ b/mert/SentenceLevelScorer.cpp
@@ -0,0 +1,108 @@
+//
+// SentenceLevelScorer.cpp
+// mert_lib
+//
+// Created by Hieu Hoang on 22/06/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "SentenceLevelScorer.h"
+
+using namespace std;
+
+namespace MosesTuning
+{
+
+
+/** The sentence level scores have already been calculated, just need to average them
+ and include the differences. Allows scores which are floats **/
+void SentenceLevelScorer::score(const candidates_t& candidates, const diffs_t& diffs,
+ statscores_t& scores)
+{
+ //cout << "*******SentenceLevelScorer::score" << endl;
+ if (!m_score_data) {
+ throw runtime_error("Score data not loaded");
+ }
+ //calculate the score for the candidates
+ if (m_score_data->size() == 0) {
+ throw runtime_error("Score data is empty");
+ }
+ if (candidates.size() == 0) {
+ throw runtime_error("No candidates supplied");
+ }
+ int numCounts = m_score_data->get(0,candidates[0]).size();
+ vector<float> totals(numCounts);
+ for (size_t i = 0; i < candidates.size(); ++i) {
+ //cout << " i " << i << " candi " << candidates[i] ;
+ ScoreStats stats = m_score_data->get(i,candidates[i]);
+ if (stats.size() != totals.size()) {
+ stringstream msg;
+ msg << "Statistics for (" << "," << candidates[i] << ") have incorrect "
+ << "number of fields. Found: " << stats.size() << " Expected: "
+ << totals.size();
+ throw runtime_error(msg.str());
+ }
+ //Add up scores for all sentences, would normally be just one score
+ for (size_t k = 0; k < totals.size(); ++k) {
+ totals[k] += stats.get(k);
+ //cout << " stats " << stats.get(k) ;
+ }
+ //cout << endl;
+ }
+ //take average
+ for (size_t k = 0; k < totals.size(); ++k) {
+ //cout << "totals = " << totals[k] << endl;
+ //cout << "cand = " << candidates.size() << endl;
+ totals[k] /= candidates.size();
+ //cout << "finaltotals = " << totals[k] << endl;
+ }
+
+ scores.push_back(calculateScore(totals));
+
+ candidates_t last_candidates(candidates);
+ //apply each of the diffs, and get new scores
+ for (size_t i = 0; i < diffs.size(); ++i) {
+ for (size_t j = 0; j < diffs[i].size(); ++j) {
+ size_t sid = diffs[i][j].first;
+ size_t nid = diffs[i][j].second;
+ //cout << "sid = " << sid << endl;
+ //cout << "nid = " << nid << endl;
+ size_t last_nid = last_candidates[sid];
+ for (size_t k = 0; k < totals.size(); ++k) {
+ float diff = m_score_data->get(sid,nid).get(k)
+ - m_score_data->get(sid,last_nid).get(k);
+ //cout << "diff = " << diff << endl;
+ totals[k] += diff/candidates.size();
+ //cout << "totals = " << totals[k] << endl;
+ }
+ last_candidates[sid] = nid;
+ }
+ scores.push_back(calculateScore(totals));
+ }
+
+ //regularisation. This can either be none, or the min or average as described in
+ //Cer, Jurafsky and Manning at WMT08
+ if (_regularisationStrategy == REG_NONE || _regularisationWindow <= 0) {
+ //no regularisation
+ return;
+ }
+
+ //window size specifies the +/- in each direction
+ statscores_t raw_scores(scores);//copy scores
+ for (size_t i = 0; i < scores.size(); ++i) {
+ size_t start = 0;
+ if (i >= _regularisationWindow) {
+ start = i - _regularisationWindow;
+ }
+ size_t end = min(scores.size(), i + _regularisationWindow+1);
+ if (_regularisationStrategy == REG_AVERAGE) {
+ scores[i] = score_average(raw_scores,start,end);
+ } else {
+ scores[i] = score_min(raw_scores,start,end);
+ }
+ }
+}
+
+}
+
diff --git a/mert/SentenceLevelScorer.h b/mert/SentenceLevelScorer.h
new file mode 100644
index 000000000..b875c8af9
--- /dev/null
+++ b/mert/SentenceLevelScorer.h
@@ -0,0 +1,88 @@
+//
+// SentenceLevelScorer.h
+// mert_lib
+//
+// Created by Hieu Hoang on 22/06/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef mert_lib_SentenceLevelScorer_h
+#define mert_lib_SentenceLevelScorer_h
+
+#include "Scorer.h"
+#include <string>
+#include <vector>
+#include <vector>
+#include <boost/spirit/home/support/detail/lexer/runtime_error.hpp>
+
+namespace MosesTuning
+{
+
+
+/**
+ * Abstract base class for scorers that work by using sentence level
+ * statistics, e.g. permutation distance metrics **/
+class SentenceLevelScorer : public Scorer
+{
+
+public:
+ SentenceLevelScorer(const std::string& name, const std::string& config): Scorer(name,config) {
+ //configure regularisation
+ static std::string KEY_TYPE = "regtype";
+ static std::string KEY_WINDOW = "regwin";
+ static std::string KEY_CASE = "case";
+ static std::string TYPE_NONE = "none";
+ static std::string TYPE_AVERAGE = "average";
+ static std::string TYPE_MINIMUM = "min";
+ static std::string TRUE = "true";
+ static std::string FALSE = "false";
+
+ std::string type = getConfig(KEY_TYPE,TYPE_NONE);
+ if (type == TYPE_NONE) {
+ _regularisationStrategy = REG_NONE;
+ } else if (type == TYPE_AVERAGE) {
+ _regularisationStrategy = REG_AVERAGE;
+ } else if (type == TYPE_MINIMUM) {
+ _regularisationStrategy = REG_MINIMUM;
+ } else {
+ throw boost::lexer::runtime_error("Unknown scorer regularisation strategy: " + type);
+ }
+ std::cerr << "Using scorer regularisation strategy: " << type << std::endl;
+
+ std::string window = getConfig(KEY_WINDOW,"0");
+ _regularisationWindow = atoi(window.c_str());
+ std::cerr << "Using scorer regularisation window: " << _regularisationWindow << std::endl;
+
+ std::string preservecase = getConfig(KEY_CASE,TRUE);
+ if (preservecase == TRUE) {
+ m_enable_preserve_case = true;
+ } else if (preservecase == FALSE) {
+ m_enable_preserve_case = false;
+ }
+ std::cerr << "Using case preservation: " << m_enable_preserve_case << std::endl;
+
+
+ }
+ ~SentenceLevelScorer() {};
+ virtual void score(const candidates_t& candidates, const diffs_t& diffs,
+ statscores_t& scores);
+
+ //calculate the actual score
+ virtual statscore_t calculateScore(const std::vector<statscore_t>& totals) {
+ return 0;
+ };
+
+
+
+protected:
+
+ //regularisation
+ ScorerRegularisationStrategy _regularisationStrategy;
+ size_t _regularisationWindow;
+
+};
+
+
+}
+
+#endif
diff --git a/mert/Singleton.h b/mert/Singleton.h
index 9fef3e639..473517170 100644
--- a/mert/Singleton.h
+++ b/mert/Singleton.h
@@ -3,6 +3,10 @@
#include <cstdlib>
+namespace MosesTuning
+{
+
+
// thread *un*safe singleton.
// TODO: replace this with thread-safe singleton.
template <typename T>
@@ -30,4 +34,6 @@ class Singleton {
template <typename T>
T* Singleton<T>::m_instance = NULL;
+}
+
#endif // MERT_SINGLETON_H_
diff --git a/mert/SingletonTest.cpp b/mert/SingletonTest.cpp
index 2c44bdc1f..a74ce7c6b 100644
--- a/mert/SingletonTest.cpp
+++ b/mert/SingletonTest.cpp
@@ -3,6 +3,8 @@
#define BOOST_TEST_MODULE MertSingleton
#include <boost/test/unit_test.hpp>
+using namespace MosesTuning;
+
namespace {
static int g_count = 0;
diff --git a/mert/StatisticsBasedScorer.cpp b/mert/StatisticsBasedScorer.cpp
new file mode 100644
index 000000000..05dd95939
--- /dev/null
+++ b/mert/StatisticsBasedScorer.cpp
@@ -0,0 +1,126 @@
+//
+// StatisticsBasedScorer.cpp
+// mert_lib
+//
+// Created by Hieu Hoang on 23/06/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "StatisticsBasedScorer.h"
+
+using namespace std;
+
+namespace MosesTuning
+{
+
+
+StatisticsBasedScorer::StatisticsBasedScorer(const string& name, const string& config)
+: Scorer(name,config) {
+ //configure regularisation
+ static string KEY_TYPE = "regtype";
+ static string KEY_WINDOW = "regwin";
+ static string KEY_CASE = "case";
+ static string TYPE_NONE = "none";
+ static string TYPE_AVERAGE = "average";
+ static string TYPE_MINIMUM = "min";
+ static string TRUE = "true";
+ static string FALSE = "false";
+
+ string type = getConfig(KEY_TYPE,TYPE_NONE);
+ if (type == TYPE_NONE) {
+ m_regularization_type = NONE;
+ } else if (type == TYPE_AVERAGE) {
+ m_regularization_type = AVERAGE;
+ } else if (type == TYPE_MINIMUM) {
+ m_regularization_type = MINIMUM;
+ } else {
+ throw runtime_error("Unknown scorer regularisation strategy: " + type);
+ }
+ // cerr << "Using scorer regularisation strategy: " << type << endl;
+
+ const string& window = getConfig(KEY_WINDOW, "0");
+ m_regularization_window = atoi(window.c_str());
+ // cerr << "Using scorer regularisation window: " << m_regularization_window << endl;
+
+ const string& preserve_case = getConfig(KEY_CASE,TRUE);
+ if (preserve_case == TRUE) {
+ m_enable_preserve_case = true;
+ } else if (preserve_case == FALSE) {
+ m_enable_preserve_case = false;
+ }
+ // cerr << "Using case preservation: " << m_enable_preserve_case << endl;
+}
+
+void StatisticsBasedScorer::score(const candidates_t& candidates, const diffs_t& diffs,
+ statscores_t& scores) const
+{
+ if (!m_score_data) {
+ throw runtime_error("Score data not loaded");
+ }
+ // calculate the score for the candidates
+ if (m_score_data->size() == 0) {
+ throw runtime_error("Score data is empty");
+ }
+ if (candidates.size() == 0) {
+ throw runtime_error("No candidates supplied");
+ }
+ int numCounts = m_score_data->get(0,candidates[0]).size();
+ vector<int> totals(numCounts);
+ for (size_t i = 0; i < candidates.size(); ++i) {
+ ScoreStats stats = m_score_data->get(i,candidates[i]);
+ if (stats.size() != totals.size()) {
+ stringstream msg;
+ msg << "Statistics for (" << "," << candidates[i] << ") have incorrect "
+ << "number of fields. Found: " << stats.size() << " Expected: "
+ << totals.size();
+ throw runtime_error(msg.str());
+ }
+ for (size_t k = 0; k < totals.size(); ++k) {
+ totals[k] += stats.get(k);
+ }
+ }
+ scores.push_back(calculateScore(totals));
+
+ candidates_t last_candidates(candidates);
+ // apply each of the diffs, and get new scores
+ for (size_t i = 0; i < diffs.size(); ++i) {
+ for (size_t j = 0; j < diffs[i].size(); ++j) {
+ size_t sid = diffs[i][j].first;
+ size_t nid = diffs[i][j].second;
+ size_t last_nid = last_candidates[sid];
+ for (size_t k = 0; k < totals.size(); ++k) {
+ int diff = m_score_data->get(sid,nid).get(k)
+ - m_score_data->get(sid,last_nid).get(k);
+ totals[k] += diff;
+ }
+ last_candidates[sid] = nid;
+ }
+ scores.push_back(calculateScore(totals));
+ }
+
+ // Regularisation. This can either be none, or the min or average as described in
+ // Cer, Jurafsky and Manning at WMT08.
+ if (m_regularization_type == NONE || m_regularization_window <= 0) {
+ // no regularisation
+ return;
+ }
+
+ // window size specifies the +/- in each direction
+ statscores_t raw_scores(scores); // copy scores
+ for (size_t i = 0; i < scores.size(); ++i) {
+ size_t start = 0;
+ if (i >= m_regularization_window) {
+ start = i - m_regularization_window;
+ }
+ const size_t end = min(scores.size(), i + m_regularization_window + 1);
+ if (m_regularization_type == AVERAGE) {
+ scores[i] = score_average(raw_scores,start,end);
+ } else {
+ scores[i] = score_min(raw_scores,start,end);
+ }
+ }
+}
+
+}
+
diff --git a/mert/StatisticsBasedScorer.h b/mert/StatisticsBasedScorer.h
new file mode 100644
index 000000000..ca32535ad
--- /dev/null
+++ b/mert/StatisticsBasedScorer.h
@@ -0,0 +1,50 @@
+//
+// StatisticsBasedScorer.h
+// mert_lib
+//
+// Created by Hieu Hoang on 23/06/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef mert_lib_StatisticsBasedScorer_h
+#define mert_lib_StatisticsBasedScorer_h
+
+#include "Scorer.h"
+
+namespace MosesTuning
+{
+
+
+/**
+ * Abstract base class for Scorers that work by adding statistics across all
+ * output sentences, then apply some formula, e.g., BLEU, PER.
+ */
+class StatisticsBasedScorer : public Scorer
+{
+public:
+ StatisticsBasedScorer(const std::string& name, const std::string& config);
+ virtual ~StatisticsBasedScorer() {}
+ virtual void score(const candidates_t& candidates, const diffs_t& diffs,
+ statscores_t& scores) const;
+
+protected:
+
+ enum RegularisationType {
+ NONE,
+ AVERAGE,
+ MINIMUM
+ };
+
+ /**
+ * Calculate the actual score.
+ */
+ virtual statscore_t calculateScore(const std::vector<int>& totals) const = 0;
+
+ // regularisation
+ RegularisationType m_regularization_type;
+ std::size_t m_regularization_window;
+};
+
+} // namespace
+
+#endif
diff --git a/mert/TerScorer.cpp b/mert/TerScorer.cpp
index ef0f737ae..cc7cf1630 100644
--- a/mert/TerScorer.cpp
+++ b/mert/TerScorer.cpp
@@ -12,6 +12,10 @@
using namespace std;
using namespace TERCpp;
+namespace MosesTuning
+{
+
+
TerScorer::TerScorer(const string& config)
: StatisticsBasedScorer("TER",config), kLENGTH(2) {}
@@ -108,3 +112,5 @@ float TerScorer::calculateScore(const vector<int>& comps) const
return (1.0+(num / denom));
}
}
+
+}
diff --git a/mert/TerScorer.h b/mert/TerScorer.h
index 5f2cf6ae1..0229f5e8c 100644
--- a/mert/TerScorer.h
+++ b/mert/TerScorer.h
@@ -6,7 +6,11 @@
#include <vector>
#include "Types.h"
-#include "Scorer.h"
+#include "StatisticsBasedScorer.h"
+
+namespace MosesTuning
+{
+
class ScoreStats;
@@ -47,4 +51,6 @@ private:
TerScorer& operator=(const TerScorer&);
};
+}
+
#endif // MERT_TER_SCORER_H_
diff --git a/mert/Timer.cpp b/mert/Timer.cpp
index 5235edb04..088be93a5 100644
--- a/mert/Timer.cpp
+++ b/mert/Timer.cpp
@@ -22,6 +22,10 @@ uint64_t GetTimeOfDayMicroSeconds() {
} // namespace
+namespace MosesTuning
+{
+
+
void Timer::GetCPUTimeMicroSeconds(Timer::CPUTime* cpu_time) const {
#if !defined(_WIN32) && !defined(_WIN64)
struct rusage usage;
@@ -102,3 +106,5 @@ std::string Timer::ToString() const {
return res;
}
+
+}
diff --git a/mert/Timer.h b/mert/Timer.h
index 7b1101b50..bae4ab6b3 100644
--- a/mert/Timer.h
+++ b/mert/Timer.h
@@ -5,6 +5,10 @@
#include <string>
#include <stdint.h>
+namespace MosesTuning
+{
+
+
class Timer
{
private:
@@ -102,4 +106,7 @@ inline std::ostream& operator<<(std::ostream& os, const Timer& t) {
return os;
}
+}
+
+
#endif // MERT_TIMER_H_
diff --git a/mert/TimerTest.cpp b/mert/TimerTest.cpp
index d9562a3df..3bf0e5573 100644
--- a/mert/TimerTest.cpp
+++ b/mert/TimerTest.cpp
@@ -6,6 +6,8 @@
#include <string>
#include <unistd.h>
+using namespace MosesTuning;
+
BOOST_AUTO_TEST_CASE(timer_basic_test) {
Timer timer;
const int sleep_time_microsec = 40; // ad-hoc microseconds to pass unit tests.
diff --git a/mert/Types.h b/mert/Types.h
index 71904e4ad..b4c03a89a 100644
--- a/mert/Types.h
+++ b/mert/Types.h
@@ -6,6 +6,9 @@
#include <string>
#include <utility>
+namespace MosesTuning
+{
+
class FeatureStats;
class FeatureArray;
class FeatureData;
@@ -39,4 +42,9 @@ typedef std::vector<ScoreArray> scoredata_t;
typedef std::map<std::size_t, std::string> idx2name;
typedef std::map<std::string, std::size_t> name2idx;
+typedef enum { HAMMING_DISTANCE=0, KENDALL_DISTANCE } distanceMetric_t;
+typedef enum { REFERENCE_CHOICE_AVERAGE=0, REFERENCE_CHOICE_CLOSEST } distanceMetricReferenceChoice_t;
+
+}
+
#endif // MERT_TYPE_H_
diff --git a/mert/Util.cpp b/mert/Util.cpp
index 952aaf9aa..ac7d1803b 100644
--- a/mert/Util.cpp
+++ b/mert/Util.cpp
@@ -13,7 +13,7 @@ using namespace std;
namespace {
-Timer g_timer;
+MosesTuning::Timer g_timer;
int g_verbose = 0;
bool FindDelimiter(const std::string &str, const std::string &delim, size_t *pos)
@@ -24,6 +24,9 @@ bool FindDelimiter(const std::string &str, const std::string &delim, size_t *pos
} // namespace
+namespace MosesTuning
+{
+
int verboselevel()
{
return g_verbose;
@@ -86,3 +89,5 @@ double GetUserTime()
{
return g_timer.get_elapsed_cpu_time();
}
+
+}
diff --git a/mert/Util.h b/mert/Util.h
index 22239a27c..e2071bf1f 100644
--- a/mert/Util.h
+++ b/mert/Util.h
@@ -22,6 +22,9 @@
#include "Types.h"
+namespace MosesTuning
+{
+
#ifdef TRACE_ENABLE
#define TRACE_ERR(str) { std::cerr << str; }
#else
@@ -116,9 +119,20 @@ inline FeatureStatsType ConvertStringToFeatureStatsType(const std::string &str)
return ConvertCharToFeatureStatsType(str.c_str());
}
+inline std::string trimStr(const std::string& Src, const std::string& c = " \r\n")
+{
+ size_t p2 = Src.find_last_not_of(c);
+ if (p2 == std::string::npos) return std::string();
+ size_t p1 = Src.find_first_not_of(c);
+ if (p1 == std::string::npos) p1 = 0;
+ return Src.substr(p1, (p2-p1)+1);
+}
+
// Utilities to measure decoding time
void ResetUserTime();
void PrintUserTime(const std::string &message);
double GetUserTime();
+}
+
#endif // MERT_UTIL_H_
diff --git a/mert/UtilTest.cpp b/mert/UtilTest.cpp
index 2101f7c8d..f3ca6ca80 100644
--- a/mert/UtilTest.cpp
+++ b/mert/UtilTest.cpp
@@ -3,6 +3,8 @@
#define BOOST_TEST_MODULE UtilTest
#include <boost/test/unit_test.hpp>
+using namespace MosesTuning;
+
BOOST_AUTO_TEST_CASE(util_get_next_pound_test) {
{
std::string str("9 9 7 ");
diff --git a/mert/Vocabulary.cpp b/mert/Vocabulary.cpp
index 40b04bf99..239d88ced 100644
--- a/mert/Vocabulary.cpp
+++ b/mert/Vocabulary.cpp
@@ -6,9 +6,31 @@ namespace {
Vocabulary* g_vocab = NULL;
} // namespace
+int Vocabulary::Encode(const std::string& token) {
+ iterator it = m_vocab.find(token);
+ int encoded_token;
+ if (it == m_vocab.end()) {
+ // Add a new entry to the vocabulary.
+ encoded_token = static_cast<int>(m_vocab.size());
+
+ m_vocab[token] = encoded_token;
+ } else {
+ encoded_token = it->second;
+ }
+ return encoded_token;
+}
+
+bool Vocabulary::Lookup(const std::string&str , int* v) const {
+
+ const_iterator it = m_vocab.find(str);
+ if (it == m_vocab.end()) return false;
+ *v = it->second;
+ return true;
+}
+
Vocabulary* VocabularyFactory::GetVocabulary() {
if (g_vocab == NULL) {
- return Singleton<Vocabulary>::GetInstance();
+ return MosesTuning::Singleton<Vocabulary>::GetInstance();
} else {
return g_vocab;
}
diff --git a/mert/Vocabulary.h b/mert/Vocabulary.h
index a8630e951..0d9291260 100644
--- a/mert/Vocabulary.h
+++ b/mert/Vocabulary.h
@@ -21,28 +21,12 @@ class Vocabulary {
virtual ~Vocabulary() {}
/** Returns the assiged id for given "token". */
- int Encode(const std::string& token) {
- iterator it = m_vocab.find(token);
- int encoded_token;
- if (it == m_vocab.end()) {
- // Add an new entry to the vocaburary.
- encoded_token = static_cast<int>(m_vocab.size());
- m_vocab[token] = encoded_token;
- } else {
- encoded_token = it->second;
- }
- return encoded_token;
- }
+ int Encode(const std::string& token);
/**
* Return true iff the specified "str" is found in the container.
*/
- bool Lookup(const std::string&str , int* v) const {
- const_iterator it = m_vocab.find(str);
- if (it == m_vocab.end()) return false;
- *v = it->second;
- return true;
- }
+ bool Lookup(const std::string&str , int* v) const;
void clear() { m_vocab.clear(); }
@@ -62,6 +46,7 @@ class Vocabulary {
private:
std::map<std::string, int> m_vocab;
+
};
class VocabularyFactory {
diff --git a/mert/VocabularyTest.cpp b/mert/VocabularyTest.cpp
index 0e67ba62a..5b453fcda 100644
--- a/mert/VocabularyTest.cpp
+++ b/mert/VocabularyTest.cpp
@@ -1,9 +1,10 @@
#include "Vocabulary.h"
+#include "Singleton.h"
#define BOOST_TEST_MODULE MertVocabulary
#include <boost/test/unit_test.hpp>
-#include "Singleton.h"
+using namespace MosesTuning;
namespace mert {
namespace {
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index a95cdfa1b..4527bad60 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -12,6 +12,7 @@
#include "Util.h"
using namespace std;
+using namespace MosesTuning;
namespace {
diff --git a/mert/extractor.cpp b/mert/extractor.cpp
index 17c9df5c0..077d9b94c 100644
--- a/mert/extractor.cpp
+++ b/mert/extractor.cpp
@@ -18,6 +18,7 @@
#include "Util.h"
using namespace std;
+using namespace MosesTuning;
namespace {
@@ -36,6 +37,7 @@ void usage()
cerr << "[--prev-scfile|-R] comma separated list of previous scorer data" << endl;
cerr << "[--factors|-f] list of factors passed to the scorer (e.g. 0|2)" << endl;
cerr << "[--filter|-l] filter command used to preprocess the sentences" << endl;
+ cerr << "[--allow-duplicates|-d] omit the duplicate removal step" << endl;
cerr << "[-v] verbose level" << endl;
cerr << "[--help|-h] print this message and exit" << endl;
exit(1);
@@ -55,6 +57,7 @@ static struct option long_options[] = {
{"prev-ffile", required_argument, 0, 'E'},
{"verbose", required_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
+ {"allow-duplicates", no_argument, 0, 'd'},
{0, 0, 0, 0}
};
@@ -71,6 +74,7 @@ struct ProgramOption {
string prevScoreDataFile;
string prevFeatureDataFile;
bool binmode;
+ bool allowDuplicates;
int verbosity;
ProgramOption()
@@ -85,6 +89,7 @@ struct ProgramOption {
prevScoreDataFile(""),
prevFeatureDataFile(""),
binmode(false),
+ allowDuplicates(false),
verbosity(0) { }
};
@@ -92,7 +97,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
int c;
int option_index;
- while ((c = getopt_long(argc, argv, "s:r:f:l:n:S:F:R:E:v:hb", long_options, &option_index)) != -1) {
+ while ((c = getopt_long(argc, argv, "s:r:f:l:n:S:F:R:E:v:hbd", long_options, &option_index)) != -1) {
switch (c) {
case 's':
opt->scorerType = string(optarg);
@@ -130,6 +135,9 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
case 'v':
opt->verbosity = atoi(optarg);
break;
+ case 'd':
+ opt->allowDuplicates = true;
+ break;
default:
usage();
}
@@ -223,7 +231,9 @@ int main(int argc, char** argv)
// PrintUserTime("Nbest entries loaded and scored");
//ADDED_BY_TS
- data.removeDuplicates();
+ if (!option.allowDuplicates) {
+ data.removeDuplicates();
+ }
//END_ADDED
data.save(option.featureDataFile, option.scoreDataFile, option.binmode);
diff --git a/mert/init.opt b/mert/init.opt
deleted file mode 100644
index 32ab00796..000000000
--- a/mert/init.opt
+++ /dev/null
@@ -1 +0,0 @@
-0.1 0.1 0.3 0.4 0.7 0.5 0.01 0.02 0.1 0.1 0.3 0.4 0.7 9 \ No newline at end of file
diff --git a/mert/kbmira.cpp b/mert/kbmira.cpp
index fa01b41a2..f0d1624e6 100644
--- a/mert/kbmira.cpp
+++ b/mert/kbmira.cpp
@@ -1,7 +1,9 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
-
+K-best Batch MIRA for Moses
+Copyright (C) 2012, National Research Council Canada / Conseil national
+de recherches du Canada
***********************************************************************/
/**
@@ -43,6 +45,7 @@
#include "MiraWeightVector.h"
using namespace std;
+using namespace MosesTuning;
namespace po = boost::program_options;
@@ -71,6 +74,7 @@ ValType evaluate(HypPackEnumerator* train, const AvgWeightVector& wv) {
int main(int argc, char** argv)
{
+ const ValType BLEU_RATIO = 5;
bool help;
string denseInitFile;
string sparseInitFile;
@@ -84,6 +88,8 @@ int main(int argc, char** argv)
bool streaming = false; // Stream all k-best lists?
bool no_shuffle = false; // Don't shuffle, even for in memory version
bool model_bg = false; // Use model for background corpus
+ bool verbose = false; // Verbose updates
+ bool safe_hope = false; // Model score cannot have more than BLEU_RATIO times more influence than BLEU
// Command-line processing follows pro.cpp
po::options_description desc("Allowed options");
@@ -100,7 +106,9 @@ int main(int argc, char** argv)
("sparse-init,s", po::value<string>(&sparseInitFile), "Weight file for sparse features")
("streaming", po::value(&streaming)->zero_tokens()->default_value(false), "Stream n-best lists to save memory, implies --no-shuffle")
("no-shuffle", po::value(&no_shuffle)->zero_tokens()->default_value(false), "Don't shuffle hypotheses before each epoch")
- ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background");
+ ("model-bg", po::value(&model_bg)->zero_tokens()->default_value(false), "Use model instead of hope for BLEU background")
+ ("verbose", po::value(&verbose)->zero_tokens()->default_value(false), "Verbose updates")
+ ("safe-hope", po::value(&safe_hope)->zero_tokens()->default_value(false), "Mode score's influence on hope decoding is limited")
;
po::options_description cmdline_options;
@@ -115,6 +123,8 @@ int main(int argc, char** argv)
exit(0);
}
+ cerr << "kbmira with c=" << c << " decay=" << decay << " no_shuffle=" << no_shuffle << endl;
+
if (vm.count("random-seed")) {
cerr << "Initialising random seed to " << seed << endl;
srand(seed);
@@ -129,14 +139,17 @@ int main(int argc, char** argv)
vector<parameter_t> initParams;
if(!denseInitFile.empty()) {
ifstream opt(denseInitFile.c_str());
- string buffer; istringstream strstrm(buffer);
+ string buffer;
if (opt.fail()) {
cerr << "could not open dense initfile: " << denseInitFile << endl;
exit(3);
}
parameter_t val;
getline(opt,buffer);
- while(strstrm >> val) initParams.push_back(val);
+ istringstream strstrm(buffer);
+ while(strstrm >> val) {
+ initParams.push_back(val);
+ }
opt.close();
}
size_t initDenseSize = initParams.size();
@@ -189,47 +202,69 @@ int main(int argc, char** argv)
int iNumUpdates = 0;
ValType totalLoss = 0.0;
for(train->reset(); !train->finished(); train->next()) {
-
// Hope / fear decode
+ ValType hope_scale = 1.0;
size_t hope_index=0, fear_index=0, model_index=0;
ValType hope_score=0, fear_score=0, model_score=0;
- for(size_t i=0; i< train->cur_size(); i++) {
- MiraFeatureVector vec(train->featuresAt(i));
- ValType score = wv.score(vec);
- ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
- // Hope
- if(i==0 || (score + bleu) > hope_score) {
- hope_score = score + bleu;
- hope_index = i;
+ int iNumHypsBackup = iNumHyps;
+ for(size_t safe_loop=0; safe_loop<2; safe_loop++) {
+ iNumHyps = iNumHypsBackup;
+ ValType hope_bleu, hope_model;
+ for(size_t i=0; i< train->cur_size(); i++) {
+ const MiraFeatureVector& vec=train->featuresAt(i);
+ ValType score = wv.score(vec);
+ ValType bleu = sentenceLevelBackgroundBleu(train->scoresAt(i),bg);
+ // Hope
+ if(i==0 || (hope_scale*score + bleu) > hope_score) {
+ hope_score = hope_scale*score + bleu;
+ hope_index = i;
+ hope_bleu = bleu;
+ hope_model = score;
+ }
+ // Fear
+ if(i==0 || (score - bleu) > fear_score) {
+ fear_score = score - bleu;
+ fear_index = i;
+ }
+ // Model
+ if(i==0 || score > model_score) {
+ model_score = score;
+ model_index = i;
+ }
+ iNumHyps++;
}
- // Fear
- if(i==0 || (score - bleu) > fear_score) {
- fear_score = score - bleu;
- fear_index = i;
- }
- // Model
- if(i==0 || score > model_score) {
- model_score = score;
- model_index = i;
- }
- iNumHyps++;
+ // Outer loop rescales the contribution of model score to 'hope' in antagonistic cases
+ // where model score is having far more influence than BLEU
+ hope_bleu *= BLEU_RATIO; // We only care about cases where model has MUCH more influence than BLEU
+ if(safe_hope && safe_loop==0 && abs(hope_model)>1e-8 && abs(hope_bleu)/abs(hope_model)<hope_scale)
+ hope_scale = abs(hope_bleu) / abs(hope_model);
+ else break;
}
// Update weights
if(hope_index!=fear_index) {
// Vector difference
- MiraFeatureVector hope(train->featuresAt(hope_index));
- MiraFeatureVector fear(train->featuresAt(fear_index));
+ const MiraFeatureVector& hope=train->featuresAt(hope_index);
+ const MiraFeatureVector& fear=train->featuresAt(fear_index);
MiraFeatureVector diff = hope - fear;
// Bleu difference
const vector<float>& hope_stats = train->scoresAt(hope_index);
ValType hopeBleu = sentenceLevelBackgroundBleu(hope_stats, bg);
const vector<float>& fear_stats = train->scoresAt(fear_index);
ValType fearBleu = sentenceLevelBackgroundBleu(fear_stats, bg);
- assert(hopeBleu > fearBleu);
+ assert(hopeBleu + 1e-8 >= fearBleu);
ValType delta = hopeBleu - fearBleu;
// Loss and update
ValType diff_score = wv.score(diff);
ValType loss = delta - diff_score;
+ if(verbose) {
+ cerr << "Updating sent " << train->cur_id() << endl;
+ cerr << "Wght: " << wv << endl;
+ cerr << "Hope: " << hope << " BLEU:" << hopeBleu << " Score:" << wv.score(hope) << endl;
+ cerr << "Fear: " << fear << " BLEU:" << fearBleu << " Score:" << wv.score(fear) << endl;
+ cerr << "Diff: " << diff << " BLEU:" << delta << " Score:" << diff_score << endl;
+ cerr << "Loss: " << loss << " Scale: " << hope_scale << endl;
+ cerr << endl;
+ }
if(loss > 0) {
ValType eta = min(c, loss / diff.sqrNorm());
wv.update(diff,eta);
diff --git a/mert/mert.cpp b/mert/mert.cpp
index 58baffe93..4352927dc 100644
--- a/mert/mert.cpp
+++ b/mert/mert.cpp
@@ -28,6 +28,7 @@
#include "../moses/src/ThreadPool.h"
using namespace std;
+using namespace MosesTuning;
namespace {
diff --git a/mert/pro.cpp b/mert/pro.cpp
index 14135461f..8055b19bd 100644
--- a/mert/pro.cpp
+++ b/mert/pro.cpp
@@ -44,9 +44,13 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "BleuScorer.h"
using namespace std;
+using namespace MosesTuning;
namespace po = boost::program_options;
+namespace MosesTuning
+{
+
class SampledPair {
private:
pair<size_t,size_t> m_translation1;
@@ -88,6 +92,7 @@ static void outputSample(ostream& out, const FeatureDataItem& f1, const FeatureD
}
}
+}
int main(int argc, char** argv)
{
diff --git a/mert/test_scorer.cpp b/mert/test_scorer.cpp
deleted file mode 100644
index c1d52fcbe..000000000
--- a/mert/test_scorer.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-#include <iostream>
-#include <vector>
-
-#include "ScoreData.h"
-#include "Data.h"
-#include "Scorer.h"
-#include "ScorerFactory.h"
-
-using namespace std;
-
-int main(int argc, char** argv)
-{
- cout << "Testing the scorer" << endl;
- //BleuScorer bs("test-scorer-data/cppstats.feats.opt");;
- vector<string> references;
- references.push_back("test_scorer_data/reference.txt");
- //bs.prepare(references, "test-scorer-data/nbest.out");
- Scorer* scorer = new BleuScorer();;
- scorer->setReferenceFiles(references);
- Data d(*scorer);
- d.loadnbest("test_scorer_data/nbest.out");
- //sd.savetxt();
-
- //calculate two bleu scores, nbest and a diff
- ScoreData* sd=d.getScoreData();
- scorer->setScoreData(sd);
- candidates_t candidates(sd->size());;
- for (size_t i = 0; i < sd->size(); ++i) {
- sd->get(i,0).savetxt("/dev/stdout");
- }
-
- diffs_t diffs;
- diff_t diff;
- diff.push_back(make_pair(1,2));
- diff.push_back(make_pair(7,8));
- diffs.push_back(diff);
-
- statscores_t scores;
- scorer->score(candidates,diffs,scores);
-
- cout << "Bleus: " << scores[0] << " " << scores[1] << endl;
-
- //try the per
- scorer = new PerScorer();
- Data pd(*scorer);
- scorer->setReferenceFiles(references);
-
- pd.loadnbest("test_scorer_data/nbest.out");
- //sd.savetxt();
-
- ScoreData* psd=pd.getScoreData();
- scorer->setScoreData(psd);
- for (size_t i = 0; i < psd->size(); ++i) {
- psd->get(i,0).savetxt("/dev/stdout");
- }
-
-
- cout << "PER: " << scorer->score(candidates) << endl;
-
-}
diff --git a/mira/Decoder.cpp b/mira/Decoder.cpp
index db0833738..f75c2613a 100644
--- a/mira/Decoder.cpp
+++ b/mira/Decoder.cpp
@@ -69,7 +69,7 @@ namespace Mira {
cerr << "Loading static data failed, exit." << endl;
exit(1);
}
- StaticData::LoadDataStatic(params);
+ StaticData::LoadDataStatic(params, "mira");
for (int i = 0; i < BASE_ARGC; ++i) {
delete[] mosesargv[i];
}
@@ -139,7 +139,7 @@ namespace Mira {
const TranslationSystem& system,
string filename) {
// run the decoder
- m_manager = new Moses::Manager(*m_sentence, search, &system);
+ m_manager = new Moses::Manager(0,*m_sentence, search, &system);
m_manager->ProcessSentence();
TrellisPathList nBestList;
m_manager->CalcNBest(nBestSize, nBestList, distinct);
@@ -153,7 +153,7 @@ namespace Mira {
throw runtime_error(msg.str());
}
// TODO: handle sentence id (for now always 0)
- OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), 0, false);
+ //OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), 0, false);
out.close();
}
@@ -300,7 +300,7 @@ namespace Mira {
}
else {
// run the decoder
- m_manager = new Moses::Manager(*m_sentence, staticData.GetSearchAlgorithm(), &system);
+ m_manager = new Moses::Manager(0,*m_sentence, staticData.GetSearchAlgorithm(), &system);
m_manager->ProcessSentence();
TrellisPathList nBestList;
m_manager->CalcNBest(nBestSize, nBestList, distinctNbest);
@@ -313,11 +313,11 @@ namespace Mira {
throw runtime_error(msg.str());
}
// TODO: handle sentence id (for now always 0)
- OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), 0, false);
+ //OutputNBest(out, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), 0, false);
out.close();
}
else {
- OutputNBest(streamOut, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), sentenceid, false);
+ //OutputNBest(streamOut, nBestList, StaticData::Instance().GetOutputFactorOrder(),m_manager->GetTranslationSystem(), sentenceid, false);
streamOut.flush();
}
}
diff --git a/mira/Main.cpp b/mira/Main.cpp
index 519a1e4e7..7fe332740 100644
--- a/mira/Main.cpp
+++ b/mira/Main.cpp
@@ -147,6 +147,7 @@ int main(int argc, char** argv) {
("mv-reg", po::value<bool>(&most_violated_reg)->default_value(false), "Regularize most violated constraint")
("dbg", po::value<bool>(&debug)->default_value(true), "More debug output")
("make-pairs", po::value<bool>(&makePairs)->default_value(true), "Make pairs of hypotheses for 1slack")
+ ("debug", po::value<bool>(&debug)->default_value(true), "More debug output")
("rescale-slack", po::value<bool>(&rescaleSlack)->default_value(false), "Rescale slack in 1-slack formulation")
("disable-bleu-feature", po::value<bool>(&disableBleuFeature)->default_value(false), "Disable the Bleu feature")
("real-bleu", po::value<bool>(&realBleu)->default_value(false), "Compute real sentence Bleu on complete translations")
@@ -177,9 +178,6 @@ int main(int argc, char** argv) {
("boost", po::value<bool>(&boost)->default_value(false), "Apply boosting factor to updates on misranked candidates")
("config,f", po::value<string>(&mosesConfigFile), "Moses ini-file")
("configs-folds", po::value<vector<string> >(&mosesConfigFilesFolds), "Moses ini-files, one for each fold")
- //("core-weights", po::value<string>(&coreWeightFile)->default_value(""), "Weight file containing the core weights (already tuned, have to be non-zero)")
- ("decay-core", po::value<float>(&decay_core)->default_value(0.001), "Decay factor for updating core feature learning rates")
- ("decay-sparse", po::value<float>(&decay_sparse)->default_value(0.001), "Decay factor for updating sparse feature learning rates")
("debug-model", po::value<bool>(&debug_model)->default_value(false), "Get best model translation for debugging purposes")
("decode-hope", po::value<bool>(&decode_hope)->default_value(false), "Decode dev input set according to hope objective")
("decode-fear", po::value<bool>(&decode_fear)->default_value(false), "Decode dev input set according to fear objective")
@@ -189,7 +187,6 @@ int main(int argc, char** argv) {
("distinct-nbest", po::value<bool>(&distinctNbest)->default_value(true), "Use n-best list with distinct translations in inference step")
("dump-mixed-weights", po::value<bool>(&dumpMixedWeights)->default_value(false), "Dump mixed weights instead of averaged weights")
("epochs,e", po::value<size_t>(&epochs)->default_value(10), "Number of epochs")
- ("feature-confidence", po::value<bool>(&feature_confidence)->default_value(false), "Use feature weight confidence in weight updates")
("feature-cutoff", po::value<int>(&featureCutoff)->default_value(-1), "Feature cutoff as additional regularization for sparse features")
("fear-n", po::value<int>(&fear_n)->default_value(1), "Number of fear translations used")
("help", po::value(&help)->zero_tokens()->default_value(false), "Print this help message and exit")
@@ -213,9 +210,6 @@ int main(int argc, char** argv) {
("mixing-frequency", po::value<size_t>(&mixingFrequency)->default_value(1), "How often per epoch to mix weights, when using mpi")
("model-hope-fear", po::value<bool>(&model_hope_fear)->default_value(false), "Use model, hope and fear translations for optimisation")
("moses-src", po::value<string>(&moses_src)->default_value(""), "Moses source directory")
- ("most-violated", po::value<bool>(&most_violated)->default_value(false), "Pick pair of hypo and hope that violates constraint the most")
- ("all-violated", po::value<bool>(&all_violated)->default_value(false), "Pair all hypos with hope translation that violate constraint")
- ("one-against-all", po::value<bool>(&one_against_all)->default_value(false), "Pick best Bleu as hope and all others are fear")
("nbest,n", po::value<size_t>(&n)->default_value(1), "Number of translations in n-best list")
("normalise-weights", po::value<bool>(&normaliseWeights)->default_value(false), "Whether to normalise the updated weights before passing them to the decoder")
("normalise-margin", po::value<bool>(&normaliseMargin)->default_value(false), "Normalise the margin: squash between 0 and 1")
@@ -229,6 +223,7 @@ int main(int argc, char** argv) {
("reference-files,r", po::value<vector<string> >(&referenceFiles), "Reference translation files for training")
("reference-files-folds", po::value<vector<string> >(&referenceFilesFolds), "Reference translation files for training, one for each fold")
("kbest", po::value<bool>(&kbest)->default_value(false), "Select hope/fear pairs from a list of nbest translations")
+
("scale-by-inverse-length", po::value<bool>(&scaleByInverseLength)->default_value(false), "Scale BLEU by (history of) inverse input length")
("scale-by-input-length", po::value<bool>(&scaleByInputLength)->default_value(false), "Scale BLEU by (history of) input length")
("scale-by-avg-input-length", po::value<bool>(&scaleByAvgInputLength)->default_value(false), "Scale BLEU by average input length")
@@ -244,12 +239,10 @@ int main(int argc, char** argv) {
("scale-update-precision", po::value<bool>(&scale_update_precision)->default_value(0), "Scale update by precision of oracle")
("sentence-level-bleu", po::value<bool>(&sentenceBleu)->default_value(true), "Use a sentences level Bleu scoring function")
("shuffle", po::value<bool>(&shuffle)->default_value(false), "Shuffle input sentences before processing")
- ("signed-counts", po::value<bool>(&signed_counts)->default_value(false), "Use signed counts for feature learning rates")
("sigmoid-param", po::value<float>(&sigmoidParam)->default_value(1), "y=sigmoidParam is the axis that this sigmoid approaches")
("slack", po::value<float>(&slack)->default_value(0.01), "Use slack in optimiser")
("sparse-average", po::value<bool>(&sparseAverage)->default_value(false), "Average weights by the number of processes")
("sparse-no-average", po::value<bool>(&sparseNoAverage)->default_value(false), "Don't average sparse weights, just sum")
- //("start-weights", po::value<string>(&startWeightFile)->default_value(""), "Weight file containing start weights")
("stop-weights", po::value<bool>(&weightConvergence)->default_value(true), "Stop when weights converge")
("verbosity,v", po::value<int>(&verbosity)->default_value(0), "Verbosity level")
("weight-dump-frequency", po::value<size_t>(&weightDumpFrequency)->default_value(1), "How often per epoch to dump weights (mpi)")
@@ -401,9 +394,9 @@ int main(int argc, char** argv) {
}
// initialise Moses
- // add initial Bleu weight and references to initialize Bleu feature
+ // add references to initialize Bleu feature
boost::trim(decoder_settings);
- decoder_settings += " -mira -distinct-nbest -weight-bl 1 -references";
+ decoder_settings += " -mira -distinct-nbest -references";
if (trainWithMultipleFolds) {
decoder_settings += " ";
decoder_settings += referenceFilesFolds[myFold];
@@ -1560,7 +1553,6 @@ int main(int argc, char** argv) {
// broadcast summed confidence counts
//mpi::broadcast(world, mixedConfidenceCounts, 0);
//confidenceCounts = mixedConfidenceCounts;
-
#endif
#ifndef MPI_ENABLE
//cerr << "\nRank " << rank << ", no mixing, weights: " << mosesWeights << endl;
@@ -1678,6 +1670,7 @@ int main(int argc, char** argv) {
size_t pruned = mixedAverageWeights.SparseL1Regularize(l1_lambda);
cerr << "Rank " << rank << ", epoch " << epoch << ", "
<< "l1-reg. on mixedAverageWeights with lambda=" << l1_lambda << ", pruned: " << pruned << endl;
+
}
if (l2_regularize) {
mixedAverageWeights.SparseL2Regularize(l2_lambda);
diff --git a/mira/Main.h b/mira/Main.h
index ff6e18d7b..23db36c36 100644
--- a/mira/Main.h
+++ b/mira/Main.h
@@ -44,7 +44,6 @@ struct RandomIndex {
//void OutputNBestList(const MosesChart::TrellisPathList &nBestList, const TranslationSystem* system, long translationId);
bool loadSentences(const std::string& filename, std::vector<std::string>& sentences);
-//bool loadCoreWeights(const std::string& filename, ProducerWeightMap& coreWeightMap, const std::vector<const Moses::ScoreProducer*> &featureFunctions);
bool evaluateModulo(size_t shard_position, size_t mix_or_dump_base, size_t actual_batch_size);
void printFeatureValues(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues);
void ignoreCoreFeatures(std::vector<std::vector<Moses::ScoreComponentCollection> > &featureValues, ProducerWeightMap &coreWeightMap);
diff --git a/mira/MiraOptimiser.cpp b/mira/MiraOptimiser.cpp
index 9312f41df..d77cef759 100644
--- a/mira/MiraOptimiser.cpp
+++ b/mira/MiraOptimiser.cpp
@@ -513,19 +513,7 @@ size_t MiraOptimiser::updateWeightsHopeFearSelective(
nonZeroFeatures.push_back(f);
}
}
- // 1 + 2
- /*for (FVector::iterator i = features.begin(); i != features.end(); ++i) {
- if (i->second != 0.0) {
- ++n_sparse;
- ScoreComponentCollection f;
- f.Assign((i->first).name(), i->second);
- nonZeroFeatures.push_back(f);
- cerr << "Rank " << rank << ", epoch " << epoch << ", f: " << f << endl;
- }
- }
- cerr << "Rank " << rank << ", epoch " << epoch << ", non-zero features: " << nonZeroFeatures.size() << endl;*/
- // 3
vector<ScoreComponentCollection> nonZeroFeaturesHope;
vector<ScoreComponentCollection> nonZeroFeaturesFear;
for (FVector::iterator i = features.begin(); i != features.end(); ++i) {
@@ -545,22 +533,6 @@ size_t MiraOptimiser::updateWeightsHopeFearSelective(
}
}
- //1
- /*float n = n_core + n_sparse;
- for (size_t i=0; i<n; ++i)
- lossMinusModelScoreDiffs.push_back(diff/n);
-
- //2
- float diff_10 = diff * 0.1;
- float diff_90 = diff * 0.9;
- cerr << "Rank " << rank << ", epoch " << epoch << ", core diff: " << diff_10/n_core << endl;
- cerr << "Rank " << rank << ", epoch " << epoch << ", sparse diff: " << diff_90/n_sparse << endl;
- for (size_t i=0; i<n_core; ++i)
- lossMinusModelScoreDiffs.push_back(diff_10/n_core);
- for (size_t i=0; i<n_sparse; ++i)
- lossMinusModelScoreDiffs.push_back(diff_90/n_sparse);*/
-
- // 3
float n = n_core + n_sparse_hope + n_sparse_fear;
for (size_t i=0; i<n_core; ++i)
lossMinusModelScoreDiffs.push_back(diff/n);
diff --git a/mira/Optimiser.h b/mira/Optimiser.h
index 19ba3585c..213ee054e 100644
--- a/mira/Optimiser.h
+++ b/mira/Optimiser.h
@@ -27,142 +27,147 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Mira {
class Optimiser {
- public:
- Optimiser() {}
-
- virtual size_t updateWeightsHopeFear(
- Moses::ScoreComponentCollection& weightUpdate,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
- const std::vector<std::vector<float> >& bleuScoresHope,
- const std::vector<std::vector<float> >& bleuScoresFear,
- const std::vector<std::vector<float> >& modelScoresHope,
- const std::vector<std::vector<float> >& modelScoresFear,
- float learning_rate,
- size_t rank,
- size_t epoch,
- int updatePosition = -1) = 0;
+ public:
+ Optimiser() {}
+
+ virtual size_t updateWeightsHopeFear(
+ Moses::ScoreComponentCollection& weightUpdate,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+ const std::vector<std::vector<float> >& bleuScoresHope,
+ const std::vector<std::vector<float> >& bleuScoresFear,
+ const std::vector<std::vector<float> >& modelScoresHope,
+ const std::vector<std::vector<float> >& modelScoresFear,
+ float learning_rate,
+ size_t rank,
+ size_t epoch,
+ int updatePosition = -1) = 0;
};
class Perceptron : public Optimiser {
- public:
- virtual size_t updateWeightsHopeFear(
- Moses::ScoreComponentCollection& weightUpdate,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
- const std::vector<std::vector<float> >& bleuScoresHope,
- const std::vector<std::vector<float> >& bleuScoresFear,
- const std::vector<std::vector<float> >& modelScoresHope,
- const std::vector<std::vector<float> >& modelScoresFear,
- float learning_rate,
- size_t rank,
- size_t epoch,
- int updatePosition = -1);
+ public:
+ virtual size_t updateWeightsHopeFear(
+ Moses::ScoreComponentCollection& weightUpdate,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+ const std::vector<std::vector<float> >& bleuScoresHope,
+ const std::vector<std::vector<float> >& bleuScoresFear,
+ const std::vector<std::vector<float> >& modelScoresHope,
+ const std::vector<std::vector<float> >& modelScoresFear,
+ float learning_rate,
+ size_t rank,
+ size_t epoch,
+ int updatePosition = -1);
};
class MiraOptimiser : public Optimiser {
- public:
- MiraOptimiser() :
- Optimiser() { }
-
- MiraOptimiser(float slack, bool scale_margin, bool scale_margin_precision,
- bool scale_update, bool scale_update_precision, bool boost, bool normaliseMargin, float sigmoidParam) :
- Optimiser(),
- m_slack(slack),
- m_scale_margin(scale_margin),
- m_scale_margin_precision(scale_margin_precision),
- m_scale_update(scale_update),
- m_scale_update_precision(scale_update_precision),
- m_precision(1),
- m_boost(boost),
- m_normaliseMargin(normaliseMargin),
- m_sigmoidParam(sigmoidParam) { }
-
- size_t updateWeights(Moses::ScoreComponentCollection& weightUpdate,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
- const std::vector<std::vector<float> >& losses,
- const std::vector<std::vector<float> >& bleuScores,
- const std::vector<std::vector<float> >& modelScores,
- const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
- const std::vector< float> oracleBleuScores,
- const std::vector< float> oracleModelScores,
- float learning_rate,
- size_t rank,
- size_t epoch);
- virtual size_t updateWeightsHopeFear(Moses::ScoreComponentCollection& weightUpdate,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
- const std::vector<std::vector<float> >& bleuScoresHope,
- const std::vector<std::vector<float> >& bleuScoresFear,
- const std::vector<std::vector<float> >& modelScoresHope,
- const std::vector<std::vector<float> >& modelScoresFear,
- float learning_rate,
- size_t rank,
- size_t epoch,
- int updatePosition = -1);
- size_t updateWeightsHopeFearSelective(Moses::ScoreComponentCollection& weightUpdate,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
- const std::vector<std::vector<float> >& bleuScoresHope,
- const std::vector<std::vector<float> >& bleuScoresFear,
- const std::vector<std::vector<float> >& modelScoresHope,
- const std::vector<std::vector<float> >& modelScoresFear,
- float learning_rate,
- size_t rank,
- size_t epoch,
- int updatePosition = -1);
- size_t updateWeightsHopeFearSummed(Moses::ScoreComponentCollection& weightUpdate,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
- const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
- const std::vector<std::vector<float> >& bleuScoresHope,
- const std::vector<std::vector<float> >& bleuScoresFear,
- const std::vector<std::vector<float> >& modelScoresHope,
- const std::vector<std::vector<float> >& modelScoresFear,
- float learning_rate,
- size_t rank,
- size_t epoch,
- bool rescaleSlack,
- bool makePairs);
- size_t updateWeightsAnalytically(Moses::ScoreComponentCollection& weightUpdate,
- Moses::ScoreComponentCollection& featureValuesHope,
- Moses::ScoreComponentCollection& featureValuesFear,
- float bleuScoreHope,
- float bleuScoreFear,
- float modelScoreHope,
- float modelScoreFear,
- float learning_rate,
- size_t rank,
- size_t epoch);
+ public:
+ MiraOptimiser() :
+ Optimiser() { }
+
+ MiraOptimiser(
+ float slack, bool scale_margin, bool scale_margin_precision,
+ bool scale_update, bool scale_update_precision, bool boost, bool normaliseMargin, float sigmoidParam) :
+ Optimiser(),
+ m_slack(slack),
+ m_scale_margin(scale_margin),
+ m_scale_margin_precision(scale_margin_precision),
+ m_scale_update(scale_update),
+ m_scale_update_precision(scale_update_precision),
+ m_precision(1),
+ m_boost(boost),
+ m_normaliseMargin(normaliseMargin),
+ m_sigmoidParam(sigmoidParam) { }
+
+ size_t updateWeights(
+ Moses::ScoreComponentCollection& weightUpdate,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValues,
+ const std::vector<std::vector<float> >& losses,
+ const std::vector<std::vector<float> >& bleuScores,
+ const std::vector<std::vector<float> >& modelScores,
+ const std::vector< Moses::ScoreComponentCollection>& oracleFeatureValues,
+ const std::vector< float> oracleBleuScores,
+ const std::vector< float> oracleModelScores,
+ float learning_rate,
+ size_t rank,
+ size_t epoch);
+ virtual size_t updateWeightsHopeFear(
+ Moses::ScoreComponentCollection& weightUpdate,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+ const std::vector<std::vector<float> >& bleuScoresHope,
+ const std::vector<std::vector<float> >& bleuScoresFear,
+ const std::vector<std::vector<float> >& modelScoresHope,
+ const std::vector<std::vector<float> >& modelScoresFear,
+ float learning_rate,
+ size_t rank,
+ size_t epoch,
+ int updatePosition = -1);
+ size_t updateWeightsHopeFearSelective(
+ Moses::ScoreComponentCollection& weightUpdate,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+ const std::vector<std::vector<float> >& bleuScoresHope,
+ const std::vector<std::vector<float> >& bleuScoresFear,
+ const std::vector<std::vector<float> >& modelScoresHope,
+ const std::vector<std::vector<float> >& modelScoresFear,
+ float learning_rate,
+ size_t rank,
+ size_t epoch,
+ int updatePosition = -1);
+ size_t updateWeightsHopeFearSummed(
+ Moses::ScoreComponentCollection& weightUpdate,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesHope,
+ const std::vector<std::vector<Moses::ScoreComponentCollection> >& featureValuesFear,
+ const std::vector<std::vector<float> >& bleuScoresHope,
+ const std::vector<std::vector<float> >& bleuScoresFear,
+ const std::vector<std::vector<float> >& modelScoresHope,
+ const std::vector<std::vector<float> >& modelScoresFear,
+ float learning_rate,
+ size_t rank,
+ size_t epoch,
+ bool rescaleSlack,
+ bool makePairs);
+ size_t updateWeightsAnalytically(
+ Moses::ScoreComponentCollection& weightUpdate,
+ Moses::ScoreComponentCollection& featureValuesHope,
+ Moses::ScoreComponentCollection& featureValuesFear,
+ float bleuScoreHope,
+ float bleuScoreFear,
+ float modelScoreHope,
+ float modelScoreFear,
+ float learning_rate,
+ size_t rank,
+ size_t epoch);
void setSlack(float slack) {
- m_slack = slack;
+ m_slack = slack;
}
void setPrecision(float precision) {
- m_precision = precision;
+ m_precision = precision;
}
-
- private:
-
- // regularise Hildreth updates
- float m_slack;
-
- // scale margin with BLEU score or precision
- bool m_scale_margin, m_scale_margin_precision;
-
- // scale update with oracle BLEU score or precision
- bool m_scale_update, m_scale_update_precision;
-
- float m_precision;
-
- // boosting of updates on misranked candidates
- bool m_boost;
-
- // squash margin between 0 and 1 (or depending on m_sigmoidParam)
- bool m_normaliseMargin;
-
- // y=sigmoidParam is the axis that this sigmoid approaches
- float m_sigmoidParam ;
+
+ private:
+ // regularise Hildreth updates
+ float m_slack;
+
+ // scale margin with BLEU score or precision
+ bool m_scale_margin, m_scale_margin_precision;
+
+ // scale update with oracle BLEU score or precision
+ bool m_scale_update, m_scale_update_precision;
+
+ float m_precision;
+
+ // boosting of updates on misranked candidates
+ bool m_boost;
+
+ // squash margin between 0 and 1 (or depending on m_sigmoidParam)
+ bool m_normaliseMargin;
+
+ // y=sigmoidParam is the axis that this sigmoid approaches
+ float m_sigmoidParam ;
};
}
diff --git a/mira/training-expt.perl b/mira/training-expt.perl
index 3bf5fb4b4..c143871b2 100755
--- a/mira/training-expt.perl
+++ b/mira/training-expt.perl
@@ -284,19 +284,19 @@ my $trainer_exe = &param_required("train.trainer");
#&check_exists("weights file ", $weights_file);
#optional training parameters
-my $epochs = &param("train.epochs", 2);
+my $epochs = &param("train.epochs");
my $learner = &param("train.learner", "mira");
-my $batch = &param("train.batch", 1);
+my $batch = &param("train.batch", 1); # don't print this param twice (when printing training file)
my $extra_args = &param("train.extra-args");
-my $by_node = &param("train.by-node",0);
+my $by_node = &param("train.by-node");
my $slots = &param("train.slots",8);
my $jobs = &param("train.jobs",8);
-my $mixing_frequency = &param("train.mixing-frequency",0);
-my $weight_dump_frequency = &param("train.weight-dump-frequency",0);
-my $burn_in = &param("train.burn-in",0);
+my $mixing_frequency = &param("train.mixing-frequency", 1); # don't print this param twice
+my $weight_dump_frequency = &param("train.weight-dump-frequency", 1); # don't print this param twice
+my $burn_in = &param("train.burn-in");
my $burn_in_input_file = &param("train.burn-in-input-file");
my $burn_in_reference_files = &param("train.burn-in-reference-files");
-my $skipTrain = &param("train.skip", 0);
+my $skipTrain = &param("train.skip");
my $train_decoder_settings = &param("train.decoder-settings", "");
if (!$train_decoder_settings) {
$train_decoder_settings = $general_decoder_settings;
@@ -341,7 +341,7 @@ else {
my @result = split(/\s/, $result);
my $inputSize = $result[0];
my $shardSize = $inputSize / $jobs;
- if ($mixing_frequency != 0) {
+ if ($mixing_frequency) {
if ($shardSize < $mixing_frequency) {
$mixing_frequency = int($shardSize);
if ($mixing_frequency == 0) {
@@ -403,101 +403,98 @@ if (ref($reference_files) eq 'ARRAY') {
my $arr_refs = \@refs;
if (!$skipTrain) {
-#write the script
-open TRAIN, ">$train_script_file" or die "Unable to open \"$train_script_file\" for writing";
+ #write the script
+ open TRAIN, ">$train_script_file" or die "Unable to open \"$train_script_file\" for writing";
-&header(*TRAIN,$job_name,$working_dir,$slots,$jobs,$hours,$vmem,$train_out,$train_err);
-if ($jobs == 1) {
- print TRAIN "$trainer_exe ";
-}
-else {
- if ($by_node) {
- print TRAIN "mpirun -np $jobs --bynode $trainer_exe \\\n";
+ &header(*TRAIN,$job_name,$working_dir,$slots,$jobs,$hours,$vmem,$train_out,$train_err);
+ if ($jobs == 1) {
+ print TRAIN "$trainer_exe ";
}
else {
- print TRAIN "mpirun -np \$NSLOTS $trainer_exe \\\n";
- }
-}
-
-if ($jackknife) {
- foreach my $ini (@moses_ini_files_folds) {
- print TRAIN "--configs-folds $ini ";
- }
- print TRAIN "\\\n";
- foreach my $in (@input_files_folds) {
- print TRAIN "--input-files-folds $in ";
+ if ($by_node) {
+ print TRAIN "mpirun -np $jobs --bynode $trainer_exe \\\n";
+ }
+ else {
+ print TRAIN "mpirun -np \$NSLOTS $trainer_exe \\\n";
+ }
}
- print TRAIN "\\\n";
- for my $ref (@reference_files_folds) {
- print TRAIN "--reference-files-folds $ref ";
+
+ if ($jackknife) {
+ foreach my $ini (@moses_ini_files_folds) {
+ print TRAIN "--configs-folds $ini ";
+ }
+ print TRAIN "\\\n";
+ foreach my $in (@input_files_folds) {
+ print TRAIN "--input-files-folds $in ";
+ }
+ print TRAIN "\\\n";
+ for my $ref (@reference_files_folds) {
+ print TRAIN "--reference-files-folds $ref ";
+ }
+ print TRAIN "\\\n";
}
- print TRAIN "\\\n";
-}
-else {
- print TRAIN "-f $moses_ini_file \\\n";
- print TRAIN "-i $input_file \\\n";
- for my $ref (@refs) {
- print TRAIN "-r $ref ";
+ else {
+ print TRAIN "-f $moses_ini_file \\\n";
+ print TRAIN "-i $input_file \\\n";
+ for my $ref (@refs) {
+ print TRAIN "-r $ref ";
+ }
+ print TRAIN "\\\n";
}
- print TRAIN "\\\n";
-}
-if ($continue_epoch > 0) {
- print TRAIN "--continue-epoch $continue_epoch \\\n";
-}
-if ($burn_in) {
- print TRAIN "--burn-in 1 \\\n";
- print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
- my @burnin_refs;
- if (ref($burn_in_reference_files) eq 'ARRAY') {
- @burnin_refs = @$burn_in_reference_files;
- } else {
- @burnin_refs = glob $burn_in_reference_files . "*"; # TODO:
+ if ($continue_epoch > 0) {
+ print TRAIN "--continue-epoch $continue_epoch \\\n";
}
- for my $burnin_ref (@burnin_refs) {
- &check_exists("burn-in ref file", $burnin_ref);
- print TRAIN "--burn-in-reference-files $burnin_ref ";
+ if ($burn_in) {
+ print TRAIN "--burn-in 1 \\\n";
+ print TRAIN "--burn-in-input-file $burn_in_input_file \\\n";
+ my @burnin_refs;
+ if (ref($burn_in_reference_files) eq 'ARRAY') {
+ @burnin_refs = @$burn_in_reference_files;
+ } else {
+ @burnin_refs = glob $burn_in_reference_files . "*"; # TODO:
+ }
+ for my $burnin_ref (@burnin_refs) {
+ &check_exists("burn-in ref file", $burnin_ref);
+ print TRAIN "--burn-in-reference-files $burnin_ref ";
+ }
+ print TRAIN "\\\n";
}
- print TRAIN "\\\n";
-}
#if ($weights_file) {
# print TRAIN "-w $weights_file \\\n";
#}
-if (defined $start_weight_file) {
- print TRAIN "--start-weights $start_weight_file \\\n";
-}
-print TRAIN "-l $learner \\\n";
-print TRAIN "--weight-dump-stem $weight_file_stem \\\n";
-print TRAIN "--mixing-frequency $mixing_frequency \\\n";
-if ($weight_dump_frequency != -1) {
- print TRAIN "--weight-dump-frequency $weight_dump_frequency \\\n";
-}
-print TRAIN "--epochs $epochs \\\n";
-print TRAIN "-b $batch \\\n";
-print TRAIN "--decoder-settings \"$train_decoder_settings\" \\\n";
-print TRAIN $extra_args;
-print TRAIN "\n";
-if ($jobs == 1) {
- print TRAIN "echo \"mira finished.\"\n";
-}
-else {
- print TRAIN "echo \"mpirun finished.\"\n";
-}
-close TRAIN;
-
-if (! $execute) {
- print STDERR "Written train file: $train_script_file\n";
- exit 0;
-}
-
-#submit the training job
-if ($have_sge) {
- $train_job_id = &submit_job_sge($train_script_file);
+ if (defined $start_weight_file) {
+ print TRAIN "--start-weights $start_weight_file \\\n";
+ }
+ print TRAIN "-l $learner \\\n";
+ print TRAIN "--weight-dump-stem $weight_file_stem \\\n";
+ print TRAIN "--mixing-frequency $mixing_frequency \\\n" if ($extra_args !~ /--mixing-frequency /);
+ print TRAIN "--weight-dump-frequency $weight_dump_frequency \\\n" if ($extra_args !~ /--weight-dump-frequency /);
+ print TRAIN "--epochs $epochs \\\n" if $epochs;
+ print TRAIN "--batch-size $batch \\\n" if ($extra_args !~ /--batch-size / && $extra_args !~ /-b /);
+ print TRAIN $extra_args." \\\n";
+ print TRAIN "--decoder-settings \"$train_decoder_settings\" \\\n";
+ if ($jobs == 1) {
+ print TRAIN "echo \"mira finished.\"\n";
+ }
+ else {
+ print TRAIN "echo \"mpirun finished.\"\n";
+ }
+ close TRAIN;
-} else {
- $train_job_id = &submit_job_no_sge($train_script_file, $train_out,$train_err);
-}
-
-die "Failed to submit training job" unless $train_job_id;
+ if (! $execute) {
+ print STDERR "Written train file: $train_script_file\n";
+ exit 0;
+ }
+
+ #submit the training job
+ if ($have_sge) {
+ $train_job_id = &submit_job_sge($train_script_file);
+
+ } else {
+ $train_job_id = &submit_job_no_sge($train_script_file, $train_out,$train_err);
+ }
+
+ die "Failed to submit training job" unless $train_job_id;
}
#wait for the next weights file to appear, or the training job to end
@@ -883,7 +880,7 @@ sub param {
my $value = $config->param($key);
$value = $default if !$value;
# Empty arguments get interpreted as arrays
- $value = "" if (ref($value) eq 'ARRAY' && scalar(@$value) == 0);
+ $value = 0 if (ref($value) eq 'ARRAY' && scalar(@$value) == 0);
return $value;
}
diff --git a/misc/Jamfile b/misc/Jamfile
index e8a133367..68de732f9 100644
--- a/misc/Jamfile
+++ b/misc/Jamfile
@@ -6,4 +6,16 @@ exe queryPhraseTable : queryPhraseTable.cpp ../moses/src//moses ;
exe queryLexicalTable : queryLexicalTable.cpp ../moses/src//moses ;
-alias programs : processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable ;
+local with-cmph = [ option.get "with-cmph" ] ;
+if $(with-cmph) {
+ exe processPhraseTableMin : processPhraseTableMin.cpp ../moses/src//moses ;
+ exe processLexicalTableMin : processLexicalTableMin.cpp ../moses/src//moses ;
+ exe queryPhraseTableMin : queryPhraseTableMin.cpp ../moses/src//moses ;
+
+ alias programsMin : processPhraseTableMin processLexicalTableMin queryPhraseTableMin ;
+}
+else {
+ alias programsMin ;
+}
+
+alias programs : processPhraseTable processLexicalTable queryPhraseTable queryLexicalTable programsMin ;
diff --git a/misc/pmoses/COPYING b/misc/pmoses/COPYING
deleted file mode 120000
index 0b6cbf81b..000000000
--- a/misc/pmoses/COPYING
+++ /dev/null
@@ -1 +0,0 @@
-/usr/share/automake-1.10/COPYING \ No newline at end of file
diff --git a/misc/pmoses/INSTALL b/misc/pmoses/INSTALL
deleted file mode 120000
index 5bb6e7b7e..000000000
--- a/misc/pmoses/INSTALL
+++ /dev/null
@@ -1 +0,0 @@
-/usr/share/automake-1.10/INSTALL \ No newline at end of file
diff --git a/misc/pmoses/configure b/misc/pmoses/configure
index 188414c59..188414c59 100644..100755
--- a/misc/pmoses/configure
+++ b/misc/pmoses/configure
diff --git a/misc/processLexicalTableMin.cpp b/misc/processLexicalTableMin.cpp
new file mode 100644
index 000000000..20183a3b6
--- /dev/null
+++ b/misc/processLexicalTableMin.cpp
@@ -0,0 +1,131 @@
+#include <iostream>
+#include <string>
+
+#ifdef WITH_THREADS
+#include <boost/thread/thread.hpp>
+#endif
+
+#include "CompactPT/LexicalReorderingTableCreator.h"
+
+using namespace Moses;
+
+void printHelp(char **argv)
+{
+ std::cerr << "Usage " << argv[0] << ":\n"
+ " options: \n"
+ "\t-in string -- input table file name\n"
+ "\t-out string -- prefix of binary table file\n"
+#ifdef WITH_THREADS
+ "\t-threads int|all -- number of threads used for conversion\n"
+#endif
+ "\n advanced:\n"
+ "\t-landmark int -- use landmark phrase every 2^n phrases\n"
+ "\t-fingerprint int -- number of bits used for phrase fingerprints\n"
+ "\t-join-scores -- single set of Huffman codes for score components\n"
+ "\t-quantize int -- maximum number of scores per score component\n"
+ "\n"
+ " For more information see: http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc6\n\n"
+ " If you use this please cite:\n\n"
+ " @article { junczys_pbml98_2012,\n"
+ " author = { Marcin Junczys-Dowmunt },\n"
+ " title = { Phrasal Rank-Encoding: Exploiting Phrase Redundancy and\n"
+ " Translational Relations for Phrase Table Compression },\n"
+ " journal = { The Prague Bulletin of Mathematical Linguistics },\n"
+ " volume = { 98 },\n"
+ " year = { 2012 },\n"
+ " note = { Proceedings of the MT Marathon 2012, Edinburgh },\n"
+ " }\n\n"
+ " Acknowledgments: Part of this research was carried out at and funded by\n"
+ " the World Intellectual Property Organization (WIPO) in Geneva.\n\n";
+}
+
+int main(int argc, char** argv)
+{
+
+ std::string inFilePath;
+ std::string outFilePath("out");
+
+ size_t orderBits = 10;
+ size_t fingerPrintBits = 16;
+ bool multipleScoreTrees = true;
+ size_t quantize = 0;
+
+#ifdef WITH_THREADS
+ size_t threads = 1;
+#endif
+
+ if(1 >= argc)
+ {
+ printHelp(argv);
+ return 1;
+ }
+ for(int i = 1; i < argc; ++i)
+ {
+ std::string arg(argv[i]);
+ if("-in" == arg && i+1 < argc)
+ {
+ ++i;
+ inFilePath = argv[i];
+ }
+ else if("-out" == arg && i+1 < argc)
+ {
+ ++i;
+ outFilePath = argv[i];
+ }
+ else if("-landmark" == arg && i+1 < argc)
+ {
+ ++i;
+ orderBits = atoi(argv[i]);
+ }
+ else if("-fingerprint" == arg && i+1 < argc)
+ {
+ ++i;
+ fingerPrintBits = atoi(argv[i]);
+ }
+ else if("-join-scores" == arg)
+ {
+ multipleScoreTrees = false;
+ }
+ else if("-quantize" == arg && i+1 < argc)
+ {
+ ++i;
+ quantize = atoi(argv[i]);
+ }
+ else if("-threads" == arg && i+1 < argc)
+ {
+#ifdef WITH_THREADS
+ ++i;
+ if(std::string(argv[i]) == "all") {
+ threads = boost::thread::hardware_concurrency();
+ if(!threads) {
+ std::cerr << "Could not determine number of hardware threads, setting to 1" << std::endl;
+ threads = 1;
+ }
+ }
+ else
+ threads = atoi(argv[i]);
+#else
+ std::cerr << "Thread support not compiled in" << std::endl;
+ exit(1);
+#endif
+ }
+ else
+ {
+ //somethings wrong... print help
+ printHelp(argv);
+ return 1;
+ }
+ }
+
+ if(outFilePath.rfind(".minlexr") != outFilePath.size() - 8)
+ outFilePath += ".minlexr";
+
+ LexicalReorderingTableCreator(
+ inFilePath, outFilePath,
+ orderBits, fingerPrintBits,
+ multipleScoreTrees, quantize
+#ifdef WITH_THREADS
+ , threads
+#endif
+ );
+}
diff --git a/misc/processPhraseTableMin.cpp b/misc/processPhraseTableMin.cpp
new file mode 100644
index 000000000..084562562
--- /dev/null
+++ b/misc/processPhraseTableMin.cpp
@@ -0,0 +1,177 @@
+#include <iostream>
+
+#ifdef WITH_THREADS
+#include <boost/thread/thread.hpp>
+#endif
+
+#include "TypeDef.h"
+#include "CompactPT/PhraseTableCreator.h"
+
+using namespace Moses;
+
+void printHelp(char **argv) {
+ std::cerr << "Usage " << argv[0] << ":\n"
+ " options: \n"
+ "\t-in string -- input table file name\n"
+ "\t-out string -- prefix of binary table file\n"
+ "\t-nscores int -- number of score components in phrase table\n"
+ "\t-alignment-info -- include alignment info in the binary phrase table\n"
+#ifdef WITH_THREADS
+ "\t-threads int|all -- number of threads used for conversion\n"
+#endif
+ "\n advanced:\n"
+ "\t-encoding string -- encoding type: PREnc REnc None (default PREnc)\n"
+ "\t-rankscore int -- score index of P(t|s) (default 2)\n"
+ "\t-maxrank int -- maximum rank for PREnc (default 100)\n"
+ "\t-landmark int -- use landmark phrase every 2^n source phrases (default 10)\n"
+ "\t-fingerprint int -- number of bits used for source phrase fingerprints (default 16)\n"
+ "\t-join-scores -- single set of Huffman codes for score components\n"
+ "\t-quantize int -- maximum number of scores per score component\n"
+ "\t-no-warnings -- suppress warnings about missing alignment data\n"
+ "\n"
+ " For more information see: http://www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc6\n\n"
+ " If you use this please cite:\n\n"
+ " @article { junczys_pbml98_2012,\n"
+ " author = { Marcin Junczys-Dowmunt },\n"
+ " title = { Phrasal Rank-Encoding: Exploiting Phrase Redundancy and\n"
+ " Translational Relations for Phrase Table Compression },\n"
+ " journal = { The Prague Bulletin of Mathematical Linguistics },\n"
+ " volume = { 98 },\n"
+ " year = { 2012 },\n"
+ " note = { Proceedings of the MT Marathon 2012, Edinburgh },\n"
+ " }\n\n"
+ " Acknowledgments: Part of this research was carried out at and funded by\n"
+ " the World Intellectual Property Organization (WIPO) in Geneva.\n\n";
+}
+
+
+int main(int argc, char **argv) {
+
+ std::string inFilePath;
+ std::string outFilePath("out");
+ PhraseTableCreator::Coding coding = PhraseTableCreator::PREnc;
+
+ size_t numScoreComponent = 5;
+ size_t orderBits = 10;
+ size_t fingerprintBits = 16;
+ bool useAlignmentInfo = false;
+ bool multipleScoreTrees = true;
+ size_t quantize = 0;
+ size_t maxRank = 100;
+ bool sortScoreIndexSet = false;
+ size_t sortScoreIndex = 2;
+ bool warnMe = true;
+ size_t threads = 1;
+
+ if(1 >= argc) {
+ printHelp(argv);
+ return 1;
+ }
+ for(int i = 1; i < argc; ++i) {
+ std::string arg(argv[i]);
+ if("-in" == arg && i+1 < argc) {
+ ++i;
+ inFilePath = argv[i];
+ }
+ else if("-out" == arg && i+1 < argc) {
+ ++i;
+ outFilePath = argv[i];
+ }
+ else if("-encoding" == arg && i+1 < argc) {
+ ++i;
+ std::string val(argv[i]);
+ if(val == "None" || val == "none") {
+ coding = PhraseTableCreator::None;
+ }
+ else if(val == "REnc" || val == "renc") {
+ coding = PhraseTableCreator::REnc;
+ }
+ else if(val == "PREnc" || val == "prenc") {
+ coding = PhraseTableCreator::PREnc;
+ }
+ }
+ else if("-maxrank" == arg && i+1 < argc) {
+ ++i;
+ maxRank = atoi(argv[i]);
+ }
+ else if("-nscores" == arg && i+1 < argc) {
+ ++i;
+ numScoreComponent = atoi(argv[i]);
+ }
+ else if("-rankscore" == arg && i+1 < argc) {
+ ++i;
+ sortScoreIndex = atoi(argv[i]);
+ sortScoreIndexSet = true;
+ }
+ else if("-alignment-info" == arg) {
+ useAlignmentInfo = true;
+ }
+ else if("-landmark" == arg && i+1 < argc) {
+ ++i;
+ orderBits = atoi(argv[i]);
+ }
+ else if("-fingerprint" == arg && i+1 < argc) {
+ ++i;
+ fingerprintBits = atoi(argv[i]);
+ }
+ else if("-join-scores" == arg) {
+ multipleScoreTrees = false;
+ }
+ else if("-quantize" == arg && i+1 < argc) {
+ ++i;
+ quantize = atoi(argv[i]);
+ }
+ else if("-no-warnings" == arg) {
+ warnMe = false;
+ }
+ else if("-threads" == arg && i+1 < argc) {
+#ifdef WITH_THREADS
+ ++i;
+ if(std::string(argv[i]) == "all") {
+ threads = boost::thread::hardware_concurrency();
+ if(!threads) {
+ std::cerr << "Could not determine number of hardware threads, setting to 1" << std::endl;
+ threads = 1;
+ }
+ }
+ else
+ threads = atoi(argv[i]);
+#else
+ std::cerr << "Thread support not compiled in" << std::endl;
+ exit(1);
+#endif
+ }
+ else {
+ //something's wrong... print help
+ printHelp(argv);
+ return 1;
+ }
+ }
+
+ if(!sortScoreIndexSet && numScoreComponent != 5 && coding == PhraseTableCreator::PREnc)
+ {
+ std::cerr << "WARNING: You are using a nonstandard number of scores ("
+ << numScoreComponent << ") with PREnc. Set the index of P(t|s) "
+ "with -rankscore int if it is not "
+ << sortScoreIndex << "." << std::endl;
+ }
+
+ if(sortScoreIndex >= numScoreComponent)
+ {
+ std::cerr << "ERROR: -rankscore " << sortScoreIndex << " is out of range (0 ... "
+ << (numScoreComponent-1) << ")" << std::endl;
+ abort();
+ }
+
+ if(outFilePath.rfind(".minphr") != outFilePath.size() - 7)
+ outFilePath += ".minphr";
+
+ PhraseTableCreator(inFilePath, outFilePath, numScoreComponent, sortScoreIndex,
+ coding, orderBits, fingerprintBits,
+ useAlignmentInfo, multipleScoreTrees,
+ quantize, maxRank, warnMe
+#ifdef WITH_THREADS
+ , threads
+#endif
+ );
+}
diff --git a/misc/queryPhraseTable.cpp b/misc/queryPhraseTable.cpp
index 109c7e69e..02e1c29a1 100644
--- a/misc/queryPhraseTable.cpp
+++ b/misc/queryPhraseTable.cpp
@@ -18,6 +18,7 @@ int main(int argc, char **argv)
int nscores = 5;
std::string ttable = "";
bool useAlignments = false;
+ bool reportCounts = false;
for(int i = 1; i < argc; i++) {
if(!strcmp(argv[i], "-n")) {
@@ -28,8 +29,11 @@ int main(int argc, char **argv)
if(i + 1 == argc)
usage();
ttable = argv[++i];
- } else if(!strcmp(argv[i], "-a"))
+ } else if(!strcmp(argv[i], "-a")) {
useAlignments = true;
+ } else if (!strcmp(argv[i], "-c")) {
+ reportCounts = true;
+ }
else
usage();
}
@@ -54,22 +58,26 @@ int main(int argc, char **argv)
else
ptree.GetTargetCandidates(srcphrase, tgtcands);
- for(uint i = 0; i < tgtcands.size(); i++) {
- std::cout << line << " |||";
- for(uint j = 0; j < tgtcands[i].tokens.size(); j++)
- std::cout << ' ' << *tgtcands[i].tokens[j];
- std::cout << " |||";
-
- if(useAlignments) {
- std::cout << " " << wordAlignment[i] << " |||";
+ if (reportCounts) {
+ std::cout << line << " " << tgtcands.size() << "\n";
+ } else {
+ for(uint i = 0; i < tgtcands.size(); i++) {
+ std::cout << line << " |||";
+ for(uint j = 0; j < tgtcands[i].tokens.size(); j++)
+ std::cout << ' ' << *tgtcands[i].tokens[j];
+ std::cout << " |||";
+
+ if(useAlignments) {
+ std::cout << " " << wordAlignment[i] << " |||";
+ }
+
+ for(uint j = 0; j < tgtcands[i].scores.size(); j++)
+ std::cout << ' ' << tgtcands[i].scores[j];
+ std::cout << '\n';
}
-
- for(uint j = 0; j < tgtcands[i].scores.size(); j++)
- std::cout << ' ' << tgtcands[i].scores[j];
std::cout << '\n';
}
- std::cout << '\n';
std::cout.flush();
}
}
@@ -78,6 +86,7 @@ void usage()
{
std::cerr << "Usage: queryPhraseTable [-n <nscores>] [-a] -t <ttable>\n"
"-n <nscores> number of scores in phrase table (default: 5)\n"
+ "-c only report counts of entries\n"
"-a binary phrase table contains alignments\n"
"-t <ttable> phrase table\n";
exit(1);
diff --git a/misc/queryPhraseTableMin.cpp b/misc/queryPhraseTableMin.cpp
new file mode 100644
index 000000000..02d889598
--- /dev/null
+++ b/misc/queryPhraseTableMin.cpp
@@ -0,0 +1,97 @@
+// Query binary phrase tables.
+// Marcin Junczys-Dowmunt, 13 September 2012
+
+#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <vector>
+
+#include "CompactPT/PhraseDictionaryCompact.h"
+#include "Util.h"
+
+void usage();
+
+typedef unsigned int uint;
+
+using namespace Moses;
+
+int main(int argc, char **argv)
+{
+ int nscores = 5;
+ std::string ttable = "";
+ bool useAlignments = false;
+ bool reportCounts = false;
+
+ for(int i = 1; i < argc; i++) {
+ if(!strcmp(argv[i], "-n")) {
+ if(i + 1 == argc)
+ usage();
+ nscores = atoi(argv[++i]);
+ } else if(!strcmp(argv[i], "-t")) {
+ if(i + 1 == argc)
+ usage();
+ ttable = argv[++i];
+ } else if(!strcmp(argv[i], "-a")) {
+ useAlignments = true;
+ } else if (!strcmp(argv[i], "-c")) {
+ reportCounts = true;
+ }
+ else
+ usage();
+ }
+
+ if(ttable == "")
+ usage();
+
+ std::vector<FactorType> input(1, 0);
+ std::vector<FactorType> output(1, 0);
+ std::vector<float> weight(nscores, 0);
+
+ LMList lmList;
+
+ PhraseDictionaryFeature pdf(Compact, nscores, nscores, input, output, ttable, weight, 0, "", "");
+ PhraseDictionaryCompact pdc(nscores, Compact, &pdf, false, useAlignments);
+ bool ret = pdc.Load(input, output, ttable, weight, 0, lmList, 0);
+ assert(ret);
+
+ std::string line;
+ while(getline(std::cin, line)) {
+ Phrase sourcePhrase(0);
+ sourcePhrase.CreateFromString(input, line, "||dummy_string||");
+
+ TargetPhraseVectorPtr decodedPhraseColl
+ = pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
+
+ if(decodedPhraseColl != NULL) {
+ if(reportCounts)
+ std::cout << sourcePhrase << decodedPhraseColl->size() << std::endl;
+ else
+ for(TargetPhraseVector::iterator it = decodedPhraseColl->begin(); it != decodedPhraseColl->end(); it++) {
+ TargetPhrase &tp = *it;
+ std::cout << sourcePhrase << "||| ";
+ std::cout << static_cast<const Phrase&>(tp) << "|||";
+
+ if(useAlignments)
+ std::cout << " " << tp.GetAlignmentInfo() << "|||";
+
+ for(size_t i = 0; i < tp.GetScoreBreakdown().size(); i++)
+ std::cout << " " << exp(tp.GetScoreBreakdown()[i]);
+ std::cout << std::endl;
+ }
+ }
+ else if(reportCounts)
+ std::cout << sourcePhrase << 0 << std::endl;
+
+ std::cout.flush();
+ }
+}
+
+void usage()
+{
+ std::cerr << "Usage: queryPhraseTable [-n <nscores>] [-a] -t <ttable>\n"
+ "-n <nscores> number of scores in phrase table (default: 5)\n"
+ "-c only report counts of entries\n"
+ "-a binary phrase table contains alignments\n"
+ "-t <ttable> phrase table\n";
+ exit(1);
+}
diff --git a/moses-chart-cmd/src/IOWrapper.cpp b/moses-chart-cmd/src/IOWrapper.cpp
index dc366d1c4..f6adcd7fc 100644
--- a/moses-chart-cmd/src/IOWrapper.cpp
+++ b/moses-chart-cmd/src/IOWrapper.cpp
@@ -43,7 +43,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "PhraseDictionary.h"
#include "ChartTrellisPathList.h"
#include "ChartTrellisPath.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "ChartHypothesis.h"
#include <boost/algorithm/string.hpp>
@@ -53,6 +53,9 @@ POSSIBILITY OF SUCH DAMAGE.
using namespace std;
using namespace Moses;
+namespace MosesChartCmd
+{
+
IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
, const std::vector<FactorType> &outputFactorOrder
, const FactorMask &inputFactorUsed
@@ -62,7 +65,6 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
:m_inputFactorOrder(inputFactorOrder)
,m_outputFactorOrder(outputFactorOrder)
,m_inputFactorUsed(inputFactorUsed)
- ,m_nBestStream(NULL)
,m_outputSearchGraphStream(NULL)
,m_detailedTranslationReportingStream(NULL)
,m_inputFilePath(inputFilePath)
@@ -70,6 +72,7 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
,m_nBestOutputCollector(NULL)
,m_searchGraphOutputCollector(NULL)
,m_singleBestOutputCollector(NULL)
+ ,m_alignmentOutputCollector(NULL)
{
const StaticData &staticData = StaticData::Instance();
@@ -79,21 +82,19 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_inputStream = new InputFileStream(inputFilePath);
}
- m_surpressSingleBestOutput = false;
+ bool suppressSingleBestOutput = false;
if (nBestSize > 0) {
if (nBestFilePath == "-") {
- m_nBestStream = &std::cout;
- m_surpressSingleBestOutput = true;
+ m_nBestOutputCollector = new Moses::OutputCollector(&std::cout);
+ suppressSingleBestOutput = true;
} else {
- std::ofstream *nBestFile = new std::ofstream;
- m_nBestStream = nBestFile;
- nBestFile->open(nBestFilePath.c_str());
+ m_nBestOutputCollector = new Moses::OutputCollector(new std::ofstream(nBestFilePath.c_str()));
+ m_nBestOutputCollector->HoldOutputStream();
}
- m_nBestOutputCollector = new Moses::OutputCollector(m_nBestStream);
}
- if (!m_surpressSingleBestOutput) {
+ if (!suppressSingleBestOutput) {
m_singleBestOutputCollector = new Moses::OutputCollector(&std::cout);
}
@@ -112,6 +113,15 @@ IOWrapper::IOWrapper(const std::vector<FactorType> &inputFactorOrder
m_detailedTranslationReportingStream = new std::ofstream(path.c_str());
m_detailOutputCollector = new Moses::OutputCollector(m_detailedTranslationReportingStream);
}
+
+ if (staticData.PrintAlignmentInfo()) {
+ if (staticData.GetAlignmentOutputFile().empty()) {
+ m_alignmentOutputCollector = new Moses::OutputCollector(&std::cout);
+ } else {
+ m_alignmentOutputCollector = new Moses::OutputCollector(new std::ofstream(staticData.GetAlignmentOutputFile().c_str()));
+ m_alignmentOutputCollector->HoldOutputStream();
+ }
+ }
}
IOWrapper::~IOWrapper()
@@ -119,16 +129,13 @@ IOWrapper::~IOWrapper()
if (!m_inputFilePath.empty()) {
delete m_inputStream;
}
- if (!m_surpressSingleBestOutput) {
- // outputting n-best to file, rather than stdout. need to close file and delete obj
- delete m_nBestStream;
- }
delete m_outputSearchGraphStream;
delete m_detailedTranslationReportingStream;
delete m_detailOutputCollector;
delete m_nBestOutputCollector;
delete m_searchGraphOutputCollector;
delete m_singleBestOutputCollector;
+ delete m_alignmentOutputCollector;
}
void IOWrapper::ResetTranslationId() {
@@ -192,6 +199,86 @@ void OutputSurface(std::ostream &out, const ChartHypothesis *hypo, const std::ve
}
}
}
+
+namespace {
+ typedef std::vector< std::pair<size_t, size_t> > WordAlignment;
+
+ bool IsUnknownWord(const Word& word) {
+ const Factor* factor = word[MAX_NUM_FACTORS - 1];
+ if (factor == NULL)
+ return false;
+ return factor->GetString() == UNKNOWN_FACTOR;
+ }
+
+ WordAlignment GetWordAlignment(const Moses::ChartHypothesis *hypo, size_t *targetWordsCount)
+ {
+ const Moses::TargetPhrase& targetPhrase = hypo->GetCurrTargetPhrase();
+ const AlignmentInfo& phraseAlignmentInfo = targetPhrase.GetAlignmentInfo();
+ size_t sourceSize = 0;
+ for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
+ it != phraseAlignmentInfo.end(); ++it)
+ {
+ sourceSize = std::max(sourceSize, it->first + 1);
+ }
+ std::vector<size_t> sourceSideLengths(sourceSize, 1);
+ std::vector<size_t> targetSideLengths(targetPhrase.GetSize(), 1);
+ std::vector<WordAlignment> alignmentsPerSourceNonTerm(sourceSize);
+ size_t prevHypoIndex = 0;
+ for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
+ it != phraseAlignmentInfo.end(); ++it)
+ {
+ if (targetPhrase.GetWord(it->second).IsNonTerminal()) {
+ const Moses::ChartHypothesis *prevHypo = hypo->GetPrevHypo(prevHypoIndex);
+ ++prevHypoIndex;
+ alignmentsPerSourceNonTerm[it->first] = GetWordAlignment(
+ prevHypo, &targetSideLengths[it->second]);
+ sourceSideLengths[it->first] = prevHypo->GetCurrSourceRange().GetNumWordsCovered();
+ CHECK(prevHypo->GetCurrSourceRange().GetStartPos() - hypo->GetCurrSourceRange().GetStartPos()
+ == (int)std::accumulate(sourceSideLengths.begin(), sourceSideLengths.begin() + it->first, 0));
+ } else {
+ alignmentsPerSourceNonTerm[it->first].push_back(WordAlignment::value_type(0, 0));
+ }
+ }
+ if (targetWordsCount != NULL) {
+ *targetWordsCount = std::accumulate(targetSideLengths.begin(), targetSideLengths.end(), 0);
+ }
+ // isn't valid since there may be unaligned words: CHECK(hypo->GetCurrSourceRange().GetNumWordsCovered() == std::accumulate(sourceSideLengths.begin(), sourceSideLengths.end(), 0));
+ WordAlignment result;
+ for (AlignmentInfo::const_iterator it = phraseAlignmentInfo.begin();
+ it != phraseAlignmentInfo.end(); ++it)
+ {
+ size_t sourceOffset = std::accumulate(sourceSideLengths.begin(), sourceSideLengths.begin() + it->first, 0);
+ size_t targetOffset = std::accumulate(targetSideLengths.begin(), targetSideLengths.begin() + it->second, 0);
+ for (WordAlignment::const_iterator it2 = alignmentsPerSourceNonTerm[it->first].begin();
+ it2 != alignmentsPerSourceNonTerm[it->first].end(); ++it2)
+ {
+ result.push_back(make_pair(sourceOffset + it2->first, targetOffset + it2->second));
+ }
+ }
+ if (result.empty() && targetPhrase.GetSize() == 1 && hypo->GetCurrSourceRange().GetNumWordsCovered() == 1 && IsUnknownWord(targetPhrase.GetWord(0))) {
+ result.push_back(WordAlignment::value_type(0, 0));
+ }
+ return result;
+ }
+}
+
+
+void IOWrapper::OutputAlignment(const Moses::ChartHypothesis *hypo, long translationId)
+{
+ if (m_alignmentOutputCollector == NULL)
+ return;
+ WordAlignment alignment = GetWordAlignment(hypo, NULL);
+ std::ostringstream out;
+ for (WordAlignment::const_iterator it = alignment.begin();
+ it != alignment.end(); ++it)
+ {
+ if (it != alignment.begin())
+ out << " ";
+ out << it->first << "-" << it->second;
+ }
+ out << std::endl;
+ m_alignmentOutputCollector->Write(static_cast<int>(translationId), out.str());
+}
void IOWrapper::Backtrack(const ChartHypothesis *hypo)
{
@@ -326,9 +413,12 @@ void IOWrapper::OutputDetailedTranslationReport(
CHECK(m_detailOutputCollector);
m_detailOutputCollector->Write(translationId, out.str());
}
+
void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId, bool /* reportSegmentation */, bool /* reportAllFactors */)
{
+ if (!m_singleBestOutputCollector)
+ return;
std::ostringstream out;
IOWrapper::FixPrecision(out);
if (hypo != NULL) {
@@ -340,23 +430,21 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId,
if (StaticData::Instance().GetOutputHypoScore()) {
out << hypo->GetTotalScore() << " ";
}
-
- if (!m_surpressSingleBestOutput) {
- if (StaticData::Instance().IsPathRecoveryEnabled()) {
- out << "||| ";
- }
- Phrase outPhrase(ARRAY_SIZE_INCR);
- hypo->CreateOutputPhrase(outPhrase);
-
- // delete 1st & last
- CHECK(outPhrase.GetSize() >= 2);
- outPhrase.RemoveWord(0);
- outPhrase.RemoveWord(outPhrase.GetSize() - 1);
-
- const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
- string output = outPhrase.GetStringRep(outputFactorOrder);
- out << output << endl;
+
+ if (StaticData::Instance().IsPathRecoveryEnabled()) {
+ out << "||| ";
}
+ Phrase outPhrase(ARRAY_SIZE_INCR);
+ hypo->CreateOutputPhrase(outPhrase);
+
+ // delete 1st & last
+ CHECK(outPhrase.GetSize() >= 2);
+ outPhrase.RemoveWord(0);
+ outPhrase.RemoveWord(outPhrase.GetSize() - 1);
+
+ const std::vector<FactorType> outputFactorOrder = StaticData::Instance().GetOutputFactorOrder();
+ string output = outPhrase.GetStringRep(outputFactorOrder);
+ out << output << endl;
} else {
VERBOSE(1, "NO BEST TRANSLATION" << endl);
@@ -366,10 +454,7 @@ void IOWrapper::OutputBestHypo(const ChartHypothesis *hypo, long translationId,
out << endl;
}
-
- if (m_singleBestOutputCollector) {
- m_singleBestOutputCollector->Write(translationId, out.str());
- }
+ m_singleBestOutputCollector->Write(translationId, out.str());
}
void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const ChartHypothesis *bestHypo, const TranslationSystem* system, long translationId)
@@ -377,7 +462,7 @@ void IOWrapper::OutputNBestList(const ChartTrellisPathList &nBestList, const Cha
std::ostringstream out;
// Check if we're writing to std::cout.
- if (m_surpressSingleBestOutput) {
+ if (m_nBestOutputCollector->OutputIsCout()) {
// Set precision only if we're writing the n-best list to cout. This is to
// preserve existing behaviour, but should probably be done either way.
IOWrapper::FixPrecision(out);
@@ -549,3 +634,6 @@ void IOWrapper::FixPrecision(std::ostream &stream, size_t size)
stream.setf(std::ios::fixed);
stream.precision(size);
}
+
+}
+
diff --git a/moses-chart-cmd/src/IOWrapper.h b/moses-chart-cmd/src/IOWrapper.h
index 3c4c32545..31434d806 100644
--- a/moses-chart-cmd/src/IOWrapper.h
+++ b/moses-chart-cmd/src/IOWrapper.h
@@ -52,6 +52,11 @@ class FactorCollection;
class ChartTrellisPathList;
}
+namespace MosesChartCmd
+{
+
+/** Helper class that holds misc variables to write data out to command line.
+ */
class IOWrapper
{
protected:
@@ -61,15 +66,15 @@ protected:
const std::vector<Moses::FactorType> &m_inputFactorOrder;
const std::vector<Moses::FactorType> &m_outputFactorOrder;
const Moses::FactorMask &m_inputFactorUsed;
- std::ostream *m_nBestStream, *m_outputSearchGraphStream;
+ std::ostream *m_outputSearchGraphStream;
std::ostream *m_detailedTranslationReportingStream;
std::string m_inputFilePath;
std::istream *m_inputStream;
- bool m_surpressSingleBestOutput;
Moses::OutputCollector *m_detailOutputCollector;
Moses::OutputCollector *m_nBestOutputCollector;
Moses::OutputCollector *m_searchGraphOutputCollector;
Moses::OutputCollector *m_singleBestOutputCollector;
+ Moses::OutputCollector *m_alignmentOutputCollector;
public:
IOWrapper(const std::vector<Moses::FactorType> &inputFactorOrder
@@ -86,6 +91,7 @@ public:
void OutputNBestList(const Moses::ChartTrellisPathList &nBestList, const Moses::ChartHypothesis *bestHypo, const Moses::TranslationSystem* system, long translationId);
void OutputSparseFeatureScores(std::ostream& out, const Moses::ChartTrellisPath &path, const Moses::FeatureFunction *ff, std::string &lastName);
void OutputDetailedTranslationReport(const Moses::ChartHypothesis *hypo, const Moses::Sentence &sentence, long translationId);
+ void OutputAlignment(const Moses::ChartHypothesis *hypo, long translationId);
void Backtrack(const Moses::ChartHypothesis *hypo);
void ResetTranslationId();
@@ -96,3 +102,5 @@ public:
static void FixPrecision(std::ostream &, size_t size=3);
};
+
+}
diff --git a/moses-chart-cmd/src/Jamfile b/moses-chart-cmd/src/Jamfile
index 583b4664e..174a4b7f6 100644
--- a/moses-chart-cmd/src/Jamfile
+++ b/moses-chart-cmd/src/Jamfile
@@ -1,3 +1,2 @@
exe moses_chart : Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp ../../moses/src//moses ;
-install legacy-install : moses_chart : <location>. <install-type>EXE <install-dependencies>on <link>shared:<dll-path>$(TOP)/moses-chart-cmd/src <link>shared:<install-type>LIB ;
diff --git a/moses-chart-cmd/src/Main.cpp b/moses-chart-cmd/src/Main.cpp
index 29ea81ec0..ef95922fe 100644
--- a/moses-chart-cmd/src/Main.cpp
+++ b/moses-chart-cmd/src/Main.cpp
@@ -60,6 +60,7 @@ POSSIBILITY OF SUCH DAMAGE.
using namespace std;
using namespace Moses;
+using namespace MosesChartCmd;
/**
* Translates a sentence.
@@ -102,6 +103,10 @@ public:
const Sentence &sentence = dynamic_cast<const Sentence &>(*m_source);
m_ioWrapper.OutputDetailedTranslationReport(bestHypo, sentence, lineNumber);
}
+
+ if (staticData.PrintAlignmentInfo()) {
+ m_ioWrapper.OutputAlignment(bestHypo, lineNumber);
+ }
// n-best
size_t nBestSize = staticData.GetNBestSize();
@@ -186,19 +191,13 @@ static void ShowWeights()
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
const vector<const StatelessFeatureFunction*>& slf =system.GetStatelessFeatureFunctions();
const vector<const StatefulFeatureFunction*>& sff = system.GetStatefulFeatureFunctions();
- const vector<PhraseDictionaryFeature*>& pds = system.GetPhraseDictionaries();
- const vector<GenerationDictionary*>& gds = system.GetGenerationDictionaries();
for (size_t i = 0; i < sff.size(); ++i) {
PrintFeatureWeight(sff[i]);
}
- for (size_t i = 0; i < pds.size(); ++i) {
- PrintFeatureWeight(pds[i]);
- }
- for (size_t i = 0; i < gds.size(); ++i) {
- PrintFeatureWeight(gds[i]);
- }
for (size_t i = 0; i < slf.size(); ++i) {
- PrintFeatureWeight(slf[i]);
+ if (slf[i]->GetScoreProducerWeightShortName() != "u") {
+ PrintFeatureWeight(slf[i]);
+ }
}
}
@@ -222,7 +221,7 @@ int main(int argc, char* argv[])
}
const StaticData &staticData = StaticData::Instance();
- if (!StaticData::LoadDataStatic(&parameter))
+ if (!StaticData::LoadDataStatic(&parameter, argv[0]))
return EXIT_FAILURE;
if (parameter.isParamSpecified("show-weights")) {
@@ -233,8 +232,8 @@ int main(int argc, char* argv[])
CHECK(staticData.GetSearchAlgorithm() == ChartDecoding);
// set up read/writing class
- IOWrapper *ioWrapper = GetIODevice(staticData);
-
+ IOWrapper *ioWrapper = GetIOWrapper(staticData);
+
// check on weights
const ScoreComponentCollection& weights = staticData.GetAllWeights();
IFVERBOSE(2) {
@@ -287,7 +286,7 @@ int main(int argc, char* argv[])
#endif
}
-IOWrapper *GetIODevice(const StaticData &staticData)
+IOWrapper *GetIOWrapper(const StaticData &staticData)
{
IOWrapper *ioWrapper;
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
diff --git a/moses-chart-cmd/src/Main.h b/moses-chart-cmd/src/Main.h
index b9d9a32d6..02661b58d 100644
--- a/moses-chart-cmd/src/Main.h
+++ b/moses-chart-cmd/src/Main.h
@@ -36,7 +36,9 @@ POSSIBILITY OF SUCH DAMAGE.
#include "StaticData.h"
-class IOWrapper;
+namespace MosesChartCmd {
+ class IOWrapper;
+}
int main(int argc, char* argv[]);
-IOWrapper *GetIODevice(const Moses::StaticData &staticData);
+MosesChartCmd::IOWrapper *GetIOWrapper(const Moses::StaticData &staticData);
diff --git a/moses-chart-cmd/src/TranslationAnalysis.h b/moses-chart-cmd/src/TranslationAnalysis.h
index 1291548f8..6f05128e9 100644
--- a/moses-chart-cmd/src/TranslationAnalysis.h
+++ b/moses-chart-cmd/src/TranslationAnalysis.h
@@ -13,7 +13,7 @@
namespace TranslationAnalysis
{
-/***
+/**
* print details about the translation represented in hypothesis to
* os. Included information: phrase alignment, words dropped, scores
*/
diff --git a/moses-cmd/src/IOWrapper.cpp b/moses-cmd/src/IOWrapper.cpp
index f4a9e21f6..80958ace0 100644
--- a/moses-cmd/src/IOWrapper.cpp
+++ b/moses-cmd/src/IOWrapper.cpp
@@ -49,6 +49,9 @@ POSSIBILITY OF SUCH DAMAGE.
using namespace std;
using namespace Moses;
+namespace MosesCmd
+{
+
IOWrapper::IOWrapper(
const vector<FactorType> &inputFactorOrder
, const vector<FactorType> &outputFactorOrder
@@ -478,11 +481,15 @@ void OutputAllFeatureScores( std::ostream& out, const TranslationSystem* system,
const vector<const StatefulFeatureFunction*>& sff = system->GetStatefulFeatureFunctions();
for( size_t i=0; i<sff.size(); i++ )
if (sff[i]->GetScoreProducerWeightShortName() != "bl")
- OutputFeatureScores( out, path, sff[i], lastName );
+ OutputFeatureScores( out, path, sff[i], lastName );
const vector<const StatelessFeatureFunction*>& slf = system->GetStatelessFeatureFunctions();
for( size_t i=0; i<slf.size(); i++ )
- OutputFeatureScores( out, path, slf[i], lastName );
+ if (slf[i]->GetScoreProducerWeightShortName() != "u" &&
+ slf[i]->GetScoreProducerWeightShortName() != "tm" &&
+ slf[i]->GetScoreProducerWeightShortName() != "I" &&
+ slf[i]->GetScoreProducerWeightShortName() != "g")
+ OutputFeatureScores( out, path, slf[i], lastName );
}
void OutputFeatureScores( std::ostream& out, const TrellisPath &path, const FeatureFunction *ff, std::string &lastName )
@@ -578,7 +585,7 @@ bool ReadInput(IOWrapper &ioWrapper, InputTypeEnum inputType, InputType*& source
-IOWrapper *GetIODevice(const StaticData &staticData)
+IOWrapper *GetIOWrapper(const StaticData &staticData)
{
IOWrapper *ioWrapper;
const std::vector<FactorType> &inputFactorOrder = staticData.GetInputFactorOrder()
@@ -607,3 +614,6 @@ IOWrapper *GetIODevice(const StaticData &staticData)
return ioWrapper;
}
+
+}
+
diff --git a/moses-cmd/src/IOWrapper.h b/moses-cmd/src/IOWrapper.h
index 289d0ba90..fc811f248 100644
--- a/moses-cmd/src/IOWrapper.h
+++ b/moses-cmd/src/IOWrapper.h
@@ -53,6 +53,11 @@ POSSIBILITY OF SUCH DAMAGE.
#include "WordLattice.h"
#include "LatticeMBR.h"
+namespace MosesCmd
+{
+
+/** Helper class that holds misc variables to write data out to command line.
+ */
class IOWrapper
{
protected:
@@ -118,7 +123,7 @@ public:
}
};
-IOWrapper *GetIODevice(const Moses::StaticData &staticData);
+IOWrapper *GetIOWrapper(const Moses::StaticData &staticData);
bool ReadInput(IOWrapper &ioWrapper, Moses::InputTypeEnum inputType, Moses::InputType*& source);
void OutputBestSurface(std::ostream &out, const Moses::Hypothesis *hypo, const std::vector<Moses::FactorType> &outputFactorOrder, bool reportSegmentation, bool reportAllFactors);
void OutputNBest(std::ostream& out, const Moses::TrellisPathList &nBestList, const std::vector<Moses::FactorType>&,
@@ -133,4 +138,7 @@ void OutputInput(std::ostream& os, const Moses::Hypothesis* hypo);
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::Hypothesis *hypo);
void OutputAlignment(Moses::OutputCollector* collector, size_t lineNo, const Moses::TrellisPath &path);
+
+}
+
#endif
diff --git a/moses-cmd/src/LatticeMBR.cpp b/moses-cmd/src/LatticeMBR.cpp
index 1b1ec8284..2bd62747e 100644
--- a/moses-cmd/src/LatticeMBR.cpp
+++ b/moses-cmd/src/LatticeMBR.cpp
@@ -15,6 +15,9 @@
using namespace std;
using namespace Moses;
+namespace MosesCmd
+{
+
size_t bleu_order = 4;
float UNKNGRAMLOGPROB = -20;
void GetOutputWords(const TrellisPath &path, vector <Word> &translation)
@@ -661,4 +664,6 @@ const TrellisPath doConsensusDecoding(Manager& manager, TrellisPathList& nBestLi
//return bestWords;
}
+}
+
diff --git a/moses-cmd/src/LatticeMBR.h b/moses-cmd/src/LatticeMBR.h
index fa0379aee..14a2e22da 100644
--- a/moses-cmd/src/LatticeMBR.h
+++ b/moses-cmd/src/LatticeMBR.h
@@ -19,6 +19,9 @@
+namespace MosesCmd
+{
+
class Edge;
typedef std::vector< const Moses::Hypothesis *> Lattice;
@@ -144,4 +147,7 @@ bool ascendingCoverageCmp(const Moses::Hypothesis* a, const Moses::Hypothesis* b
std::vector<Moses::Word> doLatticeMBR(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
const Moses::TrellisPath doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
//std::vector<Moses::Word> doConsensusDecoding(Moses::Manager& manager, Moses::TrellisPathList& nBestList);
+
+}
+
#endif
diff --git a/moses-cmd/src/LatticeMBRGrid.cpp b/moses-cmd/src/LatticeMBRGrid.cpp
index 8bd52b7d7..83a0f3562 100644
--- a/moses-cmd/src/LatticeMBRGrid.cpp
+++ b/moses-cmd/src/LatticeMBRGrid.cpp
@@ -54,10 +54,14 @@ POSSIBILITY OF SUCH DAMAGE.
using namespace std;
using namespace Moses;
+using namespace MosesCmd;
//keys
enum gridkey {lmbr_p,lmbr_r,lmbr_prune,lmbr_scale};
+namespace MosesCmd
+{
+
class Grid
{
public:
@@ -128,6 +132,8 @@ private:
map<string,gridkey> m_args;
};
+} // namespace
+
int main(int argc, char* argv[])
{
cerr << "Lattice MBR Grid search" << endl;
@@ -145,13 +151,13 @@ int main(int argc, char* argv[])
params->Explain();
exit(1);
}
- if (!StaticData::LoadDataStatic(params)) {
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}
StaticData& staticData = const_cast<StaticData&>(StaticData::Instance());
staticData.SetUseLatticeMBR(true);
- IOWrapper* ioWrapper = GetIODevice(staticData);
+ IOWrapper* ioWrapper = GetIOWrapper(staticData);
if (!ioWrapper) {
throw runtime_error("Failed to initialise IOWrapper");
@@ -174,7 +180,7 @@ int main(int argc, char* argv[])
++lineCount;
Sentence sentence;
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
- Manager manager(*source,staticData.GetSearchAlgorithm(), &system);
+ Manager manager(lineCount, *source, staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
TrellisPathList nBestList;
manager.CalcNBest(nBestSize, nBestList,true);
diff --git a/moses-cmd/src/Main.cpp b/moses-cmd/src/Main.cpp
index 699e020af..f75a56b2c 100644
--- a/moses-cmd/src/Main.cpp
+++ b/moses-cmd/src/Main.cpp
@@ -39,6 +39,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Manager.h"
#include "StaticData.h"
#include "Util.h"
+#include "Timer.h"
#include "mbr.h"
#include "ThreadPool.h"
#include "TranslationAnalysis.h"
@@ -50,7 +51,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
using namespace Moses;
+using namespace MosesCmd;
+namespace MosesCmd
+{
// output floats with three significant digits
static const size_t PRECISION = 3;
@@ -76,23 +80,27 @@ public:
OutputCollector* latticeSamplesCollector,
OutputCollector* wordGraphCollector, OutputCollector* searchGraphCollector,
OutputCollector* detailedTranslationCollector,
- OutputCollector* alignmentInfoCollector ) :
+ OutputCollector* alignmentInfoCollector,
+ OutputCollector* unknownsCollector) :
m_source(source), m_lineNumber(lineNumber),
m_outputCollector(outputCollector), m_nbestCollector(nbestCollector),
m_latticeSamplesCollector(latticeSamplesCollector),
m_wordGraphCollector(wordGraphCollector), m_searchGraphCollector(searchGraphCollector),
m_detailedTranslationCollector(detailedTranslationCollector),
- m_alignmentInfoCollector(alignmentInfoCollector) {}
+ m_alignmentInfoCollector(alignmentInfoCollector),
+ m_unknownsCollector(unknownsCollector) {}
/** Translate one sentence
* gets called by main function implemented at end of this source file */
void Run() {
// report thread number
-#ifdef BOOST_HAS_PTHREADS
+#if defined(WITH_THREADS) && defined(BOOST_HAS_PTHREADS)
TRACE_ERR("Translating line " << m_lineNumber << " in thread id " << pthread_self() << std::endl);
#endif
+ Timer translationTime;
+ translationTime.start();
// shorthand for "global data"
const StaticData &staticData = StaticData::Instance();
// input sentence
@@ -104,7 +112,7 @@ public:
// note: this executes the search, resulting in a search graph
// we still need to apply the decision rule (MAP, MBR, ...)
if ((*m_source).GetSize() == 0) return;
- Manager manager(*m_source,staticData.GetSearchAlgorithm(), &system);
+ Manager manager(m_lineNumber, *m_source,staticData.GetSearchAlgorithm(), &system);
manager.ProcessSentence();
// output word graph
@@ -263,11 +271,24 @@ public:
m_detailedTranslationCollector->Write(m_lineNumber,out.str());
}
+ //list of unknown words
+ if (m_unknownsCollector) {
+ const vector<Phrase*>& unknowns = manager.getSntTranslationOptions()->GetUnknownSources();
+ ostringstream out;
+ for (size_t i = 0; i < unknowns.size(); ++i) {
+ out << *(unknowns[i]);
+ }
+ out << endl;
+ m_unknownsCollector->Write(m_lineNumber, out.str());
+ }
+
// report additional statistics
IFVERBOSE(2) {
PrintUserTime("Sentence Decoding Time:");
}
manager.CalcDecoderStatistics();
+
+ VERBOSE(1, "Line " << m_lineNumber << ": Translation took " << translationTime << " seconds total" << endl);
}
~TranslationTask() {
@@ -284,6 +305,7 @@ private:
OutputCollector* m_searchGraphCollector;
OutputCollector* m_detailedTranslationCollector;
OutputCollector* m_alignmentInfoCollector;
+ OutputCollector* m_unknownsCollector;
std::ofstream *m_alignmentStream;
@@ -316,22 +338,18 @@ static void ShowWeights()
const TranslationSystem& system = staticData.GetTranslationSystem(TranslationSystem::DEFAULT);
const vector<const StatelessFeatureFunction*>& slf =system.GetStatelessFeatureFunctions();
const vector<const StatefulFeatureFunction*>& sff = system.GetStatefulFeatureFunctions();
- const vector<PhraseDictionaryFeature*>& pds = system.GetPhraseDictionaries();
- const vector<GenerationDictionary*>& gds = system.GetGenerationDictionaries();
for (size_t i = 0; i < sff.size(); ++i) {
PrintFeatureWeight(sff[i]);
}
for (size_t i = 0; i < slf.size(); ++i) {
- PrintFeatureWeight(slf[i]);
- }
- for (size_t i = 0; i < pds.size(); ++i) {
- PrintFeatureWeight(pds[i]);
- }
- for (size_t i = 0; i < gds.size(); ++i) {
- PrintFeatureWeight(gds[i]);
+ if (slf[i]->GetScoreProducerWeightShortName() != "u") {
+ PrintFeatureWeight(slf[i]);
+ }
}
}
+} //namespace
+
/** main function of the command line version of the decoder **/
int main(int argc, char** argv)
{
@@ -356,14 +374,13 @@ int main(int argc, char** argv)
// (stores them as strings, or array of strings)
Parameter* params = new Parameter();
if (!params->LoadParam(argc,argv)) {
- params->Explain();
exit(1);
}
// initialize all "global" variables, which are stored in StaticData
// note: this also loads models such as the language model, etc.
- if (!StaticData::LoadDataStatic(params)) {
+ if (!StaticData::LoadDataStatic(params, argv[0])) {
exit(1);
}
@@ -381,7 +398,7 @@ int main(int argc, char** argv)
srand(time(NULL));
// set up read/writing class
- IOWrapper* ioWrapper = GetIODevice(staticData);
+ IOWrapper* ioWrapper = GetIOWrapper(staticData);
if (!ioWrapper) {
cerr << "Error; Failed to create IO object" << endl;
exit(1);
@@ -464,6 +481,18 @@ int main(int argc, char** argv)
if (!staticData.GetAlignmentOutputFile().empty()) {
alignmentInfoCollector.reset(new OutputCollector(ioWrapper->GetAlignmentOutputStream()));
}
+
+ //initialise stream for unknown (oov) words
+ auto_ptr<OutputCollector> unknownsCollector;
+ auto_ptr<ofstream> unknownsStream;
+ if (!staticData.GetOutputUnknownsFile().empty()) {
+ unknownsStream.reset(new ofstream(staticData.GetOutputUnknownsFile().c_str()));
+ if (!unknownsStream->good()) {
+ TRACE_ERR("Unable to open " << staticData.GetOutputUnknownsFile() << " for unknowns");
+ exit(1);
+ }
+ unknownsCollector.reset(new OutputCollector(unknownsStream.get()));
+ }
#ifdef WITH_THREADS
ThreadPool pool(staticData.ThreadCount());
@@ -484,7 +513,8 @@ int main(int argc, char** argv)
wordGraphCollector.get(),
searchGraphCollector.get(),
detailedTranslationCollector.get(),
- alignmentInfoCollector.get() );
+ alignmentInfoCollector.get(),
+ unknownsCollector.get() );
// execute task
#ifdef WITH_THREADS
pool.Submit(task);
diff --git a/moses/src/AlignmentInfo.h b/moses/src/AlignmentInfo.h
index 02e9c7627..01d30013d 100644
--- a/moses/src/AlignmentInfo.h
+++ b/moses/src/AlignmentInfo.h
@@ -30,7 +30,9 @@ namespace Moses
class AlignmentInfoCollection;
-// Collection of non-terminal/terminal alignment pairs, ordered by source index.
+/** Collection of non-terminal alignment pairs, ordered by source index.
+ * Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
+ */
class AlignmentInfo
{
friend std::ostream& operator<<(std::ostream &, const AlignmentInfo &);
@@ -46,9 +48,10 @@ class AlignmentInfo
const_iterator begin() const { return m_collection.begin(); }
const_iterator end() const { return m_collection.end(); }
- // Provides a map from target-side to source-side non-terminal indices.
- // The target-side index should be the rule symbol index (counting terminals).
- // The index returned is the rule non-terminal index (ignoring terminals).
+ /** Provides a map from target-side to source-side non-terminal indices.
+ * The target-side index should be the rule symbol index (counting terminals).
+ * The index returned is the rule non-terminal index (ignoring terminals).
+ */
const NonTermIndexMap &GetNonTermIndexMap() const {
return m_nonTermIndexMap;
}
@@ -68,7 +71,7 @@ class AlignmentInfo
std::vector< const std::pair<size_t,size_t>* > GetSortedAlignments() const;
private:
- // AlignmentInfo objects should only be created by an AlignmentInfoCollection
+ //! AlignmentInfo objects should only be created by an AlignmentInfoCollection
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
: m_collection(pairs)
{
@@ -100,8 +103,9 @@ class AlignmentInfo
NonTermIndexMap m_nonTermIndexMap;
};
-// Define an arbitrary strict weak ordering between AlignmentInfo objects
-// for use by AlignmentInfoCollection.
+/** Define an arbitrary strict weak ordering between AlignmentInfo objects
+ * for use by AlignmentInfoCollection.
+ */
struct AlignmentInfoOrderer
{
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
diff --git a/moses/src/AlignmentInfoCollection.cpp b/moses/src/AlignmentInfoCollection.cpp
index aa411fc63..4569b374b 100644
--- a/moses/src/AlignmentInfoCollection.cpp
+++ b/moses/src/AlignmentInfoCollection.cpp
@@ -38,8 +38,18 @@ const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
const AlignmentInfo *AlignmentInfoCollection::Add(
const std::set<std::pair<size_t,size_t> > &pairs)
{
+ AlignmentInfo pairsAlignmentInfo(pairs);
+#ifdef WITH_THREADS
+ {
+ boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
+ AlignmentInfoSet::const_iterator i = m_collection.find(pairsAlignmentInfo);
+ if (i != m_collection.end())
+ return &*i;
+ }
+ boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
+#endif
std::pair<AlignmentInfoSet::iterator, bool> ret =
- m_collection.insert(AlignmentInfo(pairs));
+ m_collection.insert(pairsAlignmentInfo);
return &(*ret.first);
}
diff --git a/moses/src/AlignmentInfoCollection.h b/moses/src/AlignmentInfoCollection.h
index a6c90f135..708ba285c 100644
--- a/moses/src/AlignmentInfoCollection.h
+++ b/moses/src/AlignmentInfoCollection.h
@@ -23,32 +23,47 @@
#include <set>
+#ifdef WITH_THREADS
+#include <boost/thread/shared_mutex.hpp>
+#include <boost/thread/locks.hpp>
+#endif
+
namespace Moses
{
-// Singleton collection of all AlignmentInfo objects.
+/** Singleton collection of all AlignmentInfo objects.
+ * Used as a cache of all alignment info to save space.
+ * @todo Check whether this needs locking in threaded environment
+ */
class AlignmentInfoCollection
{
public:
static AlignmentInfoCollection &Instance() { return s_instance; }
- // Returns a pointer to an AlignmentInfo object with the same source-target
- // alignment pairs as given in the argument. If the collection already
- // contains such an object then returns a pointer to it; otherwise a new
- // one is inserted.
+ /** Returns a pointer to an AlignmentInfo object with the same source-target
+ * alignment pairs as given in the argument. If the collection already
+ * contains such an object then returns a pointer to it; otherwise a new
+ * one is inserted.
+ */
const AlignmentInfo *Add(const std::set<std::pair<size_t,size_t> > &);
const AlignmentInfo *Add(const std::set<std::pair<size_t,size_t> > &, int* indicator);
- // Returns a pointer to an empty AlignmentInfo object.
+ //! Returns a pointer to an empty AlignmentInfo object.
const AlignmentInfo &GetEmptyAlignmentInfo() const;
private:
typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
- // Only a single static variable should be created.
+ //! Only a single static variable should be created.
AlignmentInfoCollection();
static AlignmentInfoCollection s_instance;
+
+#ifdef WITH_THREADS
+ //reader-writer lock
+ mutable boost::shared_mutex m_accessLock;
+#endif
+
AlignmentInfoSet m_collection;
const AlignmentInfo *m_emptyAlignmentInfo;
};
diff --git a/moses/src/BilingualDynSuffixArray.cpp b/moses/src/BilingualDynSuffixArray.cpp
index 6e1325a97..1e1ef649e 100644
--- a/moses/src/BilingualDynSuffixArray.cpp
+++ b/moses/src/BilingualDynSuffixArray.cpp
@@ -68,13 +68,56 @@ bool BilingualDynSuffixArray::Load(
CacheFreqWords();
return true;
}
+
+bool BilingualDynSuffixArray::LoadTM(
+ const std::vector<FactorType>& inputFactors,
+ const std::vector<FactorType>& outputFactors,
+ std::string source, std::string target, std::string alignments,
+ const std::vector<float> &weight)
+{
+ m_inputFactors = inputFactors;
+ m_outputFactors = outputFactors;
+
+ m_scoreCmp = new ScoresComp(weight);
+ InputFileStream sourceStrme(source);
+ InputFileStream targetStrme(target);
+
+ cerr << "Loading target corpus...\n";
+ LoadCorpus(targetStrme, m_outputFactors,*m_trgCorpus, m_trgSntBreaks, m_trgVocab);
+
+ cerr << "Loading source corpus...\n";
+ LoadCorpus(sourceStrme, m_inputFactors, *m_srcCorpus, m_srcSntBreaks, m_srcVocab);
+
+ CHECK(m_srcSntBreaks.size() == m_trgSntBreaks.size());
+
+ // build suffix arrays and auxilliary arrays
+ cerr << "Building Source Suffix Array...\n";
+ m_srcSA = new DynSuffixArray(m_srcCorpus);
+ if(!m_srcSA) return false;
+ cerr << "Building Target Suffix Array...\n";
+ //m_trgSA = new DynSuffixArray(m_trgCorpus);
+ //if(!m_trgSA) return false;
+ cerr << "\t(Skipped. Not used)\n";
+
+ InputFileStream alignStrme(alignments);
+ cerr << "Loading Alignment File...\n";
+ LoadRawAlignments(alignStrme);
+ //LoadAlignments(alignStrme);
+ cerr << "Building frequent word cache...\n";
+ CacheFreqWords();
+ return true;
+
+}
int BilingualDynSuffixArray::LoadRawAlignments(InputFileStream& align)
{
// stores the alignments in the raw file format
std::string line;
std::vector<int> vtmp;
+ int lineNum = 1;
while(getline(align, line)) {
+ if (lineNum % 10000 == 0)
+ cerr << lineNum;
Utils::splitToInt(line, vtmp, "- ");
CHECK(vtmp.size() % 2 == 0);
std::vector<short> vAlgn; // store as short ints for memory
@@ -83,6 +126,7 @@ int BilingualDynSuffixArray::LoadRawAlignments(InputFileStream& align)
vAlgn.push_back(short(*itr));
}
m_rawAlignments.push_back(vAlgn);
+ ++lineNum;
}
return m_rawAlignments.size();
}
@@ -170,7 +214,7 @@ bool BilingualDynSuffixArray::ExtractPhrases(const int& sntIndex, const int& wor
return curSnt.Extract(m_maxPhraseLength, phrasePairs, leftIdx, rightIdx); // extract all phrase Alignments in sentence
}
-void BilingualDynSuffixArray::CleanUp()
+void BilingualDynSuffixArray::CleanUp(const InputType& source)
{
//m_wordPairCache.clear();
}
@@ -462,7 +506,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
Phrase sphrase(ARRAY_SIZE_INCR);
sphrase.CreateFromString(m_inputFactors, source, factorDelimiter);
m_srcVocab->MakeOpen();
- wordID_t sIDs[sphrase.GetSize()];
+ std::vector<wordID_t> sIDs(sphrase.GetSize());
// store words in vocabulary and corpus
for(int i = sphrase.GetSize()-1; i >= 0; --i) {
sIDs[i] = m_srcVocab->GetWordID(sphrase.GetWord(i)); // get vocab id backwards
@@ -477,7 +521,7 @@ void BilingualDynSuffixArray::addSntPair(string& source, string& target, string&
Phrase tphrase(ARRAY_SIZE_INCR);
tphrase.CreateFromString(m_outputFactors, target, factorDelimiter);
m_trgVocab->MakeOpen();
- wordID_t tIDs[tphrase.GetSize()];
+ std::vector<wordID_t> tIDs(tphrase.GetSize());
for(int i = tphrase.GetSize()-1; i >= 0; --i) {
tIDs[i] = m_trgVocab->GetWordID(tphrase.GetWord(i)); // get vocab id
}
diff --git a/moses/src/BilingualDynSuffixArray.h b/moses/src/BilingualDynSuffixArray.h
index 1543c3709..18c42f342 100644
--- a/moses/src/BilingualDynSuffixArray.h
+++ b/moses/src/BilingualDynSuffixArray.h
@@ -11,6 +11,8 @@
namespace Moses {
+/** @todo ask Abbey Levenberg
+ */
class SAPhrase
{
public:
@@ -29,6 +31,8 @@ public:
{ return words < phr2.words; }
};
+/** @todo ask Abbey Levenberg
+ */
class PhrasePair
{
public:
@@ -45,6 +49,8 @@ public:
{ return m_endTarget - m_startTarget + 1; }
};
+/** @todo ask Abbey Levenberg
+ */
class SentenceAlignment
{
public:
@@ -77,6 +83,8 @@ private:
const std::vector<float>& m_weights;
};
+/** @todo ask Abbey Levenberg
+ */
class BilingualDynSuffixArray {
public:
BilingualDynSuffixArray();
@@ -85,8 +93,12 @@ public:
const std::vector<FactorType>& outputTactors,
std::string source, std::string target, std::string alignments,
const std::vector<float> &weight);
+ bool LoadTM( const std::vector<FactorType>& inputFactors,
+ const std::vector<FactorType>& outputTactors,
+ std::string source, std::string target, std::string alignments,
+ const std::vector<float> &weight);
void GetTargetPhrasesByLexicalWeight(const Phrase& src, std::vector< std::pair<Scores, TargetPhrase*> >& target) const;
- void CleanUp();
+ void CleanUp(const InputType& source);
void addSntPair(string& source, string& target, string& alignment);
private:
DynSuffixArray* m_srcSA;
diff --git a/moses/src/BitmapContainer.cpp b/moses/src/BitmapContainer.cpp
index c80f3b542..1baa06096 100644
--- a/moses/src/BitmapContainer.cpp
+++ b/moses/src/BitmapContainer.cpp
@@ -262,13 +262,13 @@ BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
BitmapContainer::~BitmapContainer()
{
// As we have created the square position objects we clean up now.
- HypothesisQueueItem *item = NULL;
while (!m_queue.empty()) {
- item = m_queue.top();
- FREEHYPO(item->GetHypothesis());
- delete item;
- m_queue.pop();
+ HypothesisQueueItem *item = m_queue.top();
+ m_queue.pop();
+
+ FREEHYPO( item->GetHypothesis() );
+ delete item;
}
// Delete all edges.
diff --git a/moses/src/BitmapContainer.h b/moses/src/BitmapContainer.h
index 987440750..3b9654468 100644
--- a/moses/src/BitmapContainer.h
+++ b/moses/src/BitmapContainer.h
@@ -51,6 +51,7 @@ typedef std::priority_queue< HypothesisQueueItem*, std::vector< HypothesisQueueI
// Hypothesis Priority Queue Code
////////////////////////////////////////////////////////////////////////////////
+//! 1 item in the priority queue for stack decoding (phrase-based)
class HypothesisQueueItem
{
private:
@@ -91,7 +92,7 @@ public:
}
};
-// Allows to compare two HypothesisQueueItem objects by the corresponding scores.
+//! Allows comparison of two HypothesisQueueItem objects by the corresponding scores.
class QueueItemOrderer
{
public:
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp b/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp
index 574572be8..b3cbfda5b 100644
--- a/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.cpp
@@ -22,7 +22,6 @@
#include "RuleTable/PhraseDictionarySCFG.h"
#include "InputType.h"
#include "ChartTranslationOptionList.h"
-#include "CellCollection.h"
#include "DotChartInMemory.h"
#include "StaticData.h"
#include "NonTerminal.h"
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h b/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
index dbee00b6f..5af2c4b63 100644
--- a/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerCYKPlus.h
@@ -31,6 +31,8 @@ class DottedRule;
class TargetPhraseCollection;
class WordsRange;
+/** @todo what is this?
+ */
class ChartRuleLookupManagerCYKPlus : public ChartRuleLookupManager
{
public:
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.cpp b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
index d180905aa..904d536e1 100644
--- a/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.cpp
@@ -22,7 +22,6 @@
#include "RuleTable/PhraseDictionarySCFG.h"
#include "InputType.h"
#include "ChartTranslationOptionList.h"
-#include "CellCollection.h"
#include "DotChartInMemory.h"
#include "StaticData.h"
#include "NonTerminal.h"
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h
index ee23062c4..38f06d63e 100644
--- a/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemory.h
@@ -41,7 +41,7 @@ class ChartTranslationOptionList;
class DottedRuleColl;
class WordsRange;
-// Implementation of ChartRuleLookupManager for in-memory rule tables.
+//! Implementation of ChartRuleLookupManager for in-memory rule tables.
class ChartRuleLookupManagerMemory : public ChartRuleLookupManagerCYKPlus
{
public:
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
new file mode 100644
index 000000000..e504a24d7
--- /dev/null
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp
@@ -0,0 +1,280 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include "ChartRuleLookupManagerMemoryPerSentence.h"
+
+#include "RuleTable/PhraseDictionaryFuzzyMatch.h"
+#include "InputType.h"
+#include "ChartTranslationOptionList.h"
+#include "DotChartInMemory.h"
+#include "StaticData.h"
+#include "NonTerminal.h"
+#include "ChartCellCollection.h"
+
+namespace Moses
+{
+
+ChartRuleLookupManagerMemoryPerSentence::ChartRuleLookupManagerMemoryPerSentence(
+ const InputType &src,
+ const ChartCellCollection &cellColl,
+ const PhraseDictionaryFuzzyMatch &ruleTable)
+ : ChartRuleLookupManagerCYKPlus(src, cellColl)
+ , m_ruleTable(ruleTable)
+{
+ CHECK(m_dottedRuleColls.size() == 0);
+ size_t sourceSize = src.GetSize();
+ m_dottedRuleColls.resize(sourceSize);
+
+ const PhraseDictionaryNodeSCFG &rootNode = m_ruleTable.GetRootNode(src);
+
+ for (size_t ind = 0; ind < m_dottedRuleColls.size(); ++ind) {
+#ifdef USE_BOOST_POOL
+ DottedRuleInMemory *initDottedRule = m_dottedRulePool.malloc();
+ new (initDottedRule) DottedRuleInMemory(rootNode);
+#else
+ DottedRuleInMemory *initDottedRule = new DottedRuleInMemory(rootNode);
+#endif
+
+ DottedRuleColl *dottedRuleColl = new DottedRuleColl(sourceSize - ind + 1);
+ dottedRuleColl->Add(0, initDottedRule); // init rule. stores the top node in tree
+
+ m_dottedRuleColls[ind] = dottedRuleColl;
+ }
+}
+
+ChartRuleLookupManagerMemoryPerSentence::~ChartRuleLookupManagerMemoryPerSentence()
+{
+ RemoveAllInColl(m_dottedRuleColls);
+}
+
+void ChartRuleLookupManagerMemoryPerSentence::GetChartRuleCollection(
+ const WordsRange &range,
+ ChartTranslationOptionList &outColl)
+{
+ size_t relEndPos = range.GetEndPos() - range.GetStartPos();
+ size_t absEndPos = range.GetEndPos();
+
+ // MAIN LOOP. create list of nodes of target phrases
+
+ // get list of all rules that apply to spans at same starting position
+ DottedRuleColl &dottedRuleCol = *m_dottedRuleColls[range.GetStartPos()];
+ const DottedRuleList &expandableDottedRuleList = dottedRuleCol.GetExpandableDottedRuleList();
+
+ const ChartCellLabel &sourceWordLabel = GetCellCollection().Get(WordsRange(absEndPos, absEndPos)).GetSourceWordLabel();
+
+ // loop through the rules
+ // (note that expandableDottedRuleList can be expanded as the loop runs
+ // through calls to ExtendPartialRuleApplication())
+ for (size_t ind = 0; ind < expandableDottedRuleList.size(); ++ind) {
+ // rule we are about to extend
+ const DottedRuleInMemory &prevDottedRule = *expandableDottedRuleList[ind];
+ // we will now try to extend it, starting after where it ended
+ size_t startPos = prevDottedRule.IsRoot()
+ ? range.GetStartPos()
+ : prevDottedRule.GetWordsRange().GetEndPos() + 1;
+
+ // search for terminal symbol
+ // (if only one more word position needs to be covered)
+ if (startPos == absEndPos) {
+
+ // look up in rule dictionary, if the current rule can be extended
+ // with the source word in the last position
+ const Word &sourceWord = sourceWordLabel.GetLabel();
+ const PhraseDictionaryNodeSCFG *node = prevDottedRule.GetLastNode().GetChild(sourceWord);
+
+ // if we found a new rule -> create it and add it to the list
+ if (node != NULL) {
+ // create the rule
+#ifdef USE_BOOST_POOL
+ DottedRuleInMemory *dottedRule = m_dottedRulePool.malloc();
+ new (dottedRule) DottedRuleInMemory(*node, sourceWordLabel,
+ prevDottedRule);
+#else
+ DottedRuleInMemory *dottedRule = new DottedRuleInMemory(*node,
+ sourceWordLabel,
+ prevDottedRule);
+#endif
+ dottedRuleCol.Add(relEndPos+1, dottedRule);
+ }
+ }
+
+ // search for non-terminals
+ size_t endPos, stackInd;
+
+ // span is already complete covered? nothing can be done
+ if (startPos > absEndPos)
+ continue;
+
+ else if (startPos == range.GetStartPos() && range.GetEndPos() > range.GetStartPos()) {
+ // We're at the root of the prefix tree so won't try to cover the full
+ // span (i.e. we don't allow non-lexical unary rules). However, we need
+ // to match non-unary rules that begin with a non-terminal child, so we
+ // do that in two steps: during this iteration we search for non-terminals
+ // that cover all but the last source word in the span (there won't
+ // already be running nodes for these because that would have required a
+ // non-lexical unary rule match for an earlier span). Any matches will
+ // result in running nodes being appended to the list and on subsequent
+ // iterations (for this same span), we'll extend them to cover the final
+ // word.
+ endPos = absEndPos - 1;
+ stackInd = relEndPos;
+ }
+ else
+ {
+ endPos = absEndPos;
+ stackInd = relEndPos + 1;
+ }
+
+
+ ExtendPartialRuleApplication(prevDottedRule, startPos, endPos, stackInd,
+ dottedRuleCol);
+ }
+
+ // list of rules that that cover the entire span
+ DottedRuleList &rules = dottedRuleCol.Get(relEndPos + 1);
+
+ // look up target sides for the rules
+ DottedRuleList::const_iterator iterRule;
+ for (iterRule = rules.begin(); iterRule != rules.end(); ++iterRule) {
+ const DottedRuleInMemory &dottedRule = **iterRule;
+ const PhraseDictionaryNodeSCFG &node = dottedRule.GetLastNode();
+
+ // look up target sides
+ const TargetPhraseCollection *tpc = node.GetTargetPhraseCollection();
+
+ // add the fully expanded rule (with lexical target side)
+ if (tpc != NULL) {
+ AddCompletedRule(dottedRule, *tpc, range, outColl);
+ }
+ }
+
+ dottedRuleCol.Clear(relEndPos+1);
+
+ outColl.ShrinkToLimit();
+}
+
+// Given a partial rule application ending at startPos-1 and given the sets of
+// source and target non-terminals covering the span [startPos, endPos],
+// determines the full or partial rule applications that can be produced through
+// extending the current rule application by a single non-terminal.
+void ChartRuleLookupManagerMemoryPerSentence::ExtendPartialRuleApplication(
+ const DottedRuleInMemory &prevDottedRule,
+ size_t startPos,
+ size_t endPos,
+ size_t stackInd,
+ DottedRuleColl & dottedRuleColl)
+{
+ // source non-terminal labels for the remainder
+ const NonTerminalSet &sourceNonTerms =
+ GetSentence().GetLabelSet(startPos, endPos);
+
+ // target non-terminal labels for the remainder
+ const ChartCellLabelSet &targetNonTerms =
+ GetCellCollection().Get(WordsRange(startPos, endPos)).GetTargetLabelSet();
+
+ // note where it was found in the prefix tree of the rule dictionary
+ const PhraseDictionaryNodeSCFG &node = prevDottedRule.GetLastNode();
+
+ const PhraseDictionaryNodeSCFG::NonTerminalMap & nonTermMap =
+ node.GetNonTerminalMap();
+
+ const size_t numChildren = nonTermMap.size();
+ if (numChildren == 0) {
+ return;
+ }
+ const size_t numSourceNonTerms = sourceNonTerms.size();
+ const size_t numTargetNonTerms = targetNonTerms.GetSize();
+ const size_t numCombinations = numSourceNonTerms * numTargetNonTerms;
+
+ // We can search by either:
+ // 1. Enumerating all possible source-target NT pairs that are valid for
+ // the span and then searching for matching children in the node,
+ // or
+ // 2. Iterating over all the NT children in the node, searching
+ // for each source and target NT in the span's sets.
+ // We'll do whichever minimises the number of lookups:
+ if (numCombinations <= numChildren*2) {
+
+ // loop over possible source non-terminal labels (as found in input tree)
+ NonTerminalSet::const_iterator p = sourceNonTerms.begin();
+ NonTerminalSet::const_iterator sEnd = sourceNonTerms.end();
+ for (; p != sEnd; ++p) {
+ const Word & sourceNonTerm = *p;
+
+ // loop over possible target non-terminal labels (as found in chart)
+ ChartCellLabelSet::const_iterator q = targetNonTerms.begin();
+ ChartCellLabelSet::const_iterator tEnd = targetNonTerms.end();
+ for (; q != tEnd; ++q) {
+ const ChartCellLabel &cellLabel = q->second;
+
+ // try to match both source and target non-terminal
+ const PhraseDictionaryNodeSCFG * child =
+ node.GetChild(sourceNonTerm, cellLabel.GetLabel());
+
+ // nothing found? then we are done
+ if (child == NULL) {
+ continue;
+ }
+
+ // create new rule
+#ifdef USE_BOOST_POOL
+ DottedRuleInMemory *rule = m_dottedRulePool.malloc();
+ new (rule) DottedRuleInMemory(*child, cellLabel, prevDottedRule);
+#else
+ DottedRuleInMemory *rule = new DottedRuleInMemory(*child, cellLabel,
+ prevDottedRule);
+#endif
+ dottedRuleColl.Add(stackInd, rule);
+ }
+ }
+ }
+ else
+ {
+ // loop over possible expansions of the rule
+ PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator p;
+ PhraseDictionaryNodeSCFG::NonTerminalMap::const_iterator end =
+ nonTermMap.end();
+ for (p = nonTermMap.begin(); p != end; ++p) {
+ // does it match possible source and target non-terminals?
+ const PhraseDictionaryNodeSCFG::NonTerminalMapKey &key = p->first;
+ const Word &sourceNonTerm = key.first;
+ if (sourceNonTerms.find(sourceNonTerm) == sourceNonTerms.end()) {
+ continue;
+ }
+ const Word &targetNonTerm = key.second;
+ const ChartCellLabel *cellLabel = targetNonTerms.Find(targetNonTerm);
+ if (!cellLabel) {
+ continue;
+ }
+
+ // create new rule
+ const PhraseDictionaryNodeSCFG &child = p->second;
+#ifdef USE_BOOST_POOL
+ DottedRuleInMemory *rule = m_dottedRulePool.malloc();
+ new (rule) DottedRuleInMemory(child, *cellLabel, prevDottedRule);
+#else
+ DottedRuleInMemory *rule = new DottedRuleInMemory(child, *cellLabel,
+ prevDottedRule);
+#endif
+ dottedRuleColl.Add(stackInd, rule);
+ }
+ }
+}
+
+} // namespace Moses
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
new file mode 100644
index 000000000..f2cf805c4
--- /dev/null
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h
@@ -0,0 +1,78 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+#ifndef moses_ChartRuleLookupManagerMemory_h
+#define moses_ChartRuleLookupManagerMemory_h
+
+#include <vector>
+
+#ifdef USE_BOOST_POOL
+#include <boost/pool/object_pool.hpp>
+#endif
+
+#include "ChartRuleLookupManagerCYKPlus.h"
+#include "DotChartInMemory.h"
+#include "NonTerminal.h"
+#include "RuleTable/PhraseDictionaryNodeSCFG.h"
+#include "RuleTable/PhraseDictionarySCFG.h"
+#include "StackVec.h"
+
+namespace Moses
+{
+
+class ChartTranslationOptionList;
+class DottedRuleColl;
+class WordsRange;
+
+//! Implementation of ChartRuleLookupManager for in-memory rule tables.
+class ChartRuleLookupManagerMemoryPerSentence : public ChartRuleLookupManagerCYKPlus
+{
+ public:
+ ChartRuleLookupManagerMemoryPerSentence(const InputType &sentence,
+ const ChartCellCollection &cellColl,
+ const PhraseDictionaryFuzzyMatch &ruleTable);
+
+ ~ChartRuleLookupManagerMemoryPerSentence();
+
+ virtual void GetChartRuleCollection(
+ const WordsRange &range,
+ ChartTranslationOptionList &outColl);
+
+ private:
+ void ExtendPartialRuleApplication(
+ const DottedRuleInMemory &prevDottedRule,
+ size_t startPos,
+ size_t endPos,
+ size_t stackInd,
+ DottedRuleColl &dottedRuleColl);
+
+ std::vector<DottedRuleColl*> m_dottedRuleColls;
+ const PhraseDictionaryFuzzyMatch &m_ruleTable;
+#ifdef USE_BOOST_POOL
+ // Use an object pool to allocate the dotted rules for this sentence. We
+ // allocate a lot of them and this has been seen to significantly improve
+ // performance, especially for multithreaded decoding.
+ boost::object_pool<DottedRuleInMemory> m_dottedRulePool;
+#endif
+};
+
+} // namespace Moses
+
+#endif
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp b/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
index 0398eed28..0cf3c836b 100644
--- a/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.cpp
@@ -236,7 +236,7 @@ void ChartRuleLookupManagerOnDisk::GetChartRuleCollection(
const OnDiskPt::TargetPhraseCollection *tpcollBerkeleyDb = node->GetTargetPhraseCollection(m_dictionary.GetTableLimit(), m_dbWrapper);
- std::vector<float> weightT = staticData.GetTranslationSystem(TranslationSystem::DEFAULT).GetTranslationWeights(m_dictionary.GetDictIndex());
+ std::vector<float> weightT = staticData.GetWeights(m_dictionary.GetFeature());
targetPhraseCollection
= tpcollBerkeleyDb->ConvertToMoses(m_inputFactorsVec
,m_outputFactorsVec
diff --git a/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.h b/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.h
index de29d6d3f..333d975cc 100644
--- a/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.h
+++ b/moses/src/CYKPlusParser/ChartRuleLookupManagerOnDisk.h
@@ -32,7 +32,7 @@
namespace Moses
{
-// Implementation of ChartRuleLookupManager for on-disk rule tables.
+//! Implementation of ChartRuleLookupManager for on-disk rule tables.
class ChartRuleLookupManagerOnDisk : public ChartRuleLookupManagerCYKPlus
{
public:
diff --git a/moses/src/CYKPlusParser/DotChart.h b/moses/src/CYKPlusParser/DotChart.h
index c37bb3294..0d917d8b0 100644
--- a/moses/src/CYKPlusParser/DotChart.h
+++ b/moses/src/CYKPlusParser/DotChart.h
@@ -24,6 +24,8 @@
namespace Moses
{
+/** @todo what is this?
+ */
class DottedRule
{
public:
diff --git a/moses/src/CYKPlusParser/DotChartInMemory.h b/moses/src/CYKPlusParser/DotChartInMemory.h
index ac7047ffd..ae18ed3b1 100644
--- a/moses/src/CYKPlusParser/DotChartInMemory.h
+++ b/moses/src/CYKPlusParser/DotChartInMemory.h
@@ -28,6 +28,8 @@
namespace Moses
{
+/** @todo what is this?
+ */
class DottedRuleInMemory : public DottedRule
{
public:
diff --git a/moses/src/CYKPlusParser/DotChartOnDisk.h b/moses/src/CYKPlusParser/DotChartOnDisk.h
index 9452e9be3..5b756ba8d 100644
--- a/moses/src/CYKPlusParser/DotChartOnDisk.h
+++ b/moses/src/CYKPlusParser/DotChartOnDisk.h
@@ -29,9 +29,11 @@ namespace OnDiskPt
class PhraseNode;
}
-
namespace Moses
{
+
+/** @todo what is this?
+ */
class DottedRuleOnDisk : public DottedRule
{
public:
diff --git a/moses/src/CellCollection.h b/moses/src/CellCollection.h
deleted file mode 100644
index 5b99fe0fe..000000000
--- a/moses/src/CellCollection.h
+++ /dev/null
@@ -1,40 +0,0 @@
-// $Id$
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2010 Hieu Hoang
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-#pragma once
-
-#include <vector>
-
-#include "NonTerminal.h"
-#include "Word.h"
-#include "WordsRange.h"
-
-namespace Moses
-{
-class Word;
-
-class CellCollection
-{
-public:
- virtual ~CellCollection()
- {}
-};
-
-}
-
diff --git a/moses/src/ChartCell.cpp b/moses/src/ChartCell.cpp
index 89341d8e7..04b629cd9 100644
--- a/moses/src/ChartCell.cpp
+++ b/moses/src/ChartCell.cpp
@@ -21,14 +21,13 @@
#include <algorithm>
#include "ChartCell.h"
-#include "ChartTranslationOptionCollection.h"
#include "ChartCellCollection.h"
#include "RuleCubeQueue.h"
#include "RuleCube.h"
#include "WordsRange.h"
#include "Util.h"
#include "StaticData.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "ChartTranslationOptionList.h"
#include "ChartManager.h"
@@ -38,6 +37,10 @@ namespace Moses
{
extern bool g_debug;
+/** Constructor
+ * \param startPos endPos range of this cell
+ * \param manager pointer back to the manager
+ */
ChartCell::ChartCell(size_t startPos, size_t endPos, ChartManager &manager)
:m_coverage(startPos, endPos)
,m_sourceWordLabel(NULL)
@@ -57,14 +60,18 @@ ChartCell::~ChartCell()
delete m_sourceWordLabel;
}
-/** Add the given hypothesis to the cell */
+/** Add the given hypothesis to the cell.
+ * Returns true if added, false if not. Maybe it already exists in the collection or score falls below threshold etc.
+ * This function just calls the corresponding AddHypothesis() in ChartHypothesisCollection
+ * \param hypo Hypothesis to be added
+ */
bool ChartCell::AddHypothesis(ChartHypothesis *hypo)
{
const Word &targetLHS = hypo->GetTargetLHS();
return m_hypoColl[targetLHS].AddHypothesis(hypo, m_manager);
}
-/** Pruning */
+/** Prune each collection in this cell to a particular size */
void ChartCell::PruneToSize()
{
MapType::iterator iter;
@@ -89,7 +96,7 @@ void ChartCell::ProcessSentence(const ChartTranslationOptionList &transOptList
// add all trans opt into queue. using only 1st child node.
for (size_t i = 0; i < transOptList.GetSize(); ++i) {
- const ChartTranslationOption &transOpt = transOptList.Get(i);
+ const ChartTranslationOptions &transOpt = transOptList.Get(i);
RuleCube *ruleCube = new RuleCube(transOpt, allChartCells, m_manager);
queue.Add(ruleCube);
}
@@ -103,9 +110,9 @@ void ChartCell::ProcessSentence(const ChartTranslationOptionList &transOptList
}
}
+//! call SortHypotheses() in each hypo collection in this cell
void ChartCell::SortHypotheses()
{
- // sort each mini cells & fill up target lhs list
CHECK(m_targetLabelSet.Empty());
MapType::iterator iter;
for (iter = m_hypoColl.begin(); iter != m_hypoColl.end(); ++iter) {
@@ -115,7 +122,7 @@ void ChartCell::SortHypotheses()
}
}
-/** Return the highest scoring hypothesis in the cell */
+/** Return the highest scoring hypothesis out of all the hypo collection in this cell */
const ChartHypothesis *ChartCell::GetBestHypothesis() const
{
const ChartHypothesis *ret = NULL;
@@ -136,6 +143,7 @@ const ChartHypothesis *ChartCell::GetBestHypothesis() const
return ret;
}
+//! call CleanupArcList() in each hypo collection in this cell
void ChartCell::CleanupArcList()
{
// only necessary if n-best calculations are enabled
@@ -148,6 +156,7 @@ void ChartCell::CleanupArcList()
}
}
+//! debug info - size of each hypo collection in this cell
void ChartCell::OutputSizes(std::ostream &out) const
{
MapType::const_iterator iter;
@@ -159,6 +168,7 @@ void ChartCell::OutputSizes(std::ostream &out) const
}
}
+//! debug info - total number of hypos in all hypo collection in this cell
size_t ChartCell::GetSize() const
{
size_t ret = 0;
@@ -172,6 +182,7 @@ size_t ChartCell::GetSize() const
return ret;
}
+//! call GetSearchGraph() for each hypo collection
void ChartCell::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned, bool> &reachable) const
{
MapType::const_iterator iterOutside;
diff --git a/moses/src/ChartCell.h b/moses/src/ChartCell.h
index df332a0f1..dd0fc25d3 100644
--- a/moses/src/ChartCell.h
+++ b/moses/src/ChartCell.h
@@ -40,10 +40,12 @@
namespace Moses
{
class ChartTranslationOptionList;
-class ChartTranslationOptionCollection;
class ChartCellCollection;
class ChartManager;
+/** 1 cell in chart decoder.
+ * Doesn't directly hold hypotheses. Each cell contains a map of ChartHypothesisCollection that have different constituent labels
+ */
class ChartCell
{
friend std::ostream& operator<<(std::ostream&, const ChartCell&);
@@ -76,7 +78,7 @@ public:
void ProcessSentence(const ChartTranslationOptionList &transOptList
,const ChartCellCollection &allChartCells);
- /** Get all hypotheses in the cell that have the specified constituent label */
+ //! Get all hypotheses in the cell that have the specified constituent label
const HypoList *GetSortedHypotheses(const Word &constituentLabel) const
{
MapType::const_iterator p = m_hypoColl.find(constituentLabel);
@@ -90,11 +92,13 @@ public:
const ChartHypothesis *GetBestHypothesis() const;
+ //! @todo what is a m_sourceWordLabel?
const ChartCellLabel &GetSourceWordLabel() const {
CHECK(m_coverage.GetNumWordsCovered() == 1);
return *m_sourceWordLabel;
}
+ //! @todo what is a m_sourceWordLabel?
const ChartCellLabelSet &GetTargetLabelSet() const {
return m_targetLabelSet;
}
diff --git a/moses/src/ChartCellCollection.cpp b/moses/src/ChartCellCollection.cpp
index 894bb352c..59e2ff82e 100644
--- a/moses/src/ChartCellCollection.cpp
+++ b/moses/src/ChartCellCollection.cpp
@@ -25,6 +25,10 @@
namespace Moses
{
+/** Constructor
+ \param input the input sentence
+ \param manager reference back to the manager
+ */
ChartCellCollection::ChartCellCollection(const InputType &input, ChartManager &manager)
:m_hypoStackColl(input.GetSize())
{
diff --git a/moses/src/ChartCellCollection.h b/moses/src/ChartCellCollection.h
index 6627b6b67..a9d38ceaa 100644
--- a/moses/src/ChartCellCollection.h
+++ b/moses/src/ChartCellCollection.h
@@ -22,14 +22,15 @@
#include "ChartCell.h"
#include "WordsRange.h"
-#include "CellCollection.h"
namespace Moses
{
class InputType;
class ChartManager;
-class ChartCellCollection : public CellCollection
+/** Hold all the chart cells for 1 input sentence. A variable of this type is held by the ChartManager
+ */
+class ChartCellCollection
{
public:
typedef std::vector<ChartCell*> InnerCollType;
@@ -42,9 +43,12 @@ public:
ChartCellCollection(const InputType &input, ChartManager &manager);
~ChartCellCollection();
+ //! get a chart cell for a particular range
ChartCell &Get(const WordsRange &coverage) {
return *m_hypoStackColl[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()];
}
+
+ //! get a chart cell for a particular range
const ChartCell &Get(const WordsRange &coverage) const {
return *m_hypoStackColl[coverage.GetStartPos()][coverage.GetEndPos() - coverage.GetStartPos()];
}
diff --git a/moses/src/ChartCellLabel.h b/moses/src/ChartCellLabel.h
index 5228b2af5..3d71230c3 100644
--- a/moses/src/ChartCellLabel.h
+++ b/moses/src/ChartCellLabel.h
@@ -28,6 +28,11 @@ namespace Moses
class Word;
+/** Contains a range, word (non-terms?) and a vector of hypotheses.
+ * @todo This is probably incompatible with lattice decoding when the word that spans
+ * a position (or positions) can vary.
+ * @todo is this to hold sorted hypotheses that are in the queue for creating the next hypos?
+ */
class ChartCellLabel
{
public:
diff --git a/moses/src/ChartCellLabelSet.h b/moses/src/ChartCellLabelSet.h
index f467f55a5..b828d5fbc 100644
--- a/moses/src/ChartCellLabelSet.h
+++ b/moses/src/ChartCellLabelSet.h
@@ -31,6 +31,8 @@ namespace Moses
class ChartHypothesisCollection;
+/** @todo I have no idea what's in here
+ */
class ChartCellLabelSet
{
private:
diff --git a/moses/src/ChartHypothesis.cpp b/moses/src/ChartHypothesis.cpp
index ea79474b1..7a76e063e 100644
--- a/moses/src/ChartHypothesis.cpp
+++ b/moses/src/ChartHypothesis.cpp
@@ -29,7 +29,7 @@
#include "StaticData.h"
#include "DummyScoreProducers.h"
#include "LMList.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "FFState.h"
using namespace std;
@@ -41,8 +41,12 @@ namespace Moses
ObjectPool<ChartHypothesis> ChartHypothesis::s_objectPool("ChartHypothesis", 300000);
#endif
-/** Create a hypothesis from a rule */
-ChartHypothesis::ChartHypothesis(const ChartTranslationOption &transOpt,
+/** Create a hypothesis from a rule
+ * \param transOpt wrapper around the rule
+ * \param item @todo dunno
+ * \param manager reference back to manager
+ */
+ChartHypothesis::ChartHypothesis(const ChartTranslationOptions &transOpt,
const RuleCubeItem &item,
ChartManager &manager)
:m_targetPhrase(*(item.GetTranslationDimension().GetTargetPhrase()))
@@ -84,18 +88,15 @@ ChartHypothesis::~ChartHypothesis()
}
/** Create full output phrase that is contained in the hypothesis (and its children)
- * \param outPhrase full output phrase
+ * \param outPhrase full output phrase as return argument
*/
void ChartHypothesis::CreateOutputPhrase(Phrase &outPhrase) const
{
- const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
-
for (size_t pos = 0; pos < GetCurrTargetPhrase().GetSize(); ++pos) {
const Word &word = GetCurrTargetPhrase().GetWord(pos);
if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
- size_t nonTermInd = nonTermIndexMap[pos];
+ size_t nonTermInd = GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap()[pos];
const ChartHypothesis *prevHypo = m_prevHypos[nonTermInd];
prevHypo->CreateOutputPhrase(outPhrase);
}
@@ -116,14 +117,15 @@ Phrase ChartHypothesis::GetOutputPhrase() const
/** check, if two hypothesis can be recombined.
this is actually a sorting function that allows us to
keep an ordered list of hypotheses. This makes recombination
- much quicker.
+ much quicker. Returns one of 3 possible values:
+ -1 = this < compare
+ +1 = this > compare
+ 0 = this ==compare
+ \param compare the other hypo to compare to
*/
int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
{
int comp = 0;
- // -1 = this < compare
- // +1 = this > compare
- // 0 = this ==compare
for (unsigned i = 0; i < m_ffStates.size(); ++i)
{
@@ -139,6 +141,9 @@ int ChartHypothesis::RecombineCompare(const ChartHypothesis &compare) const
return 0;
}
+/** calculate total score
+ * @todo this should be in ScoreBreakdown
+ */
void ChartHypothesis::CalcScore()
{
// total scores from prev hypos
@@ -154,18 +159,21 @@ void ChartHypothesis::CalcScore()
const ScoreComponentCollection &scoreBreakdown = GetCurrTargetPhrase().GetScoreBreakdown();
m_scoreBreakdown.PlusEquals(scoreBreakdown);
+ //Add pre-computed features
+ m_manager.InsertPreCalculatedScores(GetCurrTargetPhrase(), &m_scoreBreakdown);
+
// compute values of stateless feature functions that were not
// cached in the translation option-- there is no principled distinction
const std::vector<const StatelessFeatureFunction*>& sfs =
m_manager.GetTranslationSystem()->GetStatelessFeatureFunctions();
for (unsigned i = 0; i < sfs.size(); ++i)
if (sfs[i]->ComputeValueInTranslationOption() == false)
- sfs[i]->EvaluateChart(*this,i,&m_scoreBreakdown);
+ sfs[i]->EvaluateChart(ChartBasedFeatureContext(this),&m_scoreBreakdown);
const std::vector<const StatefulFeatureFunction*>& ffs =
m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
for (unsigned i = 0; i < ffs.size(); ++i)
- m_ffStates[i] = ffs[i]->EvaluateChart(*this,i,&m_scoreBreakdown);
+ m_ffStates[i] = ffs[i]->EvaluateChart(*this,i,&m_scoreBreakdown);
m_totalScore = m_scoreBreakdown.GetWeightedScore();
}
@@ -263,6 +271,9 @@ std::ostream& operator<<(std::ostream& out, const ChartHypothesis& hypo)
out << "->" << hypo.GetWinningHypothesis()->GetId();
}
+ if (StaticData::Instance().GetIncludeLHSInSearchGraph()) {
+ out << " " << hypo.GetTargetLHS() << "=>";
+ }
out << " " << hypo.GetCurrTargetPhrase()
//<< " " << outPhrase
<< " " << hypo.GetCurrSourceRange();
diff --git a/moses/src/ChartHypothesis.h b/moses/src/ChartHypothesis.h
index edd03df8c..d3f7f0872 100644
--- a/moses/src/ChartHypothesis.h
+++ b/moses/src/ChartHypothesis.h
@@ -25,7 +25,7 @@
#include "WordsRange.h"
#include "ScoreComponentCollection.h"
#include "Phrase.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "ObjectPool.h"
namespace Moses
@@ -37,6 +37,9 @@ class RuleCubeItem;
typedef std::vector<ChartHypothesis*> ChartArcList;
+/** a hypothesis in the hierarchical/syntax decoder.
+ * Contain a pointer to the current target phrase, a vector of previous hypos, and some scores
+ */
class ChartHypothesis
{
friend std::ostream& operator<<(std::ostream&, const ChartHypothesis&);
@@ -64,8 +67,11 @@ protected:
unsigned m_id; /* pkoehn wants to log the order in which hypotheses were generated */
- ChartHypothesis(); // not implemented
- ChartHypothesis(const ChartHypothesis &copy); // not implemented
+ //! not implemented
+ ChartHypothesis();
+
+ //! not implemented
+ ChartHypothesis(const ChartHypothesis &copy);
public:
#ifdef USE_HYPO_POOL
@@ -74,34 +80,45 @@ public:
return ptr;
}
+ //! delete \param hypo. Works with object pool too
static void Delete(ChartHypothesis *hypo) {
s_objectPool.freeObject(hypo);
}
#else
+ //! delete \param hypo. Works with object pool too
static void Delete(ChartHypothesis *hypo) {
delete hypo;
}
#endif
- ChartHypothesis(const ChartTranslationOption &, const RuleCubeItem &item,
+ ChartHypothesis(const ChartTranslationOptions &, const RuleCubeItem &item,
ChartManager &manager);
~ChartHypothesis();
unsigned GetId() const { return m_id; }
+ //! Get the rule that created this hypothesis
const TargetPhrase &GetCurrTargetPhrase()const {
return m_targetPhrase;
}
+
+ //! the source range that this hypothesis spans
const WordsRange &GetCurrSourceRange()const {
return m_currSourceWordsRange;
}
+
+ //! the arc list when creating n-best lists
inline const ChartArcList* GetArcList() const {
return m_arcList;
}
+
+ //! the feature function states for a particular feature \param featureID
inline const FFState* GetFFState( size_t featureID ) const {
return m_ffStates[ featureID ];
}
+
+ //! reference back to the manager
inline const ChartManager& GetManager() const { return m_manager; }
void CreateOutputPhrase(Phrase &outPhrase) const;
@@ -115,28 +132,31 @@ public:
void CleanupArcList();
void SetWinningHypo(const ChartHypothesis *hypo);
- const ScoreComponentCollection &GetScoreBreakdown() const {
- return m_scoreBreakdown;
- }
- float GetTotalScore() const {
- return m_totalScore;
- }
+ //! get the unweighted score for each feature function
+ const ScoreComponentCollection &GetScoreBreakdown() const
+ { return m_scoreBreakdown; }
+
+ //! Get the weighted total score
+ float GetTotalScore() const
+ { return m_totalScore; }
- const std::vector<const ChartHypothesis*> &GetPrevHypos() const {
- return m_prevHypos;
- }
+ //! vector of previous hypotheses this hypo is built on
+ const std::vector<const ChartHypothesis*> &GetPrevHypos() const
+ { return m_prevHypos; }
+ //! get a particular previous hypos
const ChartHypothesis* GetPrevHypo(size_t pos) const {
return m_prevHypos[pos];
}
-
+
+ //! get the constituency label that covers this hypo
const Word &GetTargetLHS() const {
return GetCurrTargetPhrase().GetTargetLHS();
}
- const ChartHypothesis* GetWinningHypothesis() const {
- return m_winningHypo;
- }
+ //! get the best hypo in the arc list when doing n-best list creation. It's either this hypothesis, or the best hypo in this hypo's arc list
+ const ChartHypothesis* GetWinningHypothesis() const
+ { return m_winningHypo; }
TO_STRING();
diff --git a/moses/src/ChartHypothesisCollection.cpp b/moses/src/ChartHypothesisCollection.cpp
index 48a108aab..752bb7f6c 100644
--- a/moses/src/ChartHypothesisCollection.cpp
+++ b/moses/src/ChartHypothesisCollection.cpp
@@ -51,6 +51,13 @@ ChartHypothesisCollection::~ChartHypothesisCollection()
//RemoveAllInColl(m_hypos);
}
+/** public function to add hypothesis to this collection.
+ * Returns false if equiv hypo exists in collection, otherwise returns true.
+ * Takes care of update arc list for n-best list creation.
+ * Will delete the hypo if it already exists - once this function is called, don't delete the hypothesis yourself.
+ * \param hypo hypothesis to add
+ * \param manager pointer back to manager
+ */
bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManager &manager)
{
if (hypo->GetTotalScore() < m_bestScore + m_beamWidth) {
@@ -109,6 +116,11 @@ bool ChartHypothesisCollection::AddHypothesis(ChartHypothesis *hypo, ChartManage
}
}
+/** add hypothesis to stack. Prune if necessary.
+ * Returns false if equiv hypo exists in collection, otherwise returns true, and the iterator that points to the place where the hypo was added
+ * \param hypo hypothesis to add
+ * \param manager pointer back to manager
+ */
pair<ChartHypothesisCollection::HCType::iterator, bool> ChartHypothesisCollection::Add(ChartHypothesis *hypo, ChartManager &manager)
{
std::pair<HCType::iterator, bool> ret = m_hypos.insert(hypo);
@@ -134,12 +146,16 @@ pair<ChartHypothesisCollection::HCType::iterator, bool> ChartHypothesisCollectio
return ret;
}
-/** Remove hypothesis pointed to by iterator but don't delete the object. */
+/** Remove hypothesis pointed to by iterator but DOES NOT delete the object.
+ * \param iter iterator to delete
+ */
void ChartHypothesisCollection::Detach(const HCType::iterator &iter)
{
m_hypos.erase(iter);
}
+/** destroy iterator AND hypothesis pointed to by iterator. If in an object pool, takes care of that too
+ */
void ChartHypothesisCollection::Remove(const HCType::iterator &iter)
{
ChartHypothesis *h = *iter;
@@ -161,6 +177,10 @@ void ChartHypothesisCollection::Remove(const HCType::iterator &iter)
ChartHypothesis::Delete(h);
}
+/** prune the number of hypos to a particular size, specified by m_maxHypoStackSize, according to score
+ * Doesn't prune if hypos have identical scores on the boundary, so occasionally the number of hypos can remain above m_maxHypoStackSize.
+ * \param manager reference back to manager. Used for collecting stats
+ */
void ChartHypothesisCollection::PruneToSize(ChartManager &manager)
{
if (GetSize() > m_maxHypoStackSize) { // ok, if not over the limit
@@ -232,6 +252,7 @@ void ChartHypothesisCollection::PruneToSize(ChartManager &manager)
}
}
+//! sort hypotheses by descending score. Put these hypos into a vector m_hyposOrdered to be returned by function GetSortedHypotheses()
void ChartHypothesisCollection::SortHypotheses()
{
CHECK(m_hyposOrdered.empty());
@@ -245,6 +266,7 @@ void ChartHypothesisCollection::SortHypotheses()
}
}
+//! Call CleanupArcList() for each main hypo in collection
void ChartHypothesisCollection::CleanupArcList()
{
HCType::iterator iter;
@@ -254,6 +276,12 @@ void ChartHypothesisCollection::CleanupArcList()
}
}
+/** Return all hypos, and all hypos in the arclist, in order to create the output searchgraph, ie. the hypergraph. The output is the debug hypo information.
+ * @todo this is a useful function. Make sure it outputs everything required, especially scores.
+ * \param translationId unique, contiguous id for the input sentence
+ * \param outputSearchGraphStream stream to output the info to
+ * \param reachable @todo don't know
+ */
void ChartHypothesisCollection::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned, bool> &reachable) const
{
HCType::const_iterator iter;
diff --git a/moses/src/ChartHypothesisCollection.h b/moses/src/ChartHypothesisCollection.h
index ebf1c6002..f88cb8302 100644
--- a/moses/src/ChartHypothesisCollection.h
+++ b/moses/src/ChartHypothesisCollection.h
@@ -28,7 +28,7 @@
namespace Moses
{
-// order by descending score
+//! functor to compare (chart) hypotheses by (descending) score
class ChartHypothesisScoreOrderer
{
public:
@@ -37,6 +37,9 @@ public:
}
};
+/** functor to compare (chart) hypotheses by feature function states.
+ * If 2 hypos are equal, according to this functor, then they can be recombined.
+ */
class ChartHypothesisRecombinationOrderer
{
public:
@@ -57,7 +60,9 @@ public:
}
};
-// 1 of these for each target LHS in each cell
+/** Contains a set of unique hypos that have the same LHS non-term.
+ * ie. 1 of these for each target LHS in each cell
+ */
class ChartHypothesisCollection
{
friend std::ostream& operator<<(std::ostream&, const ChartHypothesisCollection&);
@@ -72,9 +77,6 @@ protected:
size_t m_maxHypoStackSize; /**< maximum number of hypothesis allowed in this stack */
bool m_nBestIsEnabled; /**< flag to determine whether to keep track of old arcs */
- /** add hypothesis to stack. Prune if necessary.
- * Returns false if equiv hypo exists in collection, otherwise returns true
- */
std::pair<HCType::iterator, bool> Add(ChartHypothesis *hypo, ChartManager &manager);
public:
@@ -92,9 +94,7 @@ public:
~ChartHypothesisCollection();
bool AddHypothesis(ChartHypothesis *hypo, ChartManager &manager);
- //! remove hypothesis pointed to by iterator but don't delete the object
void Detach(const HCType::iterator &iter);
- /** destroy Hypothesis pointed to by iterator (object pool version) */
void Remove(const HCType::iterator &iter);
void PruneToSize(ChartManager &manager);
@@ -109,10 +109,12 @@ public:
void SortHypotheses();
void CleanupArcList();
+ //! return vector of hypothesis that has been sorted by score
const HypoList &GetSortedHypotheses() const {
return m_hyposOrdered;
}
+ //! return the best total score of all hypos in this collection
float GetBestScore() const { return m_bestScore; }
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream, const std::map<unsigned,bool> &reachable) const;
diff --git a/moses/src/ChartManager.cpp b/moses/src/ChartManager.cpp
index 4d0fc186e..adb11ea94 100644
--- a/moses/src/ChartManager.cpp
+++ b/moses/src/ChartManager.cpp
@@ -23,6 +23,7 @@
#include "ChartManager.h"
#include "ChartCell.h"
#include "ChartHypothesis.h"
+#include "ChartTranslationOptions.h"
#include "ChartTrellisDetourQueue.h"
#include "ChartTrellisNode.h"
#include "ChartTrellisPath.h"
@@ -30,6 +31,7 @@
#include "StaticData.h"
#include "DecodeStep.h"
#include "TreeInput.h"
+#include "DummyScoreProducers.h"
using namespace std;
using namespace Moses;
@@ -38,13 +40,19 @@ namespace Moses
{
extern bool g_debug;
+/** constructor. Initialize everything prior to decoding a particular sentence.
+ * \param source the sentence to be decoded
+ * \param system which particular set of models to use.
+ */
ChartManager::ChartManager(InputType const& source, const TranslationSystem* system)
:m_source(source)
,m_hypoStackColl(source, *this)
- ,m_transOptColl(source, system, m_hypoStackColl, m_ruleLookupManagers)
,m_system(system)
,m_start(clock())
,m_hypothesisId(0)
+ ,m_translationOptionList(StaticData::Instance().GetRuleLimit())
+ ,m_decodeGraphList(system->GetDecodeGraphs())
+
{
m_system->InitializeBeforeSentenceProcessing(source);
const std::vector<PhraseDictionaryFeature*> &dictionaries = m_system->GetPhraseDictionaries();
@@ -60,10 +68,13 @@ ChartManager::ChartManager(InputType const& source, const TranslationSystem* sys
ChartManager::~ChartManager()
{
- m_system->CleanUpAfterSentenceProcessing();
+ m_system->CleanUpAfterSentenceProcessing(m_source);
RemoveAllInColl(m_ruleLookupManagers);
+ RemoveAllInColl(m_unksrcs);
+ RemoveAllInColl(m_cacheTargetPhraseCollection);
+
clock_t end = clock();
float et = (end - m_start);
et /= (float)CLOCKS_PER_SEC;
@@ -71,6 +82,7 @@ ChartManager::~ChartManager()
}
+//! decode the sentence. This contains the main loop. Basically, the CKY++ algorithm
void ChartManager::ProcessSentence()
{
VERBOSE(1,"Translating: " << m_source << endl);
@@ -90,14 +102,13 @@ void ChartManager::ProcessSentence()
WordsRange range(startPos, endPos);
// create trans opt
- m_transOptColl.CreateTranslationOptionsForRange(range);
+ CreateTranslationOptionsForRange(range);
// decode
ChartCell &cell = m_hypoStackColl.Get(range);
- cell.ProcessSentence(m_transOptColl.GetTranslationOptionList()
- ,m_hypoStackColl);
- m_transOptColl.Clear();
+ cell.ProcessSentence(m_translationOptionList, m_hypoStackColl);
+ m_translationOptionList.Clear();
cell.PruneToSize();
cell.CleanupArcList();
cell.SortHypotheses();
@@ -125,14 +136,18 @@ void ChartManager::ProcessSentence()
}
}
+/** add specific translation options and hypotheses according to the XML override translation scheme.
+ * Doesn't seem to do anything about walls and zones.
+ * @todo check walls & zones. Check that the implementation doesn't leak, xml options sometimes does if you're not careful
+ */
void ChartManager::AddXmlChartOptions() {
- const std::vector <ChartTranslationOption*> xmlChartOptionsList = m_source.GetXmlChartTranslationOptions();
+ const std::vector <ChartTranslationOptions*> xmlChartOptionsList = m_source.GetXmlChartTranslationOptions();
IFVERBOSE(2) { cerr << "AddXmlChartOptions " << xmlChartOptionsList.size() << endl; }
if (xmlChartOptionsList.size() == 0) return;
- for(std::vector<ChartTranslationOption*>::const_iterator i = xmlChartOptionsList.begin();
+ for(std::vector<ChartTranslationOptions*>::const_iterator i = xmlChartOptionsList.begin();
i != xmlChartOptionsList.end(); ++i) {
- ChartTranslationOption* opt = *i;
+ ChartTranslationOptions* opt = *i;
Moses::Scores wordPenaltyScore(1, -0.434294482); // TODO what is this number?
opt->GetTargetPhraseCollection().GetCollection()[0]->SetScore((ScoreProducer*)m_system->GetWordPenaltyProducer(), wordPenaltyScore);
@@ -146,6 +161,7 @@ void ChartManager::AddXmlChartOptions() {
}
}
+//! get best complete translation from the top chart cell.
const ChartHypothesis *ChartManager::GetBestHypothesis() const
{
size_t size = m_source.GetSize();
@@ -159,7 +175,13 @@ const ChartHypothesis *ChartManager::GetBestHypothesis() const
}
}
-void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyDistinct) const
+ /** Calculate the n-best paths through the output hypergraph.
+ * Return the list of paths with the variable ret
+ * \param count how may paths to return
+ * \param ret return argument
+ * \param onlyDistinct whether to check for distinct output sentence or not (default - don't check, just return top n-paths)
+ */
+void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDistinct) const
{
size_t size = m_source.GetSize();
if (count == 0 || size == 0)
@@ -248,10 +270,6 @@ void ChartManager::CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyD
}
}
-void ChartManager::CalcDecoderStatistics() const
-{
-}
-
void ChartManager::GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const
{
size_t size = m_source.GetSize();
@@ -337,4 +355,165 @@ void ChartManager::CreateDeviantPaths(
}
}
+void ChartManager::CreateTranslationOptionsForRange(const WordsRange &wordsRange)
+{
+ assert(m_decodeGraphList.size() == m_ruleLookupManagers.size());
+
+ m_translationOptionList.Clear();
+
+ std::vector <DecodeGraph*>::const_iterator iterDecodeGraph;
+ std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin();
+ for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) {
+ const DecodeGraph &decodeGraph = **iterDecodeGraph;
+ assert(decodeGraph.GetSize() == 1);
+ ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers;
+ size_t maxSpan = decodeGraph.GetMaxChartSpan();
+ if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) {
+ ruleLookupManager.GetChartRuleCollection(wordsRange, m_translationOptionList);
+ }
+ }
+
+ if (wordsRange.GetNumWordsCovered() == 1 && wordsRange.GetStartPos() != 0 && wordsRange.GetStartPos() != m_source.GetSize()-1) {
+ bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
+ if (m_translationOptionList.GetSize() == 0 || alwaysCreateDirectTranslationOption) {
+ // create unknown words for 1 word coverage where we don't have any trans options
+ const Word &sourceWord = m_source.GetWord(wordsRange.GetStartPos());
+ ProcessOneUnknownWord(sourceWord, wordsRange);
+ }
+ }
+
+ m_translationOptionList.ApplyThreshold();
+ PreCalculateScores();
+}
+
+//! special handling of ONE unknown words.
+void ChartManager::ProcessOneUnknownWord(const Word &sourceWord, const WordsRange &range)
+{
+ // unknown word, add as trans opt
+ const StaticData &staticData = StaticData::Instance();
+ const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = m_system->GetUnknownWordPenaltyProducer();
+ vector<float> wordPenaltyScore(1, -0.434294482); // TODO what is this number?
+
+ const ChartCell &chartCell = m_hypoStackColl.Get(range);
+ const ChartCellLabel &sourceWordLabel = chartCell.GetSourceWordLabel();
+
+ size_t isDigit = 0;
+ if (staticData.GetDropUnknown()) {
+ const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
+ const string &s = f->GetString();
+ isDigit = s.find_first_of("0123456789");
+ if (isDigit == string::npos)
+ isDigit = 0;
+ else
+ isDigit = 1;
+ // modify the starting bitmap
+ }
+
+ Phrase* m_unksrc = new Phrase(1);
+ m_unksrc->AddWord() = sourceWord;
+ m_unksrcs.push_back(m_unksrc);
+
+ //TranslationOption *transOpt;
+ if (! staticData.GetDropUnknown() || isDigit) {
+ // loop
+ const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
+ UnknownLHSList::const_iterator iterLHS;
+ for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
+ const string &targetLHSStr = iterLHS->first;
+ float prob = iterLHS->second;
+
+ // lhs
+ //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
+ Word targetLHS(true);
+
+ targetLHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
+ CHECK(targetLHS.GetFactor(0) != NULL);
+
+ // add to dictionary
+ TargetPhrase *targetPhrase = new TargetPhrase(Output);
+ TargetPhraseCollection *tpc = new TargetPhraseCollection;
+ tpc->Add(targetPhrase);
+
+ m_cacheTargetPhraseCollection.push_back(tpc);
+ Word &targetWord = targetPhrase->AddWord();
+ targetWord.CreateUnknownWord(sourceWord);
+
+ // scores
+ vector<float> unknownScore(1, FloorScore(TransformScore(prob)));
+
+ //targetPhrase->SetScore();
+ targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
+ targetPhrase->SetScore(m_system->GetWordPenaltyProducer(), wordPenaltyScore);
+ targetPhrase->SetSourcePhrase(*m_unksrc);
+ targetPhrase->SetTargetLHS(targetLHS);
+
+ // chart rule
+ m_translationOptionList.Add(*tpc, m_emptyStackVec, range);
+ } // for (iterLHS
+ } else {
+ // drop source word. create blank trans opt
+ vector<float> unknownScore(1, FloorScore(-numeric_limits<float>::infinity()));
+
+ TargetPhrase *targetPhrase = new TargetPhrase(Output);
+ TargetPhraseCollection *tpc = new TargetPhraseCollection;
+ tpc->Add(targetPhrase);
+ // loop
+ const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
+ UnknownLHSList::const_iterator iterLHS;
+ for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
+ const string &targetLHSStr = iterLHS->first;
+ //float prob = iterLHS->second;
+
+ Word targetLHS(true);
+ targetLHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
+ CHECK(targetLHS.GetFactor(0) != NULL);
+
+ m_cacheTargetPhraseCollection.push_back(tpc);
+ targetPhrase->SetSourcePhrase(*m_unksrc);
+ targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
+ targetPhrase->SetTargetLHS(targetLHS);
+
+ // chart rule
+ m_translationOptionList.Add(*tpc, m_emptyStackVec, range);
+ }
+ }
+}
+
+void ChartManager::PreCalculateScores()
+{
+ for (size_t i = 0; i < m_translationOptionList.GetSize(); ++i) {
+ const ChartTranslationOptions& cto = m_translationOptionList.Get(i);
+ for (TargetPhraseCollection::const_iterator j = cto.GetTargetPhraseCollection().begin();
+ j != cto.GetTargetPhraseCollection().end(); ++j) {
+ const TargetPhrase* targetPhrase = *j;
+ if (m_precalculatedScores.find(*targetPhrase) == m_precalculatedScores.end()) {
+ ChartBasedFeatureContext context(*targetPhrase,m_source);
+ const vector<const StatelessFeatureFunction*>& sfs =
+ m_system->GetStatelessFeatureFunctions();
+ ScoreComponentCollection& breakdown = m_precalculatedScores[*targetPhrase];
+ for (size_t k = 0; k < sfs.size(); ++k) {
+ if (!sfs[k]->ComputeValueInTranslationTable()) {
+ sfs[k]->EvaluateChart(context,&breakdown);
+ }
+ }
+ }
+ }
+ }
+}
+
+void ChartManager::InsertPreCalculatedScores(
+ const TargetPhrase& targetPhrase, ScoreComponentCollection* scoreBreakdown) const
+{
+ boost::unordered_map<TargetPhrase,ScoreComponentCollection>::const_iterator scoreIter =
+ m_precalculatedScores.find(targetPhrase);
+ if (scoreIter != m_precalculatedScores.end()) {
+ scoreBreakdown->PlusEquals(scoreIter->second);
+ } else {
+ TRACE_ERR("ERROR: " << targetPhrase << " missing from precalculation cache" << endl);
+ assert(0);
+ }
+
+}
+
+
} // namespace Moses
diff --git a/moses/src/ChartManager.h b/moses/src/ChartManager.h
index d59975c61..45db8c206 100644
--- a/moses/src/ChartManager.h
+++ b/moses/src/ChartManager.h
@@ -22,14 +22,15 @@
#pragma once
#include <vector>
+#include <boost/unordered_map.hpp>
#include "ChartCell.h"
-#include "ChartTranslationOptionCollection.h"
#include "ChartCellCollection.h"
#include "InputType.h"
#include "WordsRange.h"
#include "SentenceStats.h"
#include "TranslationSystem.h"
#include "ChartRuleLookupManager.h"
+#include "ChartTranslationOptionList.h"
#include <boost/shared_ptr.hpp>
@@ -42,6 +43,8 @@ class ChartTrellisNode;
class ChartTrellisPath;
class ChartTrellisPathList;
+/** Holds everything you need to decode 1 sentence with the hierachical/syntax decoder
+ */
class ChartManager
{
private:
@@ -51,46 +54,73 @@ private:
static void CreateDeviantPaths(boost::shared_ptr<const ChartTrellisPath>,
const ChartTrellisNode &,
ChartTrellisDetourQueue &);
+ void CreateTranslationOptionsForRange(const WordsRange &wordsRange);
+ void ProcessOneUnknownWord(const Word &sourceWord, const WordsRange &range);
InputType const& m_source; /**< source sentence to be translated */
ChartCellCollection m_hypoStackColl;
- ChartTranslationOptionCollection m_transOptColl; /**< pre-computed list of translation options for the phrases in this sentence */
std::auto_ptr<SentenceStats> m_sentenceStats;
const TranslationSystem* m_system;
clock_t m_start; /**< starting time, used for logging */
std::vector<ChartRuleLookupManager*> m_ruleLookupManagers;
unsigned m_hypothesisId; /* For handing out hypothesis ids to ChartHypothesis */
+ ChartTranslationOptionList m_translationOptionList; /**< pre-computed list of translation options for the phrases in this sentence */
+ std::vector<Phrase*> m_unksrcs;
+ std::list<TargetPhraseCollection*> m_cacheTargetPhraseCollection;
+ std::vector <DecodeGraph*> m_decodeGraphList;
+ StackVec m_emptyStackVec;
+
+ //! Some features should be calculated prior to search
+ boost::unordered_map<TargetPhrase,ScoreComponentCollection, RuleHash, RuleComparator> m_precalculatedScores;
+
+ //! Pre-calculate most stateless feature values
+ void PreCalculateScores();
+
public:
ChartManager(InputType const& source, const TranslationSystem* system);
~ChartManager();
void ProcessSentence();
void AddXmlChartOptions();
const ChartHypothesis *GetBestHypothesis() const;
- void CalcNBest(size_t count, ChartTrellisPathList &ret,bool onlyDistinct=0) const;
+ void CalcNBest(size_t count, ChartTrellisPathList &ret, bool onlyDistinct=0) const;
void GetSearchGraph(long translationId, std::ostream &outputSearchGraphStream) const;
void FindReachableHypotheses( const ChartHypothesis *hypo, std::map<unsigned,bool> &reachable ) const; /* auxilliary function for GetSearchGraph */
+ //! the input sentence being decoded
const InputType& GetSource() const {
return m_source;
}
+
+ //! which particular set of models is in use
const TranslationSystem* GetTranslationSystem() const {
return m_system;
}
+ //! debug data collected when decoding sentence
SentenceStats& GetSentenceStats() const {
return *m_sentenceStats;
}
+
/***
* to be called after processing a sentence (which may consist of more than just calling ProcessSentence() )
+ * currently an empty function
*/
- void CalcDecoderStatistics() const;
+ void CalcDecoderStatistics() const
+ { }
+
void ResetSentenceStats(const InputType& source) {
m_sentenceStats = std::auto_ptr<SentenceStats>(new SentenceStats(source));
}
+ //! contigious hypo id for each input sentence. For debugging purposes
unsigned GetNextHypoId() { return m_hypothesisId++; }
+
+ //! Access the pre-calculated values
+ void InsertPreCalculatedScores(const TargetPhrase& targetPhrase,
+ ScoreComponentCollection* scoreBreakdown) const;
+
};
}
diff --git a/moses/src/ChartRuleLookupManager.h b/moses/src/ChartRuleLookupManager.h
index 4053a59b1..2c17aea19 100644
--- a/moses/src/ChartRuleLookupManager.h
+++ b/moses/src/ChartRuleLookupManager.h
@@ -30,11 +30,12 @@ namespace Moses
class ChartTranslationOptionList;
class WordsRange;
-// Defines an interface for looking up rules in a rule table. Concrete
-// implementation classes should correspond to specific PhraseDictionary
-// subclasses (memory or on-disk). Since a ChartRuleLookupManager object
-// maintains sentence-specific state, exactly one should be created for
-// each sentence that is to be decoded.
+/** Defines an interface for looking up rules in a rule table. Concrete
+ * implementation classes should correspond to specific PhraseDictionary
+ * subclasses (memory or on-disk). Since a ChartRuleLookupManager object
+ * maintains sentence-specific state, exactly one should be created for
+ * each sentence that is to be decoded.
+ */
class ChartRuleLookupManager
{
public:
@@ -45,20 +46,28 @@ public:
virtual ~ChartRuleLookupManager() {}
+ //! the sentence being decoded
const InputType &GetSentence() const {
return m_sentence;
}
+
+ //! all the chart cells
const ChartCellCollection &GetCellCollection() const {
return m_cellCollection;
}
+ /** abstract function. Return a vector of translation options for given a range in the input sentence
+ * \param range source range for which you want the translation options
+ * \param outColl return argument
+ */
virtual void GetChartRuleCollection(
const WordsRange &range,
ChartTranslationOptionList &outColl) = 0;
private:
- // Non-copyable: copy constructor and assignment operator not implemented.
+ //! Non-copyable: copy constructor and assignment operator not implemented.
ChartRuleLookupManager(const ChartRuleLookupManager &);
+ //! Non-copyable: copy constructor and assignment operator not implemented.
ChartRuleLookupManager &operator=(const ChartRuleLookupManager &);
const InputType &m_sentence;
diff --git a/moses/src/ChartTranslationOptionCollection.cpp b/moses/src/ChartTranslationOptionCollection.cpp
deleted file mode 100644
index 26f218534..000000000
--- a/moses/src/ChartTranslationOptionCollection.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2010 Hieu Hoang
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#include "util/check.hh"
-#include "ChartTranslationOptionCollection.h"
-#include "ChartCellCollection.h"
-#include "InputType.h"
-#include "StaticData.h"
-#include "DecodeStep.h"
-#include "DummyScoreProducers.h"
-#include "Util.h"
-
-using namespace std;
-
-namespace Moses
-{
-
-ChartTranslationOptionCollection::ChartTranslationOptionCollection(InputType const& source
- , const TranslationSystem* system
- , const ChartCellCollection &hypoStackColl
- , const std::vector<ChartRuleLookupManager*> &ruleLookupManagers)
- :m_source(source)
- ,m_system(system)
- ,m_decodeGraphList(system->GetDecodeGraphs())
- ,m_hypoStackColl(hypoStackColl)
- ,m_ruleLookupManagers(ruleLookupManagers)
- ,m_translationOptionList(StaticData::Instance().GetRuleLimit())
-{
-}
-
-ChartTranslationOptionCollection::~ChartTranslationOptionCollection()
-{
- RemoveAllInColl(m_unksrcs);
- RemoveAllInColl(m_cacheTargetPhraseCollection);
-}
-
-void ChartTranslationOptionCollection::CreateTranslationOptionsForRange(
- const WordsRange &wordsRange)
-{
- assert(m_decodeGraphList.size() == m_ruleLookupManagers.size());
-
- m_translationOptionList.Clear();
-
- std::vector <DecodeGraph*>::const_iterator iterDecodeGraph;
- std::vector <ChartRuleLookupManager*>::const_iterator iterRuleLookupManagers = m_ruleLookupManagers.begin();
- for (iterDecodeGraph = m_decodeGraphList.begin(); iterDecodeGraph != m_decodeGraphList.end(); ++iterDecodeGraph, ++iterRuleLookupManagers) {
- const DecodeGraph &decodeGraph = **iterDecodeGraph;
- assert(decodeGraph.GetSize() == 1);
- ChartRuleLookupManager &ruleLookupManager = **iterRuleLookupManagers;
- size_t maxSpan = decodeGraph.GetMaxChartSpan();
- if (maxSpan == 0 || wordsRange.GetNumWordsCovered() <= maxSpan) {
- ruleLookupManager.GetChartRuleCollection(wordsRange, m_translationOptionList);
- }
- }
-
- if (wordsRange.GetNumWordsCovered() == 1 && wordsRange.GetStartPos() != 0 && wordsRange.GetStartPos() != m_source.GetSize()-1) {
- bool alwaysCreateDirectTranslationOption = StaticData::Instance().IsAlwaysCreateDirectTranslationOption();
- if (m_translationOptionList.GetSize() == 0 || alwaysCreateDirectTranslationOption) {
- // create unknown words for 1 word coverage where we don't have any trans options
- const Word &sourceWord = m_source.GetWord(wordsRange.GetStartPos());
- ProcessOneUnknownWord(sourceWord, wordsRange);
- }
- }
-
- m_translationOptionList.ApplyThreshold();
-}
-
-//! special handling of ONE unknown words.
-void ChartTranslationOptionCollection::ProcessOneUnknownWord(const Word &sourceWord, const WordsRange &range)
-{
- // unknown word, add as trans opt
- const StaticData &staticData = StaticData::Instance();
- const UnknownWordPenaltyProducer *unknownWordPenaltyProducer = m_system->GetUnknownWordPenaltyProducer();
- vector<float> wordPenaltyScore(1, -0.434294482); // TODO what is this number?
-
- const ChartCell &chartCell = m_hypoStackColl.Get(range);
- const ChartCellLabel &sourceWordLabel = chartCell.GetSourceWordLabel();
-
- size_t isDigit = 0;
- if (staticData.GetDropUnknown()) {
- const Factor *f = sourceWord[0]; // TODO hack. shouldn't know which factor is surface
- const string &s = f->GetString();
- isDigit = s.find_first_of("0123456789");
- if (isDigit == string::npos)
- isDigit = 0;
- else
- isDigit = 1;
- // modify the starting bitmap
- }
-
- Phrase* m_unksrc = new Phrase(1);
- m_unksrc->AddWord() = sourceWord;
- m_unksrcs.push_back(m_unksrc);
-
- //TranslationOption *transOpt;
- if (! staticData.GetDropUnknown() || isDigit) {
- // loop
- const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
- UnknownLHSList::const_iterator iterLHS;
- for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
- const string &targetLHSStr = iterLHS->first;
- float prob = iterLHS->second;
-
- // lhs
- //const Word &sourceLHS = staticData.GetInputDefaultNonTerminal();
- Word targetLHS(true);
-
- targetLHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
- CHECK(targetLHS.GetFactor(0) != NULL);
-
- // add to dictionary
- TargetPhrase *targetPhrase = new TargetPhrase(Output);
- TargetPhraseCollection *tpc = new TargetPhraseCollection;
- tpc->Add(targetPhrase);
-
- m_cacheTargetPhraseCollection.push_back(tpc);
- Word &targetWord = targetPhrase->AddWord();
- targetWord.CreateUnknownWord(sourceWord);
-
- // scores
- vector<float> unknownScore(1, FloorScore(TransformScore(prob)));
-
- //targetPhrase->SetScore();
- targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
- targetPhrase->SetScore(m_system->GetWordPenaltyProducer(), wordPenaltyScore);
- targetPhrase->SetSourcePhrase(*m_unksrc);
- targetPhrase->SetTargetLHS(targetLHS);
-
- // chart rule
- m_translationOptionList.Add(*tpc, m_emptyStackVec, range);
- } // for (iterLHS
- } else {
- // drop source word. create blank trans opt
- vector<float> unknownScore(1, FloorScore(-numeric_limits<float>::infinity()));
-
- TargetPhrase *targetPhrase = new TargetPhrase(Output);
- TargetPhraseCollection *tpc = new TargetPhraseCollection;
- tpc->Add(targetPhrase);
- // loop
- const UnknownLHSList &lhsList = staticData.GetUnknownLHS();
- UnknownLHSList::const_iterator iterLHS;
- for (iterLHS = lhsList.begin(); iterLHS != lhsList.end(); ++iterLHS) {
- const string &targetLHSStr = iterLHS->first;
- //float prob = iterLHS->second;
-
- Word targetLHS(true);
- targetLHS.CreateFromString(Output, staticData.GetOutputFactorOrder(), targetLHSStr, true);
- CHECK(targetLHS.GetFactor(0) != NULL);
-
- targetPhrase->SetSourcePhrase(*m_unksrc);
- targetPhrase->SetScore(unknownWordPenaltyProducer, unknownScore);
- targetPhrase->SetTargetLHS(targetLHS);
-
- // chart rule
- m_translationOptionList.Add(*tpc, m_emptyStackVec, range);
- }
- }
-}
-
-} // namespace
diff --git a/moses/src/ChartTranslationOptionCollection.h b/moses/src/ChartTranslationOptionCollection.h
deleted file mode 100644
index 4f5b84062..000000000
--- a/moses/src/ChartTranslationOptionCollection.h
+++ /dev/null
@@ -1,72 +0,0 @@
-// vim:tabstop=2
-/***********************************************************************
- Moses - factored phrase-based language decoder
- Copyright (C) 2010 Hieu Hoang
-
- This library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- This library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with this library; if not, write to the Free Software
- Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- ***********************************************************************/
-
-#pragma once
-
-#include <vector>
-#include "InputType.h"
-#include "DecodeGraph.h"
-#include "ChartTranslationOptionList.h"
-#include "ChartRuleLookupManager.h"
-#include "StackVec.h"
-
-namespace Moses
-{
-class DecodeGraph;
-class Word;
-class ChartTranslationOption;
-class DottedRule;
-class WordPenaltyProducer;
-class ChartCellCollection;
-
-class ChartTranslationOptionCollection
-{
-protected:
- const InputType &m_source;
- const TranslationSystem* m_system;
- std::vector <DecodeGraph*> m_decodeGraphList;
- const ChartCellCollection &m_hypoStackColl;
- const std::vector<ChartRuleLookupManager*> &m_ruleLookupManagers;
-
- ChartTranslationOptionList m_translationOptionList;
- std::vector<Phrase*> m_unksrcs;
- std::list<TargetPhraseCollection*> m_cacheTargetPhraseCollection;
- StackVec m_emptyStackVec;
-
- //! special handling of ONE unknown words.
- virtual void ProcessOneUnknownWord(const Word &, const WordsRange &);
-
-public:
- ChartTranslationOptionCollection(InputType const& source
- , const TranslationSystem* system
- , const ChartCellCollection &hypoStackColl
- , const std::vector<ChartRuleLookupManager*> &ruleLookupManagers);
- virtual ~ChartTranslationOptionCollection();
- void CreateTranslationOptionsForRange(const WordsRange &);
-
- const ChartTranslationOptionList &GetTranslationOptionList() const {
- return m_translationOptionList;
- }
-
- void Clear() { m_translationOptionList.Clear(); }
-
-};
-
-}
diff --git a/moses/src/ChartTranslationOptionList.cpp b/moses/src/ChartTranslationOptionList.cpp
index eadd4b688..41b059239 100644
--- a/moses/src/ChartTranslationOptionList.cpp
+++ b/moses/src/ChartTranslationOptionList.cpp
@@ -21,7 +21,7 @@
#include <iostream>
#include "StaticData.h"
#include "ChartTranslationOptionList.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "ChartCellCollection.h"
#include "WordsRange.h"
@@ -49,7 +49,7 @@ void ChartTranslationOptionList::Clear()
class ChartTranslationOptionOrderer
{
public:
- bool operator()(const ChartTranslationOption* itemA, const ChartTranslationOption* itemB) const {
+ bool operator()(const ChartTranslationOptions* itemA, const ChartTranslationOptions* itemB) const {
return itemA->GetEstimateOfBestScore() > itemB->GetEstimateOfBestScore();
}
};
@@ -62,7 +62,7 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
return;
}
- float score = ChartTranslationOption::CalcEstimateOfBestScore(tpc, stackVec);
+ float score = ChartTranslationOptions::CalcEstimateOfBestScore(tpc, stackVec);
// If the rule limit has already been reached then don't add the option
// unless it is better than at least one existing option.
@@ -73,11 +73,11 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
// Add the option to the list.
if (m_size == m_collection.size()) {
// m_collection has reached capacity: create a new object.
- m_collection.push_back(new ChartTranslationOption(tpc, stackVec,
+ m_collection.push_back(new ChartTranslationOptions(tpc, stackVec,
range, score));
} else {
// Overwrite an unused object.
- *(m_collection[m_size]) = ChartTranslationOption(tpc, stackVec,
+ *(m_collection[m_size]) = ChartTranslationOptions(tpc, stackVec,
range, score);
}
++m_size;
@@ -122,7 +122,7 @@ void ChartTranslationOptionList::ApplyThreshold()
CollType::const_iterator iter;
for (iter = m_collection.begin(); iter != m_collection.begin()+m_size; ++iter) {
- const ChartTranslationOption *transOpt = *iter;
+ const ChartTranslationOptions *transOpt = *iter;
float score = transOpt->GetEstimateOfBestScore();
scoreThreshold = (score > scoreThreshold) ? score : scoreThreshold;
}
diff --git a/moses/src/ChartTranslationOptionList.h b/moses/src/ChartTranslationOptionList.h
index 75ef73665..3bd56c2a3 100644
--- a/moses/src/ChartTranslationOptionList.h
+++ b/moses/src/ChartTranslationOptionList.h
@@ -19,7 +19,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#pragma once
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "StackVec.h"
#include <vector>
@@ -30,14 +30,14 @@ namespace Moses
class TargetPhraseCollection;
class WordsRange;
-//! a list of target phrases that is trsnalated from the same source phrase
+//! a vector of translations options for a specific range, in a specific sentence
class ChartTranslationOptionList
{
public:
ChartTranslationOptionList(size_t);
~ChartTranslationOptionList();
- const ChartTranslationOption &Get(size_t i) const { return *m_collection[i]; }
+ const ChartTranslationOptions &Get(size_t i) const { return *m_collection[i]; }
//! number of translation options
size_t GetSize() const { return m_size; }
@@ -50,12 +50,12 @@ class ChartTranslationOptionList
void ApplyThreshold();
private:
- typedef std::vector<ChartTranslationOption*> CollType;
+ typedef std::vector<ChartTranslationOptions*> CollType;
struct ScoreThresholdPred
{
ScoreThresholdPred(float threshold) : m_thresholdScore(threshold) {}
- bool operator()(const ChartTranslationOption *option)
+ bool operator()(const ChartTranslationOptions *option)
{
return option->GetEstimateOfBestScore() >= m_thresholdScore;
}
diff --git a/moses/src/ChartTranslationOption.cpp b/moses/src/ChartTranslationOptions.cpp
index 792bfde82..ef21bc1d5 100644
--- a/moses/src/ChartTranslationOption.cpp
+++ b/moses/src/ChartTranslationOptions.cpp
@@ -17,14 +17,14 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
#include "ChartHypothesis.h"
namespace Moses
{
-float ChartTranslationOption::CalcEstimateOfBestScore(
+float ChartTranslationOptions::CalcEstimateOfBestScore(
const TargetPhraseCollection &tpc,
const StackVec &stackVec)
{
diff --git a/moses/src/ChartTranslationOption.h b/moses/src/ChartTranslationOptions.h
index d17f00427..4910723f7 100644
--- a/moses/src/ChartTranslationOption.h
+++ b/moses/src/ChartTranslationOptions.h
@@ -30,43 +30,55 @@
namespace Moses
{
-// Similar to a DottedRule, but contains a direct reference to a list
-// of translations and provdes an estimate of the best score.
-class ChartTranslationOption
+/** Similar to a DottedRule, but contains a direct reference to a list
+ * of translations and provdes an estimate of the best score. For a specific range in the input sentence
+ */
+class ChartTranslationOptions
{
public:
- ChartTranslationOption(const TargetPhraseCollection &targetPhraseColl,
+ /** Constructor
+ \param targetPhraseColl @todo dunno
+ \param stackVec @todo dunno
+ \param wordsRange the range in the source sentence this translation option covers
+ \param score @todo dunno
+ */
+ ChartTranslationOptions(const TargetPhraseCollection &targetPhraseColl,
const StackVec &stackVec,
const WordsRange &wordsRange,
float score)
: m_stackVec(stackVec)
, m_targetPhraseCollection(&targetPhraseColl)
, m_wordsRange(&wordsRange)
- , m_estimateOfBestScore(score) {}
+ , m_estimateOfBestScore(score)
+ {}
- ~ChartTranslationOption() {}
+ ~ChartTranslationOptions() {}
static float CalcEstimateOfBestScore(const TargetPhraseCollection &,
const StackVec &);
+ //! @todo dunno
const StackVec &GetStackVec() const { return m_stackVec; }
+ //! @todo isn't the translation suppose to just contain 1 target phrase, not a whole collection of them?
const TargetPhraseCollection &GetTargetPhraseCollection() const {
return *m_targetPhraseCollection;
}
+ //! the range in the source sentence this translation option covers
const WordsRange &GetSourceWordsRange() const {
return *m_wordsRange;
}
- // return an estimate of the best score possible with this translation option.
- // the estimate is the sum of the top target phrase's estimated score plus the
- // scores of the best child hypotheses.
+ /** return an estimate of the best score possible with this translation option.
+ * the estimate is the sum of the top target phrase's estimated score plus the
+ * scores of the best child hypotheses.
+ */
inline float GetEstimateOfBestScore() const { return m_estimateOfBestScore; }
private:
- StackVec m_stackVec;
+ StackVec m_stackVec; //! vector of hypothesis list!
const TargetPhraseCollection *m_targetPhraseCollection;
const WordsRange *m_wordsRange;
float m_estimateOfBestScore;
diff --git a/moses/src/ChartTrellisDetour.h b/moses/src/ChartTrellisDetour.h
index a3b07ad00..977ccb67d 100644
--- a/moses/src/ChartTrellisDetour.h
+++ b/moses/src/ChartTrellisDetour.h
@@ -27,6 +27,8 @@ class ChartHypothesis;
class ChartTrellisNode;
class ChartTrellisPath;
+/** @todo Something to do with make deviant paths
+ */
class ChartTrellisDetour
{
public:
diff --git a/moses/src/ChartTrellisDetourQueue.h b/moses/src/ChartTrellisDetourQueue.h
index f679708e4..d6505d8a2 100644
--- a/moses/src/ChartTrellisDetourQueue.h
+++ b/moses/src/ChartTrellisDetourQueue.h
@@ -25,10 +25,11 @@
namespace Moses {
-// A bounded priority queue of ChartTrellisDetour pointers. The top item is
-// the best scoring detour. The queue assumes ownership of pushed items and
-// relinquishes ownership when they are popped. Any remaining items at the
-// time of the queue's destruction are deleted.
+/** A bounded priority queue of ChartTrellisDetour pointers. The top item is
+ * the best scoring detour. The queue assumes ownership of pushed items and
+ * relinquishes ownership when they are popped. Any remaining items at the
+ * time of the queue's destruction are deleted.
+ */
class ChartTrellisDetourQueue {
public:
// Create empty queue with fixed capacity of c. Capacity 0 means unbounded.
diff --git a/moses/src/ChartTrellisNode.cpp b/moses/src/ChartTrellisNode.cpp
index 95e6d613a..9493fa82c 100644
--- a/moses/src/ChartTrellisNode.cpp
+++ b/moses/src/ChartTrellisNode.cpp
@@ -76,14 +76,12 @@ Phrase ChartTrellisNode::GetOutputPhrase() const
// exactly like same fn in hypothesis, but use trellis nodes instead of prevHypos pointer
Phrase ret(ARRAY_SIZE_INCR);
- const Phrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
- const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- m_hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+ const TargetPhrase &currTargetPhrase = m_hypo.GetCurrTargetPhrase();
for (size_t pos = 0; pos < currTargetPhrase.GetSize(); ++pos) {
const Word &word = currTargetPhrase.GetWord(pos);
if (word.IsNonTerminal()) {
// non-term. fill out with prev hypo
- size_t nonTermInd = nonTermIndexMap[pos];
+ size_t nonTermInd = currTargetPhrase.GetAlignmentInfo().GetNonTermIndexMap()[pos];
const ChartTrellisNode &childNode = GetChild(nonTermInd);
Phrase childPhrase = childNode.GetOutputPhrase();
ret.Append(childPhrase);
diff --git a/moses/src/ChartTrellisNode.h b/moses/src/ChartTrellisNode.h
index 7b81ff4b2..58203677e 100644
--- a/moses/src/ChartTrellisNode.h
+++ b/moses/src/ChartTrellisNode.h
@@ -30,6 +30,8 @@ class ScoreComponentCollection;
class ChartHypothesis;
class ChartTrellisDetour;
+/** 1 node in the output hypergraph. Used in ChartTrellisPath
+ */
class ChartTrellisNode
{
public:
diff --git a/moses/src/ChartTrellisPath.h b/moses/src/ChartTrellisPath.h
index 4dee018c3..589fe9158 100644
--- a/moses/src/ChartTrellisPath.h
+++ b/moses/src/ChartTrellisPath.h
@@ -34,6 +34,11 @@ class ChartTrellisDetour;
class ChartTrellisDetourQueue;
class ChartTrellisNode;
+/** 1 path throught the output hypergraph
+ * The class hold the final node in the path used for constructing n-best list in chart decoding.
+ * Each node hold it's own children.
+ * Also contains the total score and score breakdown for this path.
+ */
class ChartTrellisPath
{
public:
diff --git a/moses/src/CompactPT/BlockHashIndex.cpp b/moses/src/CompactPT/BlockHashIndex.cpp
new file mode 100644
index 000000000..a4ad79162
--- /dev/null
+++ b/moses/src/CompactPT/BlockHashIndex.cpp
@@ -0,0 +1,422 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "ThrowingFwrite.h"
+#include "BlockHashIndex.h"
+#include "CmphStringVectorAdapter.h"
+
+#ifdef HAVE_CMPH
+#include "cmph.h"
+#endif
+
+namespace Moses
+{
+#ifdef WITH_THREADS
+BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
+ size_t threadsNum)
+: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
+ m_fileHandle(0), m_fileHandleStart(0), m_size(0),
+ m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0),
+ m_threadPool(threadsNum) {
+#ifndef HAVE_CMPH
+ std::cerr << "minphr: CMPH support not compiled in." << std::endl;
+ exit(1);
+#endif
+ }
+#else
+BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits)
+: m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
+ m_fileHandle(0), m_fileHandleStart(0), m_size(0),
+ m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0) {
+#ifndef HAVE_CMPH
+ std::cerr << "minphr: CMPH support not compiled in." << std::endl;
+ exit(1);
+#endif
+ }
+#endif
+
+BlockHashIndex::~BlockHashIndex()
+{
+#ifdef HAVE_CMPH
+ for(std::vector<void*>::iterator it = m_hashes.begin();
+ it != m_hashes.end(); it++)
+ if(*it != 0)
+ cmph_destroy((cmph_t*)*it);
+
+ for(std::vector<PairedPackedArray<>*>::iterator it = m_arrays.begin();
+ it != m_arrays.end(); it++)
+ if(*it != 0)
+ delete *it;
+#endif
+}
+
+size_t BlockHashIndex::GetHash(const char* key)
+{
+ std::string keyStr(key);
+ size_t i = std::distance(m_landmarks.begin(),
+ std::upper_bound(m_landmarks.begin(),
+ m_landmarks.end(), keyStr)) - 1;
+
+ if(i == 0ul-1)
+ return GetSize();
+
+ size_t pos = GetHash(i, key);
+ if(pos != GetSize())
+ return (1ul << m_orderBits) * i + pos;
+ else
+ return GetSize();
+}
+
+size_t BlockHashIndex::GetFprint(const char* key) const
+{
+ size_t hash;
+ MurmurHash3_x86_32(key, std::strlen(key), 100000, &hash);
+ hash &= (1ul << m_fingerPrintBits) - 1;
+ return hash;
+}
+
+size_t BlockHashIndex::GetHash(size_t i, const char* key)
+{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ if(m_hashes[i] == 0)
+ LoadRange(i);
+#ifdef HAVE_CMPH
+ size_t idx = cmph_search((cmph_t*)m_hashes[i], key, (cmph_uint32) strlen(key));
+#else
+ assert(0);
+ size_t idx = 0;
+#endif
+
+ std::pair<size_t, size_t> orderPrint = m_arrays[i]->Get(idx, m_orderBits, m_fingerPrintBits);
+ m_clocks[i] = clock();
+
+ if(GetFprint(key) == orderPrint.second)
+ return orderPrint.first;
+ else
+ return GetSize();
+}
+
+size_t BlockHashIndex::GetHash(std::string key)
+{
+ return GetHash(key.c_str());
+}
+
+size_t BlockHashIndex::operator[](std::string key)
+{
+ return GetHash(key);
+}
+
+size_t BlockHashIndex::operator[](char* key)
+{
+ return GetHash(key);
+}
+
+size_t BlockHashIndex::Save(std::string filename)
+{
+ std::FILE* mphf = std::fopen(filename.c_str(), "w");
+ size_t size = Save(mphf);
+ std::fclose(mphf);
+ return size;
+}
+
+void BlockHashIndex::BeginSave(std::FILE * mphf)
+{
+ m_fileHandle = mphf;
+ ThrowingFwrite(&m_orderBits, sizeof(size_t), 1, m_fileHandle);
+ ThrowingFwrite(&m_fingerPrintBits, sizeof(size_t), 1, m_fileHandle);
+
+ m_fileHandleStart = std::ftell(m_fileHandle);
+
+ size_t relIndexPos = 0;
+ ThrowingFwrite(&relIndexPos, sizeof(size_t), 1, m_fileHandle);
+}
+
+void BlockHashIndex::SaveRange(size_t i)
+{
+#ifdef HAVE_CMPH
+ if(m_seekIndex.size() <= i)
+ m_seekIndex.resize(i+1);
+ m_seekIndex[i] = std::ftell(m_fileHandle) - m_fileHandleStart;
+ cmph_dump((cmph_t*)m_hashes[i], m_fileHandle);
+ m_arrays[i]->Save(m_fileHandle);
+#endif
+}
+
+void BlockHashIndex::SaveLastRange()
+{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+ while(!m_queue.empty() && m_lastSaved + 1 == -m_queue.top())
+ {
+ size_t current = -m_queue.top();
+ m_queue.pop();
+ SaveRange(current);
+ m_lastSaved = current;
+ }
+}
+
+void BlockHashIndex::DropRange(size_t i)
+{
+#ifdef HAVE_CMPH
+ if(m_hashes[i] != 0)
+ {
+ cmph_destroy((cmph_t*)m_hashes[i]);
+ m_hashes[i] = 0;
+ }
+ if(m_arrays[i] != 0)
+ {
+ delete m_arrays[i];
+ m_arrays[i] = 0;
+ m_clocks[i] = 0;
+ }
+ m_numLoadedRanges--;
+#endif
+}
+
+void BlockHashIndex::DropLastRange()
+{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+ while(m_lastDropped != m_lastSaved)
+ DropRange(++m_lastDropped);
+}
+
+#ifdef WITH_THREADS
+void BlockHashIndex::WaitAll()
+{
+ m_threadPool.Stop(true);
+}
+#endif
+
+size_t BlockHashIndex::FinalizeSave()
+{
+#ifdef WITH_THREADS
+ m_threadPool.Stop(true);
+#endif
+
+ SaveLastRange();
+
+ size_t relIndexPos = std::ftell(m_fileHandle) - m_fileHandleStart;
+
+ std::fseek(m_fileHandle, m_fileHandleStart, SEEK_SET);
+ ThrowingFwrite(&relIndexPos, sizeof(size_t), 1, m_fileHandle);
+
+ std::fseek(m_fileHandle, m_fileHandleStart + relIndexPos, SEEK_SET);
+ m_landmarks.save(m_fileHandle);
+
+ size_t seekIndexSize = m_seekIndex.size();
+ ThrowingFwrite(&seekIndexSize, sizeof(size_t), 1, m_fileHandle);
+ ThrowingFwrite(&m_seekIndex[0], sizeof(size_t), seekIndexSize, m_fileHandle);
+
+ ThrowingFwrite(&m_size, sizeof(size_t), 1, m_fileHandle);
+
+ size_t fileHandleStop = std::ftell(m_fileHandle);
+ return fileHandleStop - m_fileHandleStart + sizeof(m_orderBits)
+ + sizeof(m_fingerPrintBits);
+}
+
+size_t BlockHashIndex::Save(std::FILE * mphf)
+{
+ m_queue = std::priority_queue<int>();
+ BeginSave(mphf);
+ for(size_t i = 0; i < m_hashes.size(); i++)
+ SaveRange(i);
+ return FinalizeSave();
+}
+
+size_t BlockHashIndex::LoadIndex(std::FILE* mphf)
+{
+ m_fileHandle = mphf;
+
+ size_t beginning = std::ftell(mphf);
+
+ size_t read = 0;
+ read += std::fread(&m_orderBits, sizeof(size_t), 1, mphf);
+ read += std::fread(&m_fingerPrintBits, sizeof(size_t), 1, mphf);
+ m_fileHandleStart = std::ftell(m_fileHandle);
+
+ size_t relIndexPos;
+ read += std::fread(&relIndexPos, sizeof(size_t), 1, mphf);
+ std::fseek(m_fileHandle, m_fileHandleStart + relIndexPos, SEEK_SET);
+
+ m_landmarks.load(mphf);
+
+ size_t seekIndexSize;
+ read += std::fread(&seekIndexSize, sizeof(size_t), 1, m_fileHandle);
+ m_seekIndex.resize(seekIndexSize);
+ read += std::fread(&m_seekIndex[0], sizeof(size_t), seekIndexSize, m_fileHandle);
+ m_hashes.resize(seekIndexSize, 0);
+ m_clocks.resize(seekIndexSize, 0);
+ m_arrays.resize(seekIndexSize, 0);
+
+ read += std::fread(&m_size, sizeof(size_t), 1, m_fileHandle);
+
+ size_t end = std::ftell(mphf);
+
+ return end - beginning;
+}
+
+void BlockHashIndex::LoadRange(size_t i)
+{
+#ifdef HAVE_CMPH
+ std::fseek(m_fileHandle, m_fileHandleStart + m_seekIndex[i], SEEK_SET);
+ cmph_t* hash = cmph_load(m_fileHandle);
+ m_arrays[i] = new PairedPackedArray<>(0, m_orderBits,
+ m_fingerPrintBits);
+ m_arrays[i]->Load(m_fileHandle);
+
+ m_hashes[i] = (void*)hash;
+ m_clocks[i] = clock();
+
+ m_numLoadedRanges++;
+#endif
+}
+
+size_t BlockHashIndex::Load(std::string filename)
+{
+ std::FILE* mphf = std::fopen(filename.c_str(), "r");
+ size_t size = Load(mphf);
+ std::fclose(mphf);
+ return size;
+}
+
+size_t BlockHashIndex::Load(std::FILE * mphf)
+{
+ size_t byteSize = LoadIndex(mphf);
+ size_t end = std::ftell(mphf);
+
+ for(size_t i = 0; i < m_seekIndex.size(); i++)
+ LoadRange(i);
+ std::fseek(m_fileHandle, end, SEEK_SET);
+ return byteSize;
+}
+
+size_t BlockHashIndex::GetSize() const
+{
+ return m_size;
+}
+
+void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
+{
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ size_t n = m_hashes.size() * ratio;
+ size_t max = n * (1 + tolerance);
+ if(m_numLoadedRanges > max)
+ {
+ typedef std::vector<std::pair<clock_t, size_t> > LastLoaded;
+ LastLoaded lastLoaded;
+ for(size_t i = 0; i < m_hashes.size(); i++)
+ if(m_hashes[i] != 0)
+ lastLoaded.push_back(std::make_pair(m_clocks[i], i));
+
+ std::sort(lastLoaded.begin(), lastLoaded.end());
+ for(LastLoaded::reverse_iterator it = lastLoaded.rbegin() + size_t(n * (1 - tolerance));
+ it != lastLoaded.rend(); it++)
+ DropRange(it->second);
+ }
+}
+
+void BlockHashIndex::CalcHash(size_t current, void* source_void)
+{
+#ifdef HAVE_CMPH
+ cmph_io_adapter_t* source = (cmph_io_adapter_t*) source_void;
+ cmph_config_t *config = cmph_config_new(source);
+ cmph_config_set_algo(config, CMPH_CHD);
+
+ cmph_t* hash = cmph_new(config);
+ PairedPackedArray<> *pv =
+ new PairedPackedArray<>(source->nkeys, m_orderBits, m_fingerPrintBits);
+
+ size_t i = 0;
+
+ source->rewind(source->data);
+
+ std::string lastKey = "";
+ while(i < source->nkeys)
+ {
+ unsigned keylen;
+ char* key;
+ source->read(source->data, &key, &keylen);
+ std::string temp(key, keylen);
+ source->dispose(source->data, key, keylen);
+
+ if(lastKey > temp) {
+ std::cerr << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
+ std::cerr << "1: " << lastKey << std::endl;
+ std::cerr << "2: " << temp << std::endl;
+ abort();
+ }
+ lastKey = temp;
+
+ size_t fprint = GetFprint(temp.c_str());
+ size_t idx = cmph_search(hash, temp.c_str(),
+ (cmph_uint32) temp.size());
+
+ pv->Set(idx, i, fprint, m_orderBits, m_fingerPrintBits);
+ i++;
+ }
+
+ cmph_config_destroy(config);
+
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+ if(m_hashes.size() <= current)
+ {
+ m_hashes.resize(current + 1, 0);
+ m_arrays.resize(current + 1, 0);
+ m_clocks.resize(current + 1, 0);
+ }
+
+ m_hashes[current] = (void*)hash;
+ m_arrays[current] = pv;
+ m_clocks[current] = clock();
+ m_queue.push(-current);
+#endif
+}
+
+#ifdef HAVE_CMPH
+void* BlockHashIndex::vectorAdapter(std::vector<std::string>& v)
+{
+ return (void*)CmphVectorAdapter(v);
+}
+
+void* BlockHashIndex::vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv)
+{
+ return (void*)CmphStringVectorAdapter(sv);
+}
+
+void* BlockHashIndex::vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv)
+{
+ return (void*)CmphStringVectorAdapter(sv);
+}
+#endif
+
+}
diff --git a/moses/src/CompactPT/BlockHashIndex.h b/moses/src/CompactPT/BlockHashIndex.h
new file mode 100644
index 000000000..1f982ebea
--- /dev/null
+++ b/moses/src/CompactPT/BlockHashIndex.h
@@ -0,0 +1,182 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_BlockHashIndex_h
+#define moses_BlockHashIndex_h
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <queue>
+#include <cstring>
+#include <cstdio>
+
+#include "MurmurHash3.h"
+#include "StringVector.h"
+#include "PackedArray.h"
+
+#ifdef WITH_THREADS
+#include "ThreadPool.h"
+#endif
+
+namespace Moses
+{
+
+class BlockHashIndex
+{
+ private:
+ std::priority_queue<int> m_queue;
+
+ size_t m_orderBits;
+ size_t m_fingerPrintBits;
+
+ std::FILE* m_fileHandle;
+ size_t m_fileHandleStart;
+
+ StringVector<unsigned char, unsigned long> m_landmarks;
+
+ std::vector<void*> m_hashes;
+ std::vector<clock_t> m_clocks;
+ std::vector<PairedPackedArray<>*> m_arrays;
+
+ std::vector<size_t> m_seekIndex;
+
+ size_t m_size;
+ int m_lastSaved;
+ int m_lastDropped;
+ size_t m_numLoadedRanges;
+
+#ifdef WITH_THREADS
+ ThreadPool m_threadPool;
+ boost::mutex m_mutex;
+
+ template <typename Keys>
+ class HashTask : public Task
+ {
+ public:
+ HashTask(int id, BlockHashIndex& hash, Keys& keys)
+ : m_id(id), m_hash(hash), m_keys(new Keys(keys)) {}
+
+ virtual void Run()
+ {
+ m_hash.CalcHash(m_id, *m_keys);
+ }
+
+ virtual ~HashTask()
+ {
+ delete m_keys;
+ }
+
+ private:
+ int m_id;
+ BlockHashIndex& m_hash;
+ Keys* m_keys;
+ };
+#endif
+
+ size_t GetFprint(const char* key) const;
+ size_t GetHash(size_t i, const char* key);
+
+ public:
+#ifdef WITH_THREADS
+ BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
+ size_t threadsNum = 2);
+#else
+ BlockHashIndex(size_t orderBits, size_t fingerPrintBits);
+#endif
+
+ ~BlockHashIndex();
+
+ size_t GetHash(const char* key);
+ size_t GetHash(std::string key);
+
+ size_t operator[](std::string key);
+ size_t operator[](char* key);
+
+ void BeginSave(std::FILE* mphf);
+ void SaveRange(size_t i);
+ void SaveLastRange();
+ size_t FinalizeSave();
+
+#ifdef WITH_THREADS
+ void WaitAll();
+#endif
+
+ void DropRange(size_t i);
+ void DropLastRange();
+
+ size_t LoadIndex(std::FILE* mphf);
+ void LoadRange(size_t i);
+
+ size_t Save(std::string filename);
+ size_t Save(std::FILE * mphf);
+
+ size_t Load(std::string filename);
+ size_t Load(std::FILE * mphf);
+
+ size_t GetSize() const;
+
+ void KeepNLastRanges(float ratio = 0.1, float tolerance = 0.1);
+
+ template <typename Keys>
+ void AddRange(Keys &keys)
+ {
+ size_t current = m_landmarks.size();
+
+ if(m_landmarks.size() && m_landmarks.back().str() >= keys[0])
+ {
+ std::cerr << "ERROR: Input file does not appear to be sorted with LC_ALL=C sort" << std::endl;
+ std::cerr << "1: " << m_landmarks.back().str() << std::endl;
+ std::cerr << "2: " << keys[0] << std::endl;
+ abort();
+ }
+
+ m_landmarks.push_back(keys[0]);
+ m_size += keys.size();
+
+#ifdef WITH_THREADS
+ HashTask<Keys>* ht = new HashTask<Keys>(current, *this, keys);
+ m_threadPool.Submit(ht);
+#else
+ CalcHash(current, keys);
+#endif
+ }
+
+ template <typename Keys>
+ void CalcHash(size_t current, Keys &keys)
+ {
+#ifdef HAVE_CMPH
+ void* source = vectorAdapter(keys);
+ CalcHash(current, source);
+#endif
+ }
+
+ void CalcHash(size_t current, void* source);
+
+#ifdef HAVE_CMPH
+ void* vectorAdapter(std::vector<std::string>& v);
+ void* vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv);
+ void* vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv);
+#endif
+};
+
+}
+#endif
diff --git a/moses/src/CompactPT/CanonicalHuffman.h b/moses/src/CompactPT/CanonicalHuffman.h
new file mode 100644
index 000000000..faf7ce411
--- /dev/null
+++ b/moses/src/CompactPT/CanonicalHuffman.h
@@ -0,0 +1,350 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_CanonicalHuffman_h
+#define moses_CanonicalHuffman_h
+
+#include <string>
+#include <algorithm>
+#include <boost/dynamic_bitset.hpp>
+#include <boost/unordered_map.hpp>
+
+#include "ThrowingFwrite.h"
+
+namespace Moses {
+
+template <typename Data>
+class CanonicalHuffman
+{
+ private:
+ std::vector<Data> m_symbols;
+ std::vector<size_t> m_firstCodes;
+ std::vector<size_t> m_lengthIndex;
+
+ typedef boost::unordered_map<Data, boost::dynamic_bitset<> > EncodeMap;
+ EncodeMap m_encodeMap;
+
+ struct MinHeapSorter {
+ std::vector<size_t>& m_vec;
+
+ MinHeapSorter(std::vector<size_t>& vec) : m_vec(vec) { }
+
+ bool operator()(size_t a, size_t b)
+ {
+ return m_vec[a] > m_vec[b];
+ }
+ };
+
+ template <class Iterator>
+ void CalcLengths(Iterator begin, Iterator end, std::vector<size_t>& lengths)
+ {
+ size_t n = std::distance(begin, end);
+ std::vector<size_t> A(2 * n, 0);
+
+ m_symbols.resize(n);
+ size_t i = 0;
+ for(Iterator it = begin; it != end; it++)
+ {
+ m_symbols[i] = it->first;
+
+ A[i] = n + i;
+ A[n + i] = it->second;
+ i++;
+ }
+
+ if(n == 1)
+ {
+ lengths.push_back(1);
+ return;
+ }
+
+ MinHeapSorter hs(A);
+ std::make_heap(A.begin(), A.begin() + n, hs);
+
+ size_t h = n;
+ size_t m1, m2;
+ while(h > 1)
+ {
+ m1 = A[0];
+ std::pop_heap(A.begin(), A.begin() + h, hs);
+
+ h--;
+
+ m2 = A[0];
+ std::pop_heap(A.begin(), A.begin() + h, hs);
+
+ A[h] = A[m1] + A[m2];
+ A[h-1] = h;
+ A[m1] = A[m2] = h;
+
+ std::push_heap(A.begin(), A.begin() + h, hs);
+ }
+
+ A[1] = 0;
+ for(size_t i = 2; i < 2*n; i++)
+ A[i] = A[A[i]] + 1;
+
+ lengths.resize(n);
+ for(size_t i = 0; i < n; i++)
+ lengths[i] = A[i + n];
+ }
+
+ void CalcCodes(std::vector<size_t>& lengths)
+ {
+ std::vector<size_t> numLength;
+ for(std::vector<size_t>::iterator it = lengths.begin();
+ it != lengths.end(); it++) {
+ size_t length = *it;
+ if(numLength.size() <= length)
+ numLength.resize(length + 1, 0);
+ numLength[length]++;
+ }
+
+ m_lengthIndex.resize(numLength.size());
+ m_lengthIndex[0] = 0;
+ for(size_t l = 1; l < numLength.size(); l++)
+ m_lengthIndex[l] = m_lengthIndex[l - 1] + numLength[l - 1];
+
+ size_t maxLength = numLength.size() - 1;
+
+ m_firstCodes.resize(maxLength + 1, 0);
+ for(size_t l = maxLength - 1; l > 0; l--)
+ m_firstCodes[l] = (m_firstCodes[l + 1] + numLength[l + 1]) / 2;
+
+ std::vector<Data> t_symbols;
+ t_symbols.resize(lengths.size());
+
+ std::vector<size_t> nextCode = m_firstCodes;
+ for(size_t i = 0; i < lengths.size(); i++)
+ {
+ Data data = m_symbols[i];
+ size_t length = lengths[i];
+
+ size_t pos = m_lengthIndex[length]
+ + (nextCode[length] - m_firstCodes[length]);
+ t_symbols[pos] = data;
+
+ nextCode[length] = nextCode[length] + 1;
+ }
+
+ m_symbols.swap(t_symbols);
+ }
+
+ void CreateCodeMap()
+ {
+ for(size_t l = 1; l < m_lengthIndex.size(); l++)
+ {
+ size_t intCode = m_firstCodes[l];
+ size_t num = ((l+1 < m_lengthIndex.size()) ? m_lengthIndex[l+1]
+ : m_symbols.size()) - m_lengthIndex[l];
+
+ for(size_t i = 0; i < num; i++)
+ {
+ Data data = m_symbols[m_lengthIndex[l] + i];
+ boost::dynamic_bitset<> bitCode(l, intCode);
+ m_encodeMap[data] = bitCode;
+ intCode++;
+ }
+ }
+ }
+
+ boost::dynamic_bitset<>& Encode(Data data)
+ {
+ return m_encodeMap[data];
+ }
+
+ template <class BitWrapper>
+ void PutCode(BitWrapper& bitWrapper, boost::dynamic_bitset<>& code)
+ {
+ for(int j = code.size()-1; j >= 0; j--)
+ bitWrapper.Put(code[j]);
+ }
+
+ public:
+
+ template <class Iterator>
+ CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true)
+ {
+ std::vector<size_t> lengths;
+ CalcLengths(begin, end, lengths);
+ CalcCodes(lengths);
+
+ if(forEncoding)
+ CreateCodeMap();
+ }
+
+ CanonicalHuffman(std::FILE* pFile, bool forEncoding = false)
+ {
+ Load(pFile);
+
+ if(forEncoding)
+ CreateCodeMap();
+ }
+
+ template <class BitWrapper>
+ void Put(BitWrapper& bitWrapper, Data data)
+ {
+ PutCode(bitWrapper, Encode(data));
+ }
+
+ template <class BitWrapper>
+ Data Read(BitWrapper& bitWrapper)
+ {
+ if(bitWrapper.TellFromEnd())
+ {
+ size_t intCode = bitWrapper.Read();
+ size_t len = 1;
+ while(intCode < m_firstCodes[len]) {
+ intCode = 2 * intCode + bitWrapper.Read();
+ len++;
+ }
+ return m_symbols[m_lengthIndex[len] + (intCode - m_firstCodes[len])];
+ }
+ return Data();
+ }
+
+ size_t Load(std::FILE* pFile)
+ {
+ size_t start = std::ftell(pFile);
+ size_t read = 0;
+
+ size_t size;
+ read += std::fread(&size, sizeof(size_t), 1, pFile);
+ m_symbols.resize(size);
+ read += std::fread(&m_symbols[0], sizeof(Data), size, pFile);
+
+ read += std::fread(&size, sizeof(size_t), 1, pFile);
+ m_firstCodes.resize(size);
+ read += std::fread(&m_firstCodes[0], sizeof(size_t), size, pFile);
+
+ read += std::fread(&size, sizeof(size_t), 1, pFile);
+ m_lengthIndex.resize(size);
+ read += std::fread(&m_lengthIndex[0], sizeof(size_t), size, pFile);
+
+ return std::ftell(pFile) - start;
+ }
+
+ size_t Save(std::FILE* pFile)
+ {
+ size_t start = std::ftell(pFile);
+
+ size_t size = m_symbols.size();
+ ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
+ ThrowingFwrite(&m_symbols[0], sizeof(Data), size, pFile);
+
+ size = m_firstCodes.size();
+ ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
+ ThrowingFwrite(&m_firstCodes[0], sizeof(size_t), size, pFile);
+
+ size = m_lengthIndex.size();
+ ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
+ ThrowingFwrite(&m_lengthIndex[0], sizeof(size_t), size, pFile);
+
+ return std::ftell(pFile) - start;
+ }
+};
+
+template <class Container = std::string>
+class BitWrapper
+{
+ private:
+ Container& m_data;
+
+ typename Container::iterator m_iterator;
+ typename Container::value_type m_currentValue;
+
+ size_t m_valueBits;
+ typename Container::value_type m_mask;
+ size_t m_bitPos;
+
+ public:
+
+ BitWrapper(Container &data)
+ : m_data(data), m_iterator(m_data.begin()), m_currentValue(0),
+ m_valueBits(sizeof(typename Container::value_type) * 8),
+ m_mask(1), m_bitPos(0) { }
+
+ bool Read()
+ {
+ if(m_bitPos % m_valueBits == 0)
+ {
+ if(m_iterator != m_data.end())
+ m_currentValue = *m_iterator++;
+ }
+ else
+ m_currentValue = m_currentValue >> 1;
+
+ m_bitPos++;
+ return (m_currentValue & m_mask);
+ }
+
+ void Put(bool bit) {
+ if(m_bitPos % m_valueBits == 0)
+ m_data.push_back(0);
+
+ if(bit)
+ m_data[m_data.size()-1] |= m_mask << (m_bitPos % m_valueBits);
+
+ m_bitPos++;
+ }
+
+ size_t Tell()
+ {
+ return m_bitPos;
+ }
+
+ size_t TellFromEnd()
+ {
+ if(m_data.size() * m_valueBits < m_bitPos)
+ return 0;
+ return m_data.size() * m_valueBits - m_bitPos;
+ }
+
+ void Seek(size_t bitPos)
+ {
+ m_bitPos = bitPos;
+ m_iterator = m_data.begin() + int((m_bitPos-1)/m_valueBits);
+ m_currentValue = (*m_iterator) >> ((m_bitPos-1) % m_valueBits);
+ m_iterator++;
+ }
+
+ void SeekFromEnd(size_t bitPosFromEnd)
+ {
+ size_t bitPos = m_data.size() * m_valueBits - bitPosFromEnd;
+ Seek(bitPos);
+ }
+
+ void Reset()
+ {
+ m_iterator = m_data.begin();
+ m_currentValue = 0;
+ m_bitPos = 0;
+ }
+
+ Container& GetContainer()
+ {
+ return m_data;
+ }
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/CmphStringVectorAdapter.cpp b/moses/src/CompactPT/CmphStringVectorAdapter.cpp
new file mode 100644
index 000000000..40fff6690
--- /dev/null
+++ b/moses/src/CompactPT/CmphStringVectorAdapter.cpp
@@ -0,0 +1,94 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifdef HAVE_CMPH
+
+#include "CmphStringVectorAdapter.h"
+
+namespace Moses
+{
+
+ void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen)
+ {
+ delete[] key;
+ }
+
+ void CmphStringVectorAdapterRewind(void *data)
+ {
+ cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+ cmph_vector->position = 0;
+ }
+
+ //************************************************************************//
+
+ cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v)
+ {
+ cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
+ cmph_vector_t * cmph_vector = (cmph_vector_t *)malloc(sizeof(cmph_vector_t));
+ assert(key_source);
+ assert(cmph_vector);
+
+ cmph_vector->vector = (void *)&v;
+ cmph_vector->position = 0;
+ key_source->data = (void *)cmph_vector;
+ key_source->nkeys = v.size();
+
+ return key_source;
+ }
+
+ int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
+ {
+ cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+ std::vector<std::string>* v = (std::vector<std::string>*)cmph_vector->vector;
+ size_t size;
+ *keylen = (*v)[cmph_vector->position].size();
+ size = *keylen;
+ *key = new char[size + 1];
+ std::string temp = (*v)[cmph_vector->position];
+ strcpy(*key, temp.c_str());
+ cmph_vector->position = cmph_vector->position + 1;
+ return (int)(*keylen);
+ }
+
+ void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen)
+ {
+ delete[] key;
+ }
+
+ void CmphVectorAdapterRewind(void *data)
+ {
+ cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+ cmph_vector->position = 0;
+ }
+
+ cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v)
+ {
+ cmph_io_adapter_t * key_source = CmphVectorAdapterNew(v);
+
+ key_source->read = CmphVectorAdapterRead;
+ key_source->dispose = CmphVectorAdapterDispose;
+ key_source->rewind = CmphVectorAdapterRewind;
+ return key_source;
+ }
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/CmphStringVectorAdapter.h b/moses/src/CompactPT/CmphStringVectorAdapter.h
new file mode 100644
index 000000000..5516d4f4d
--- /dev/null
+++ b/moses/src/CompactPT/CmphStringVectorAdapter.h
@@ -0,0 +1,106 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_CmphStringVectorAdapterNew_h
+#define moses_CmphStringVectorAdapterNew_h
+
+#include <cassert>
+#include <cstring>
+
+#ifdef HAVE_CMPH
+#include "cmph.h"
+
+#include "StringVector.h"
+
+namespace Moses
+{
+
+ typedef struct
+ {
+ void *vector;
+ cmph_uint32 position;
+ }
+ cmph_vector_t;
+
+
+ template <typename ValueT, typename PosT, template <typename> class Allocator>
+ cmph_io_adapter_t *CmphStringVectorAdapterNew(StringVector<ValueT, PosT, Allocator>& sv)
+ {
+ cmph_io_adapter_t * key_source = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
+ cmph_vector_t * cmph_vector = (cmph_vector_t *)malloc(sizeof(cmph_vector_t));
+ assert(key_source);
+ assert(cmph_vector);
+
+ cmph_vector->vector = (void *)&sv;
+ cmph_vector->position = 0;
+ key_source->data = (void *)cmph_vector;
+ key_source->nkeys = sv.size();
+
+ return key_source;
+ }
+
+ template <typename ValueT, typename PosT, template <typename> class Allocator>
+ int CmphStringVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
+ {
+ cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+ StringVector<ValueT, PosT, Allocator>* sv = (StringVector<ValueT, PosT, Allocator>*)cmph_vector->vector;
+ size_t size;
+ *keylen = (*sv)[cmph_vector->position].size();
+ size = *keylen;
+ *key = new char[size + 1];
+ std::string temp = (*sv)[cmph_vector->position];
+ std::strcpy(*key, temp.c_str());
+ cmph_vector->position = cmph_vector->position + 1;
+ return (int)(*keylen);
+ }
+
+ void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
+
+ void CmphStringVectorAdapterRewind(void *data);
+
+ template <typename ValueT, typename PosT, template <typename> class Allocator>
+ cmph_io_adapter_t* CmphStringVectorAdapter(StringVector<ValueT, PosT, Allocator>& sv)
+ {
+ cmph_io_adapter_t * key_source = CmphStringVectorAdapterNew(sv);
+
+ key_source->read = CmphStringVectorAdapterRead<ValueT, PosT, Allocator>;
+ key_source->dispose = CmphStringVectorAdapterDispose;
+ key_source->rewind = CmphStringVectorAdapterRewind;
+ return key_source;
+ }
+
+ //************************************************************************//
+
+ cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v);
+
+ int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen);
+
+ void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
+
+ void CmphVectorAdapterRewind(void *data);
+
+ cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v);
+
+}
+
+#endif
+
+#endif
diff --git a/moses/src/CompactPT/ConsistentPhrases.h b/moses/src/CompactPT/ConsistentPhrases.h
new file mode 100644
index 000000000..0ec86e1ac
--- /dev/null
+++ b/moses/src/CompactPT/ConsistentPhrases.h
@@ -0,0 +1,127 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_ConsistentPhrases_h
+#define moses_ConsistentPhrases_h
+
+#include <set>
+
+namespace Moses
+{
+
+class ConsistentPhrases
+{
+ public:
+ struct Phrase
+ {
+ int i, j, m, n;
+ Phrase(int i_, int m_, int j_, int n_) : i(i_), j(j_), m(m_), n(n_) { }
+ };
+
+ struct PhraseSorter
+ {
+ bool operator()(Phrase a, Phrase b)
+ {
+ if(a.n > b.n)
+ return true;
+ if(a.n == b.n && a.j < b.j)
+ return true;
+ if(a.n == b.n && a.j == b.j && a.m > b.m)
+ return true;
+ if(a.n == b.n && a.j == b.j && a.m == b.m && a.i < b.i)
+ return true;
+ return false;
+ }
+ };
+
+ private:
+ typedef std::set<Phrase, PhraseSorter> PhraseQueue;
+ PhraseQueue m_phraseQueue;
+
+ typedef std::pair<unsigned char, unsigned char> AlignPoint;
+ typedef std::set<AlignPoint> Alignment;
+
+ public:
+
+ ConsistentPhrases(int mmax, int nmax, Alignment& a)
+ {
+ for(int i = 0; i < mmax; i++)
+ {
+ for(int m = 1; m <= mmax-i; m++)
+ {
+ for(int j = 0; j < nmax; j++)
+ {
+ for(int n = 1; n <= nmax-j; n++)
+ {
+ bool consistant = true;
+ for(Alignment::iterator it = a.begin(); it != a.end(); it++)
+ {
+ int ip = it->first;
+ int jp = it->second;
+ if((i <= ip && ip < i+m) != (j <= jp && jp < j+n))
+ {
+ consistant = false;
+ break;
+ }
+ }
+ if(consistant)
+ m_phraseQueue.insert(Phrase(i, m, j, n));
+ }
+ }
+ }
+ }
+ m_phraseQueue.erase(Phrase(0, mmax, 0, nmax));
+ }
+
+ size_t Empty()
+ {
+ return !m_phraseQueue.size();
+ }
+
+ Phrase Pop()
+ {
+ if(m_phraseQueue.size())
+ {
+ Phrase p = *m_phraseQueue.begin();
+ m_phraseQueue.erase(m_phraseQueue.begin());
+ return p;
+ }
+ return Phrase(0,0,0,0);
+ }
+
+ void RemoveOverlap(Phrase p)
+ {
+ PhraseQueue ok;
+ for(PhraseQueue::iterator it = m_phraseQueue.begin(); it != m_phraseQueue.end(); it++)
+ {
+ Phrase pp = *it;
+ if(!((p.i <= pp.i && pp.i < p.i + p.m) || (pp.i <= p.i && p.i < pp.i + pp.m) ||
+ (p.j <= pp.j && pp.j < p.j + p.n) || (pp.j <= p.j && p.j < pp.j + pp.n)))
+ ok.insert(pp);
+ }
+ m_phraseQueue = ok;
+ }
+
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/Jamfile b/moses/src/CompactPT/Jamfile
new file mode 100644
index 000000000..c4c5db96f
--- /dev/null
+++ b/moses/src/CompactPT/Jamfile
@@ -0,0 +1,18 @@
+local current = "" ;
+local includes = ;
+local with-cmph = [ option.get "with-cmph" ] ;
+if $(with-cmph) {
+ lib cmph : : <search>$(with-cmph)/lib ;
+ includes += <include>$(with-cmph)/include ;
+ current = "--with-cmph=$(with-cmph)" ;
+}
+else {
+ alias cmph ;
+}
+
+alias sources : [ glob *.cpp ] ;
+
+path-constant PT-LOG : bin/pt.log ;
+update-if-changed $(PT-LOG) $(current) ;
+
+lib CompactPT : sources ..//moses_internal cmph : $(includes) <dependency>$(PT-LOG) ;
diff --git a/moses/src/CompactPT/LexicalReorderingTableCompact.cpp b/moses/src/CompactPT/LexicalReorderingTableCompact.cpp
new file mode 100644
index 000000000..fab430b63
--- /dev/null
+++ b/moses/src/CompactPT/LexicalReorderingTableCompact.cpp
@@ -0,0 +1,157 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "LexicalReorderingTableCompact.h"
+
+namespace Moses {
+
+LexicalReorderingTableCompact::LexicalReorderingTableCompact(
+ const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+ : LexicalReorderingTable(f_factors, e_factors, c_factors),
+ m_inMemory(StaticData::Instance().UseMinlexrInMemory()),
+ m_numScoreComponent(6), m_multipleScoreTrees(true),
+ m_hash(10, 16), m_scoreTrees(1)
+{
+ Load(filePath);
+}
+
+LexicalReorderingTableCompact::LexicalReorderingTableCompact(
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+ : LexicalReorderingTable(f_factors, e_factors, c_factors),
+ m_inMemory(StaticData::Instance().UseMinlexrInMemory()),
+ m_numScoreComponent(6), m_multipleScoreTrees(true),
+ m_hash(10, 16), m_scoreTrees(1)
+{ }
+
+LexicalReorderingTableCompact::~LexicalReorderingTableCompact() {
+ for(size_t i = 0; i < m_scoreTrees.size(); i++)
+ delete m_scoreTrees[i];
+}
+
+std::vector<float> LexicalReorderingTableCompact::GetScore(const Phrase& f,
+ const Phrase& e,
+ const Phrase& c)
+{
+ std::string key;
+ Scores scores;
+
+ if(0 == c.GetSize())
+ key = MakeKey(f, e, c);
+ else
+ for(size_t i = 0; i <= c.GetSize(); ++i)
+ {
+ Phrase sub_c(c.GetSubString(WordsRange(i,c.GetSize()-1)));
+ key = MakeKey(f,e,sub_c);
+ }
+
+ size_t index = m_hash[key];
+ if(m_hash.GetSize() != index)
+ {
+ std::string scoresString;
+ if(m_inMemory)
+ scoresString = m_scoresMemory[index];
+ else
+ scoresString = m_scoresMapped[index];
+
+ BitWrapper<> bitStream(scoresString);
+ for(size_t i = 0; i < m_numScoreComponent; i++)
+ scores.push_back(m_scoreTrees[m_multipleScoreTrees ? i : 0]->Read(bitStream));
+
+ return scores;
+ }
+
+ return Scores();
+}
+
+std::string LexicalReorderingTableCompact::MakeKey(const Phrase& f,
+ const Phrase& e,
+ const Phrase& c) const
+{
+ return MakeKey(Trim(f.GetStringRep(m_FactorsF)),
+ Trim(e.GetStringRep(m_FactorsE)),
+ Trim(c.GetStringRep(m_FactorsC)));
+}
+
+std::string LexicalReorderingTableCompact::MakeKey(const std::string& f,
+ const std::string& e,
+ const std::string& c) const
+{
+ std::string key;
+ if(!f.empty())
+ {
+ key += f;
+ }
+ if(!m_FactorsE.empty())
+ {
+ if(!key.empty())
+ {
+ key += " ||| ";
+ }
+ key += e;
+ }
+ if(!m_FactorsC.empty())
+ {
+ if(!key.empty())
+ {
+ key += " ||| ";
+ }
+ key += c;
+ }
+ key += " ||| ";
+ return key;
+}
+
+void LexicalReorderingTableCompact::Load(std::string filePath)
+{
+ std::FILE* pFile = std::fopen(filePath.c_str(), "r");
+ if(m_inMemory)
+ m_hash.Load(pFile);
+ else
+ m_hash.LoadIndex(pFile);
+
+ size_t read = 0;
+ read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
+ read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, pFile);
+
+ if(m_multipleScoreTrees)
+ {
+ m_scoreTrees.resize(m_numScoreComponent);
+ for(size_t i = 0; i < m_numScoreComponent; i++)
+ m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
+ }
+ else
+ {
+ m_scoreTrees.resize(1);
+ m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
+ }
+
+ if(m_inMemory)
+ m_scoresMemory.load(pFile, false);
+ else
+ m_scoresMapped.load(pFile, true);
+}
+
+}
diff --git a/moses/src/CompactPT/LexicalReorderingTableCompact.h b/moses/src/CompactPT/LexicalReorderingTableCompact.h
new file mode 100644
index 000000000..d9c473b46
--- /dev/null
+++ b/moses/src/CompactPT/LexicalReorderingTableCompact.h
@@ -0,0 +1,77 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LexicalReorderingTableCompact_h
+#define moses_LexicalReorderingTableCompact_h
+
+#include "LexicalReorderingTable.h"
+#include "StaticData.h"
+#include "PhraseDictionary.h"
+#include "GenerationDictionary.h"
+#include "TargetPhrase.h"
+#include "TargetPhraseCollection.h"
+
+#include "BlockHashIndex.h"
+#include "CanonicalHuffman.h"
+#include "StringVector.h"
+
+namespace Moses {
+
+class LexicalReorderingTableCompact: public LexicalReorderingTable
+{
+ private:
+ bool m_inMemory;
+
+ size_t m_numScoreComponent;
+ bool m_multipleScoreTrees;
+
+ BlockHashIndex m_hash;
+
+ typedef CanonicalHuffman<float> ScoreTree;
+ std::vector<ScoreTree*> m_scoreTrees;
+
+ StringVector<unsigned char, unsigned long, MmapAllocator> m_scoresMapped;
+ StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
+
+ std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
+ std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
+
+ public:
+ LexicalReorderingTableCompact(
+ const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
+
+ LexicalReorderingTableCompact(
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors);
+
+ virtual ~LexicalReorderingTableCompact();
+
+ virtual std::vector<float> GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+ void Load(std::string filePath);
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/LexicalReorderingTableCreator.cpp b/moses/src/CompactPT/LexicalReorderingTableCreator.cpp
new file mode 100644
index 000000000..f474f4cb6
--- /dev/null
+++ b/moses/src/CompactPT/LexicalReorderingTableCreator.cpp
@@ -0,0 +1,433 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "LexicalReorderingTableCreator.h"
+#include "ThrowingFwrite.h"
+
+namespace Moses {
+
+LexicalReorderingTableCreator::LexicalReorderingTableCreator(
+ std::string inPath, std::string outPath,
+ size_t orderBits, size_t fingerPrintBits, bool multipleScoreTrees,
+ size_t quantize
+#ifdef WITH_THREADS
+ , size_t threads
+#endif
+ )
+ : m_inPath(inPath), m_outPath(outPath), m_orderBits(orderBits),
+ m_fingerPrintBits(fingerPrintBits), m_numScoreComponent(0),
+ m_multipleScoreTrees(multipleScoreTrees), m_quantize(quantize),
+ m_separator(" ||| "), m_hash(m_orderBits, m_fingerPrintBits),
+ m_lastFlushedLine(-1)
+#ifdef WITH_THREADS
+ , m_threads(threads)
+#endif
+{
+ PrintInfo();
+
+ m_outFile = std::fopen(m_outPath.c_str(), "w");
+
+ std::cerr << "Pass 1/2: Creating phrase index + Counting scores" << std::endl;
+ m_hash.BeginSave(m_outFile);
+ EncodeScores();
+
+ std::cerr << "Intermezzo: Calculating Huffman code sets" << std::endl;
+ CalcHuffmanCodes();
+
+ std::cerr << "Pass 2/2: Compressing scores" << std::endl;
+ CompressScores();
+
+ std::cerr << "Saving to " << m_outPath << std::endl;
+ Save();
+ std::cerr << "Done" << std::endl;
+ std::fclose(m_outFile);
+}
+
+void LexicalReorderingTableCreator::PrintInfo()
+{
+ std::cerr << "Used options:" << std::endl;
+ std::cerr << "\tText reordering table will be read from: " << m_inPath << std::endl;
+ std::cerr << "\tOutput reordering table will be written to: " << m_outPath << std::endl;
+ std::cerr << "\tStep size for source landmark phrases: 2^" << m_orderBits << "=" << (1ul << m_orderBits) << std::endl;
+ std::cerr << "\tPhrase fingerprint size: " << m_fingerPrintBits << " bits / P(fp)=" << (float(1)/(1ul << m_fingerPrintBits)) << std::endl;
+ std::cerr << "\tSingle Huffman code set for score components: " << (m_multipleScoreTrees ? "no" : "yes") << std::endl;
+ std::cerr << "\tUsing score quantization: ";
+ if(m_quantize)
+ std::cerr << m_quantize << " best" << std::endl;
+ else
+ std::cerr << "no" << std::endl;
+
+#ifdef WITH_THREADS
+ std::cerr << "\tRunning with " << m_threads << " threads" << std::endl;
+#endif
+ std::cerr << std::endl;
+}
+
+LexicalReorderingTableCreator::~LexicalReorderingTableCreator()
+{
+ for(size_t i = 0; i < m_scoreTrees.size(); i++) {
+ delete m_scoreTrees[i];
+ delete m_scoreCounters[i];
+ }
+}
+
+
+void LexicalReorderingTableCreator::EncodeScores()
+{
+ InputFileStream inFile(m_inPath);
+
+#ifdef WITH_THREADS
+ boost::thread_group threads;
+ for (size_t i = 0; i < m_threads; ++i)
+ {
+ EncodingTaskReordering* et = new EncodingTaskReordering(inFile, *this);
+ threads.create_thread(*et);
+ }
+ threads.join_all();
+#else
+ EncodingTaskReordering* et = new EncodingTaskReordering(inFile, *this);
+ (*et)();
+ delete et;
+#endif
+ FlushEncodedQueue(true);
+}
+
+void LexicalReorderingTableCreator::CalcHuffmanCodes()
+{
+ std::vector<ScoreTree*>::iterator treeIt = m_scoreTrees.begin();
+ for(std::vector<ScoreCounter*>::iterator it = m_scoreCounters.begin();
+ it != m_scoreCounters.end(); it++)
+ {
+ if(m_quantize)
+ (*it)->Quantize(m_quantize);
+
+ std::cerr << "\tCreating Huffman codes for " << (*it)->Size()
+ << " scores" << std::endl;
+
+ *treeIt = new ScoreTree((*it)->Begin(), (*it)->End());
+ treeIt++;
+ }
+ std::cerr << std::endl;
+}
+
+void LexicalReorderingTableCreator::CompressScores()
+{
+#ifdef WITH_THREADS
+ boost::thread_group threads;
+ for (size_t i = 0; i < m_threads; ++i) {
+ CompressionTaskReordering* ct = new CompressionTaskReordering(m_encodedScores, *this);
+ threads.create_thread(*ct);
+ }
+ threads.join_all();
+#else
+ CompressionTaskReordering* ct = new CompressionTaskReordering(m_encodedScores, *this);
+ (*ct)();
+ delete ct;
+#endif
+ FlushCompressedQueue(true);
+}
+
+void LexicalReorderingTableCreator::Save()
+{
+ ThrowingFwrite(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, m_outFile);
+ ThrowingFwrite(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, m_outFile);
+ for(size_t i = 0; i < m_scoreTrees.size(); i++)
+ m_scoreTrees[i]->Save(m_outFile);
+
+ m_compressedScores.save(m_outFile);
+}
+
+std::string LexicalReorderingTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
+{
+ std::string key = source + m_separator;
+ if(!target.empty())
+ key += target + m_separator;
+ return key;
+}
+
+std::string LexicalReorderingTableCreator::EncodeLine(std::vector<std::string>& tokens)
+{
+ std::string scoresString = tokens.back();
+ std::stringstream scoresStream;
+
+ std::vector<float> scores;
+ Tokenize<float>(scores, scoresString);
+
+ if(!m_numScoreComponent) {
+ m_numScoreComponent = scores.size();
+ m_scoreCounters.resize(m_multipleScoreTrees ? m_numScoreComponent : 1);
+ for(std::vector<ScoreCounter*>::iterator it = m_scoreCounters.begin();
+ it != m_scoreCounters.end(); it++)
+ *it = new ScoreCounter();
+ m_scoreTrees.resize(m_multipleScoreTrees ? m_numScoreComponent : 1);
+ }
+
+ if(m_numScoreComponent != scores.size()) {
+ std::cerr << "Error: Wrong number of scores detected ("
+ << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+ std::cerr << "Line: " << tokens[0] << " ||| ... ||| " << scoresString << std::endl;
+ abort();
+ }
+
+ size_t c = 0;
+ float score;
+ while(c < m_numScoreComponent)
+ {
+ score = scores[c];
+ score = FloorScore(TransformScore(score));
+ scoresStream.write((char*)&score, sizeof(score));
+
+ m_scoreCounters[m_multipleScoreTrees ? c : 0]->Increase(score);
+ c++;
+ }
+
+ return scoresStream.str();
+}
+
+void LexicalReorderingTableCreator::AddEncodedLine(PackedItem& pi)
+{
+ m_queue.push(pi);
+}
+
+void LexicalReorderingTableCreator::FlushEncodedQueue(bool force) {
+ if(force || m_queue.size() > 10000)
+ {
+ while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
+ {
+ PackedItem pi = m_queue.top();
+ m_queue.pop();
+ m_lastFlushedLine++;
+
+ m_lastRange.push_back(pi.GetSrc());
+ m_encodedScores.push_back(pi.GetTrg());
+
+ if((pi.GetLine()+1) % 100000 == 0)
+ std::cerr << ".";
+ if((pi.GetLine()+1) % 5000000 == 0)
+ std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl;
+
+ if(m_lastRange.size() == (1ul << m_orderBits))
+ {
+ m_hash.AddRange(m_lastRange);
+ m_hash.SaveLastRange();
+ m_hash.DropLastRange();
+ m_lastRange.clear();
+ }
+ }
+ }
+
+ if(force)
+ {
+ m_lastFlushedLine = -1;
+
+ m_hash.AddRange(m_lastRange);
+ m_lastRange.clear();
+
+#ifdef WITH_THREADS
+ m_hash.WaitAll();
+#endif
+
+ m_hash.SaveLastRange();
+ m_hash.DropLastRange();
+ m_hash.FinalizeSave();
+
+ std::cerr << std::endl << std::endl;
+ }
+}
+
+std::string LexicalReorderingTableCreator::CompressEncodedScores(std::string &encodedScores) {
+ std::stringstream encodedScoresStream(encodedScores);
+ encodedScoresStream.unsetf(std::ios::skipws);
+
+ std::string compressedScores;
+ BitWrapper<> compressedScoresStream(compressedScores);
+
+ size_t currScore = 0;
+ float score;
+ encodedScoresStream.read((char*) &score, sizeof(score));
+
+ while(encodedScoresStream) {
+ size_t index = currScore % m_scoreTrees.size();
+
+ if(m_quantize)
+ score = m_scoreCounters[index]->LowerBound(score);
+
+ m_scoreTrees[index]->Put(compressedScoresStream, score);
+ encodedScoresStream.read((char*) &score, sizeof(score));
+ currScore++;
+ }
+
+ return compressedScores;
+}
+
+void LexicalReorderingTableCreator::AddCompressedScores(PackedItem& pi) {
+ m_queue.push(pi);
+}
+
+void LexicalReorderingTableCreator::FlushCompressedQueue(bool force)
+{
+ if(force || m_queue.size() > 10000)
+ {
+ while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
+ {
+ PackedItem pi = m_queue.top();
+ m_queue.pop();
+ m_lastFlushedLine++;
+
+ m_compressedScores.push_back(pi.GetTrg());
+
+ if((pi.GetLine()+1) % 100000 == 0)
+ std::cerr << ".";
+ if((pi.GetLine()+1) % 5000000 == 0)
+ std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl;
+ }
+ }
+
+ if(force)
+ {
+ m_lastFlushedLine = -1;
+ std::cerr << std::endl << std::endl;
+ }
+}
+
+//****************************************************************************//
+
+size_t EncodingTaskReordering::m_lineNum = 0;
+#ifdef WITH_THREADS
+boost::mutex EncodingTaskReordering::m_mutex;
+boost::mutex EncodingTaskReordering::m_fileMutex;
+#endif
+
+EncodingTaskReordering::EncodingTaskReordering(InputFileStream& inFile, LexicalReorderingTableCreator& creator)
+ : m_inFile(inFile), m_creator(creator) {}
+
+void EncodingTaskReordering::operator()()
+{
+ size_t lineNum = 0;
+
+ std::vector<std::string> lines;
+ size_t max_lines = 1000;
+ lines.reserve(max_lines);
+
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_fileMutex);
+#endif
+ std::string line;
+ while(lines.size() < max_lines && std::getline(m_inFile, line))
+ lines.push_back(line);
+ lineNum = m_lineNum;
+ m_lineNum += lines.size();
+ }
+
+ std::vector<PackedItem> result;
+ result.reserve(max_lines);
+
+ while(lines.size())
+ {
+ for(size_t i = 0; i < lines.size(); i++)
+ {
+ std::vector<std::string> tokens;
+ Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
+
+ std::string encodedLine = m_creator.EncodeLine(tokens);
+
+ std::string f = tokens[0];
+
+ std::string e;
+ if(tokens.size() > 2)
+ e = tokens[1];
+
+ PackedItem packedItem(lineNum + i, m_creator.MakeSourceTargetKey(f, e),
+ encodedLine, i);
+ result.push_back(packedItem);
+ }
+ lines.clear();
+
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ for(size_t i = 0; i < result.size(); i++)
+ m_creator.AddEncodedLine(result[i]);
+ m_creator.FlushEncodedQueue();
+ }
+
+ result.clear();
+ lines.reserve(max_lines);
+ result.reserve(max_lines);
+
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_fileMutex);
+#endif
+ std::string line;
+ while(lines.size() < max_lines && std::getline(m_inFile, line))
+ lines.push_back(line);
+ lineNum = m_lineNum;
+ m_lineNum += lines.size();
+ }
+}
+
+//****************************************************************************//
+
+size_t CompressionTaskReordering::m_scoresNum = 0;
+#ifdef WITH_THREADS
+boost::mutex CompressionTaskReordering::m_mutex;
+#endif
+
+CompressionTaskReordering::CompressionTaskReordering(StringVector<unsigned char, unsigned long,
+ MmapAllocator>& encodedScores,
+ LexicalReorderingTableCreator& creator)
+ : m_encodedScores(encodedScores), m_creator(creator)
+{ }
+
+void CompressionTaskReordering::operator()()
+{
+ size_t scoresNum;
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ scoresNum = m_scoresNum;
+ m_scoresNum++;
+ }
+
+ while(scoresNum < m_encodedScores.size())
+ {
+ std::string scores = m_encodedScores[scoresNum];
+ std::string compressedScores
+ = m_creator.CompressEncodedScores(scores);
+
+ std::string dummy;
+ PackedItem packedItem(scoresNum, dummy, compressedScores, 0);
+
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ m_creator.AddCompressedScores(packedItem);
+ m_creator.FlushCompressedQueue();
+
+ scoresNum = m_scoresNum;
+ m_scoresNum++;
+ }
+}
+
+}
diff --git a/moses/src/CompactPT/LexicalReorderingTableCreator.h b/moses/src/CompactPT/LexicalReorderingTableCreator.h
new file mode 100644
index 000000000..68b37dcb8
--- /dev/null
+++ b/moses/src/CompactPT/LexicalReorderingTableCreator.h
@@ -0,0 +1,139 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_LexicalReorderingTableCreator_h
+#define moses_LexicalReorderingTableCreator_h
+
+#include "PhraseTableCreator.h"
+
+namespace Moses {
+
+class LexicalReorderingTableCreator {
+ private:
+ std::string m_inPath;
+ std::string m_outPath;
+
+ std::FILE* m_outFile;
+
+ size_t m_orderBits;
+ size_t m_fingerPrintBits;
+
+ size_t m_numScoreComponent;
+
+ bool m_multipleScoreTrees;
+ bool m_quantize;
+
+ std::string m_separator;
+
+ BlockHashIndex m_hash;
+
+ typedef Counter<float> ScoreCounter;
+ typedef CanonicalHuffman<float> ScoreTree;
+
+ std::vector<ScoreCounter*> m_scoreCounters;
+ std::vector<ScoreTree*> m_scoreTrees;
+
+ StringVector<unsigned char, unsigned long, MmapAllocator> m_encodedScores;
+ StringVector<unsigned char, unsigned long, MmapAllocator> m_compressedScores;
+
+ std::priority_queue<PackedItem> m_queue;
+ long m_lastFlushedLine;
+ long m_lastFlushedSourceNum;
+ std::string m_lastFlushedSourcePhrase;
+ std::vector<std::string> m_lastRange;
+
+#ifdef WITH_THREADS
+ size_t m_threads;
+#endif
+
+ void PrintInfo();
+
+ void EncodeScores();
+ void CalcHuffmanCodes();
+ void CompressScores();
+ void Save();
+
+ std::string MakeSourceTargetKey(std::string&, std::string&);
+
+ std::string EncodeLine(std::vector<std::string>& tokens);
+ void AddEncodedLine(PackedItem& pi);
+ void FlushEncodedQueue(bool force = false);
+
+ std::string CompressEncodedScores(std::string &encodedScores);
+ void AddCompressedScores(PackedItem& pi);
+ void FlushCompressedQueue(bool force = false);
+
+ public:
+ LexicalReorderingTableCreator(std::string inPath,
+ std::string outPath,
+ size_t orderBits = 10,
+ size_t fingerPrintBits = 16,
+ bool multipleScoreTrees = true,
+ size_t quantize = 0
+#ifdef WITH_THREADS
+ , size_t threads = 2
+#endif
+ );
+
+ ~LexicalReorderingTableCreator();
+
+ friend class EncodingTaskReordering;
+ friend class CompressionTaskReordering;
+};
+
+class EncodingTaskReordering
+{
+ private:
+#ifdef WITH_THREADS
+ static boost::mutex m_mutex;
+ static boost::mutex m_fileMutex;
+#endif
+ static size_t m_lineNum;
+ static size_t m_sourcePhraseNum;
+ static std::string m_lastSourcePhrase;
+
+ InputFileStream& m_inFile;
+ LexicalReorderingTableCreator& m_creator;
+
+ public:
+ EncodingTaskReordering(InputFileStream& inFile, LexicalReorderingTableCreator& creator);
+ void operator()();
+};
+
+class CompressionTaskReordering
+{
+ private:
+#ifdef WITH_THREADS
+ static boost::mutex m_mutex;
+#endif
+ static size_t m_scoresNum;
+ StringVector<unsigned char, unsigned long, MmapAllocator> &m_encodedScores;
+ LexicalReorderingTableCreator &m_creator;
+
+ public:
+ CompressionTaskReordering(StringVector<unsigned char, unsigned long, MmapAllocator>&
+ m_encodedScores, LexicalReorderingTableCreator& creator);
+ void operator()();
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/ListCoders.h b/moses/src/CompactPT/ListCoders.h
new file mode 100644
index 000000000..329e1297a
--- /dev/null
+++ b/moses/src/CompactPT/ListCoders.h
@@ -0,0 +1,309 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_ListCoders_h
+#define moses_ListCoders_h
+
+#include <cmath>
+#include <cassert>
+
+namespace Moses
+{
+
+template <typename T = unsigned int>
+class VarIntType
+{
+ private:
+ template <typename IntType, typename OutIt>
+ static void EncodeSymbol(IntType input, OutIt output)
+ {
+ if(input == 0)
+ {
+ *output = 0;
+ output++;
+ return;
+ }
+
+ T msb = 1 << (sizeof(T)*8-1);
+ IntType mask = ~msb;
+ IntType shift = (sizeof(T)*8-1);
+
+ while(input)
+ {
+ T res = input & mask;
+ input >>= shift;
+ if(input)
+ res |= msb;
+ *output = res;
+ output++;
+ }
+ };
+
+ template <typename InIt, typename IntType>
+ static void DecodeSymbol(InIt &it, InIt end, IntType &output)
+ {
+ T msb = 1 << (sizeof(T)*8-1);
+ IntType shift = (sizeof(T)*8-1);
+
+ output = 0;
+ size_t i = 0;
+ while(it != end && *it & msb) {
+ IntType temp = *it & ~msb;
+ temp <<= shift*i;
+ output |= temp;
+ it++; i++;
+ }
+ assert(it != end);
+
+ IntType temp = *it;
+ temp <<= shift*i;
+ output |= temp;
+ it++;
+ }
+
+ public:
+
+ template <typename InIt, typename OutIt>
+ static void Encode(InIt it, InIt end, OutIt outIt)
+ {
+ while(it != end)
+ {
+ EncodeSymbol(*it, outIt);
+ it++;
+ }
+ }
+
+ template <typename InIt, typename OutIt>
+ static void Decode(InIt &it, InIt end, OutIt outIt)
+ {
+ while(it != end)
+ {
+ size_t output;
+ DecodeSymbol(it, end, output);
+ *outIt = output;
+ outIt++;
+ }
+ }
+
+ template <typename InIt>
+ static size_t DecodeAndSum(InIt &it, InIt end, size_t num)
+ {
+ size_t sum = 0;
+ size_t curr = 0;
+
+ while(it != end && curr < num)
+ {
+ size_t output;
+ DecodeSymbol(it, end, output);
+ sum += output; curr++;
+ }
+
+ return sum;
+ }
+
+};
+
+typedef VarIntType<unsigned char> VarByte;
+
+typedef VarByte VarInt8;
+typedef VarIntType<unsigned short> VarInt16;
+typedef VarIntType<unsigned int> VarInt32;
+
+class Simple9
+{
+ private:
+ typedef unsigned int uint;
+
+ template <typename InIt>
+ inline static void EncodeSymbol(uint &output, InIt it, InIt end)
+ {
+ uint length = end - it;
+
+ uint type = 0;
+ uint bitlength = 0;
+
+ switch(length)
+ {
+ case 1: type = 1; bitlength = 28; break;
+ case 2: type = 2; bitlength = 14; break;
+ case 3: type = 3; bitlength = 9; break;
+ case 4: type = 4; bitlength = 7; break;
+ case 5: type = 5; bitlength = 5; break;
+ case 7: type = 6; bitlength = 4; break;
+ case 9: type = 7; bitlength = 3; break;
+ case 14: type = 8; bitlength = 2; break;
+ case 28: type = 9; bitlength = 1; break;
+ }
+
+ output = 0;
+ output |= (type << 28);
+
+ uint i = 0;
+ while(it != end)
+ {
+ uint l = bitlength * (length-i-1);
+ output |= *it << l;
+ it++;
+ i++;
+ }
+ }
+
+ template <typename OutIt>
+ static inline void DecodeSymbol(uint input, OutIt outIt)
+ {
+ uint type = (input >> 28);
+
+ uint bitlen = 0;
+ uint shift = 0;
+ uint mask = 0;
+
+ switch(type)
+ {
+ case 1: bitlen = 28; shift = 0; mask = 268435455; break;
+ case 2: bitlen = 14; shift = 14; mask = 16383; break;
+ case 3: bitlen = 9; shift = 18; mask = 511; break;
+ case 4: bitlen = 7; shift = 21; mask = 127; break;
+ case 5: bitlen = 5; shift = 20; mask = 31; break;
+ case 6: bitlen = 4; shift = 24; mask = 15; break;
+ case 7: bitlen = 3; shift = 24; mask = 7; break;
+ case 8: bitlen = 2; shift = 26; mask = 3; break;
+ case 9: bitlen = 1; shift = 27; mask = 1; break;
+ }
+
+ while(shift > 0)
+ {
+ *outIt = (input >> shift) & mask;
+ shift -= bitlen;
+ outIt++;
+ }
+ *outIt = input & mask;
+ outIt++;
+ }
+
+ static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr)
+ {
+ uint type = (input >> 28);
+
+ uint bitlen = 0;
+ uint shift = 0;
+ uint mask = 0;
+
+ switch(type)
+ {
+ case 1: bitlen = 28; shift = 0; mask = 268435455; break;
+ case 2: bitlen = 14; shift = 14; mask = 16383; break;
+ case 3: bitlen = 9; shift = 18; mask = 511; break;
+ case 4: bitlen = 7; shift = 21; mask = 127; break;
+ case 5: bitlen = 5; shift = 20; mask = 31; break;
+ case 6: bitlen = 4; shift = 24; mask = 15; break;
+ case 7: bitlen = 3; shift = 24; mask = 7; break;
+ case 8: bitlen = 2; shift = 26; mask = 3; break;
+ case 9: bitlen = 1; shift = 27; mask = 1; break;
+ }
+
+ size_t sum = 0;
+ while(shift > 0)
+ {
+ sum += (input >> shift) & mask;
+ shift -= bitlen;
+ if(++curr == num)
+ return sum;
+ }
+ sum += input & mask;
+ curr++;
+ return sum;
+ }
+
+ public:
+ template <typename InIt, typename OutIt>
+ static void Encode(InIt it, InIt end, OutIt outIt)
+ {
+ uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };
+
+ uint buffer[28];
+ for(InIt i = it; i < end; i++)
+ {
+ uint lastbit = 1;
+ uint lastpos = 0;
+ uint lastyes = 0;
+ uint j = 0;
+
+ double log2 = log(2);
+ while(j < 9 && lastpos < 28 && (i+lastpos) < end)
+ {
+ if(lastpos >= parts[j])
+ j++;
+
+ buffer[lastpos] = *(i + lastpos);
+
+ uint reqbit = ceil(log(buffer[lastpos]+1)/log2);
+ assert(reqbit <= 28);
+
+ uint bit = 28/floor(28/reqbit);
+ if(lastbit < bit)
+ lastbit = bit;
+
+ if(parts[j] > 28/lastbit)
+ break;
+ else if(lastpos == parts[j]-1)
+ lastyes = lastpos;
+
+ lastpos++;
+ }
+ i += lastyes;
+
+ uint length = lastyes + 1;
+ uint output;
+ EncodeSymbol(output, buffer, buffer + length);
+
+ *outIt = output;
+ outIt++;
+ }
+ }
+
+ template <typename InIt, typename OutIt>
+ static void Decode(InIt &it, InIt end, OutIt outIt)
+ {
+ while(it != end)
+ {
+ DecodeSymbol(*it, outIt);
+ it++;
+ }
+ }
+
+ template <typename InIt>
+ static size_t DecodeAndSum(InIt &it, InIt end, size_t num)
+ {
+ size_t sum = 0;
+ size_t curr = 0;
+ while(it != end && curr < num)
+ {
+ sum += DecodeAndSumSymbol(*it, num, curr);
+ it++;
+ }
+ assert(curr == num);
+ return sum;
+ }
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/MmapAllocator.h b/moses/src/CompactPT/MmapAllocator.h
new file mode 100644
index 000000000..c4655692b
--- /dev/null
+++ b/moses/src/CompactPT/MmapAllocator.h
@@ -0,0 +1,204 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_MmapAllocator_h
+#define moses_MmapAllocator_h
+
+#include <limits>
+#include <iostream>
+#include <sys/mman.h>
+#include <cstdio>
+#include <unistd.h>
+
+namespace Moses
+{
+ template <class T>
+ class MmapAllocator
+ {
+ protected:
+ std::FILE* m_file_ptr;
+ size_t m_file_desc;
+
+ size_t m_page_size;
+ size_t m_map_size;
+
+ char* m_data_ptr;
+ size_t m_data_offset;
+ bool m_fixed;
+ size_t* m_count;
+
+ public:
+ typedef T value_type;
+ typedef T* pointer;
+ typedef const T* const_pointer;
+ typedef T& reference;
+ typedef const T& const_reference;
+ typedef std::size_t size_type;
+ typedef std::ptrdiff_t difference_type;
+
+ MmapAllocator() throw()
+ : m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
+ m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_data_offset(0), m_fixed(false), m_count(new size_t(0))
+ { }
+
+ MmapAllocator(std::FILE* f_ptr) throw()
+ : m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
+ m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_data_offset(0), m_fixed(false), m_count(new size_t(0))
+ { }
+
+ MmapAllocator(std::FILE* f_ptr, size_t data_offset = 0) throw()
+ : m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
+ m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0))
+ { }
+
+ MmapAllocator(std::string fileName) throw()
+ : m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
+ m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_data_offset(0), m_fixed(false), m_count(new size_t(0))
+ { }
+
+ MmapAllocator(const MmapAllocator& c) throw()
+ : m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc),
+ m_page_size(c.m_page_size), m_map_size(c.m_map_size),
+ m_data_ptr(c.m_data_ptr), m_data_offset(c.m_data_offset),
+ m_fixed(c.m_fixed), m_count(c.m_count)
+ {
+ (*m_count)++;
+ }
+
+ ~MmapAllocator() throw()
+ {
+ if(m_data_ptr && *m_count == 0)
+ {
+ munmap(m_data_ptr, m_map_size);
+ if(!m_fixed && std::ftell(m_file_ptr) != -1)
+ std::fclose(m_file_ptr);
+ }
+ (*m_count)--;
+ }
+
+ template <class U>
+ struct rebind {
+ typedef MmapAllocator<U> other;
+ };
+
+ pointer address (reference value) const
+ {
+ return &value;
+ }
+
+ const_pointer address (const_reference value) const
+ {
+ return &value;
+ }
+
+ size_type max_size () const throw()
+ {
+ return std::numeric_limits<size_t>::max() / sizeof(value_type);
+ }
+
+ pointer allocate (size_type num, const void* = 0)
+ {
+ m_map_size = num * sizeof(T);
+
+ if(!m_fixed)
+ {
+ size_t read = 0;
+ read += ftruncate(m_file_desc, m_map_size);
+ m_data_ptr = (char*)mmap(0, m_map_size, PROT_READ|PROT_WRITE, MAP_SHARED,
+ m_file_desc, 0);
+ if(m_data_ptr == MAP_FAILED)
+ std::cerr << "Error: mmapping" << std::endl;
+ return (pointer)m_data_ptr;
+ }
+ else
+ {
+ size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
+ size_t relative_offset = m_data_offset - map_offset;
+
+ size_t map_size = m_map_size + relative_offset;
+
+ m_data_ptr = (char*)mmap(0, map_size, PROT_READ, MAP_SHARED,
+ m_file_desc, map_offset);
+
+ return (pointer)(m_data_ptr + relative_offset);
+ }
+ }
+
+ void deallocate (pointer p, size_type num)
+ {
+ if(!m_fixed) {
+ munmap(p, num * sizeof(T));
+ }
+ else {
+ size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
+ size_t relative_offset = m_data_offset - map_offset;
+ munmap((pointer)((char*)p - relative_offset), num * sizeof(T));
+ }
+
+ }
+
+ void construct (pointer p, const T& value)
+ {
+ if(!m_fixed)
+ new(p) value_type(value);
+ }
+ void destroy (pointer p)
+ {
+ if(!m_fixed)
+ p->~T();
+ }
+
+ template <class T1, class T2>
+ friend bool operator== (const MmapAllocator<T1>&, const MmapAllocator<T2>&) throw();
+
+ template <class T1, class T2>
+ friend bool operator!= (const MmapAllocator<T1>&, const MmapAllocator<T2>&) throw();
+ };
+
+ template <class T1, class T2>
+ bool operator== (const MmapAllocator<T1>& a1,
+ const MmapAllocator<T2>& a2) throw()
+ {
+ bool equal = true;
+ equal &= a1.m_file_ptr == a2.m_file_ptr;
+ equal &= a1.m_file_desc == a2.m_file_desc;
+ equal &= a1.m_page_size == a2.m_page_size;
+ equal &= a1.m_map_size == a2.m_map_size;
+ equal &= a1.m_data_ptr == a2.m_data_ptr;
+ equal &= a1.m_data_offset == a2.m_data_offset;
+ equal &= a1.m_fixed == a2.m_fixed;
+ return equal;
+ }
+
+ template <class T1, class T2>
+ bool operator!=(const MmapAllocator<T1>& a1,
+ const MmapAllocator<T2>& a2) throw()
+ {
+ return !(a1 == a2);
+ }
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/MonotonicVector.h b/moses/src/CompactPT/MonotonicVector.h
new file mode 100644
index 000000000..a4423c369
--- /dev/null
+++ b/moses/src/CompactPT/MonotonicVector.h
@@ -0,0 +1,249 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_MonotonicVector_h
+#define moses_MonotonicVector_h
+
+// MonotonicVector - Represents a monotonic increasing function that maps
+// positive integers of any size onto a given number type. Each value has to be
+// equal or larger than the previous one. Depending on the stepSize it can save
+// up to 90% of memory compared to a std::vector<long>. Time complexity is roughly
+// constant, in the worst case, however, stepSize times slower than a normal
+// std::vector.
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <cstdio>
+#include <cassert>
+
+#include "ThrowingFwrite.h"
+#include "ListCoders.h"
+#include "MmapAllocator.h"
+
+namespace Moses
+{
+
+template<typename PosT = size_t, typename NumT = size_t, PosT stepSize = 32,
+template <typename> class Allocator = std::allocator>
+class MonotonicVector
+{
+ private:
+ typedef std::vector<NumT, Allocator<NumT> > Anchors;
+ typedef std::vector<unsigned int, Allocator<unsigned int> > Diffs;
+
+ Anchors m_anchors;
+ Diffs m_diffs;
+ std::vector<unsigned int> m_tempDiffs;
+
+ size_t m_size;
+ PosT m_last;
+ bool m_final;
+
+ public:
+ typedef PosT value_type;
+
+ MonotonicVector() : m_size(0), m_last(0), m_final(false) {}
+
+ size_t size() const
+ {
+ return m_size + m_tempDiffs.size();
+ }
+
+ PosT at(size_t i) const
+ {
+ PosT s = stepSize;
+ PosT j = m_anchors[i / s];
+ PosT r = i % s;
+
+ typename Diffs::const_iterator it = m_diffs.begin() + j;
+
+ PosT k = 0;
+ k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1);
+ if(i < m_size)
+ k += Simple9::DecodeAndSum(it, m_diffs.end(), r);
+ else if(i < m_size + m_tempDiffs.size())
+ for(size_t l = 0; l < r; l++)
+ k += m_tempDiffs[l];
+
+ return k;
+ }
+
+ PosT operator[](PosT i) const
+ {
+ return at(i);
+ }
+
+ PosT back() const
+ {
+ return at(size()-1);
+ }
+
+ void push_back(PosT i)
+ {
+ assert(m_final != true);
+
+ if(m_anchors.size() == 0 && m_tempDiffs.size() == 0)
+ {
+ m_anchors.push_back(0);
+ VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs));
+ m_last = i;
+ m_size++;
+
+ return;
+ }
+
+ if(m_tempDiffs.size() == stepSize-1)
+ {
+ Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
+ std::back_inserter(m_diffs));
+ m_anchors.push_back(m_diffs.size());
+ VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs));
+
+ m_size += m_tempDiffs.size() + 1;
+ m_tempDiffs.clear();
+ }
+ else
+ {
+ PosT last = m_last;
+ PosT diff = i - last;
+ m_tempDiffs.push_back(diff);
+ }
+ m_last = i;
+ }
+
+ void commit()
+ {
+ assert(m_final != true);
+ Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
+ std::back_inserter(m_diffs));
+ m_size += m_tempDiffs.size();
+ m_tempDiffs.clear();
+ m_final = true;
+ }
+
+ size_t usage()
+ {
+ return m_diffs.size() * sizeof(unsigned int)
+ + m_anchors.size() * sizeof(NumT);
+ }
+
+ size_t load(std::FILE* in, bool map = false)
+ {
+ size_t byteSize = 0;
+
+ byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool);
+ byteSize += fread(&m_size, sizeof(size_t), 1, in) * sizeof(size_t);
+ byteSize += fread(&m_last, sizeof(PosT), 1, in) * sizeof(PosT);
+
+ byteSize += loadVector(m_diffs, in, map);
+ byteSize += loadVector(m_anchors, in, map);
+
+ return byteSize;
+ }
+
+ template <typename ValueT>
+ size_t loadVector(std::vector<ValueT, std::allocator<ValueT> >& v,
+ std::FILE* in, bool map = false)
+ {
+ // Can only be read into memory. Mapping not possible with std:allocator.
+ assert(map == false);
+
+ size_t byteSize = 0;
+
+ size_t valSize;
+ byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+ v.resize(valSize, 0);
+ byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+
+ return byteSize;
+ }
+
+ template <typename ValueT>
+ size_t loadVector(std::vector<ValueT, MmapAllocator<ValueT> >& v,
+ std::FILE* in, bool map = false)
+ {
+ size_t byteSize = 0;
+
+ size_t valSize;
+ byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+ if(map == false)
+ {
+ // Read data into temporary file (default constructor of MmapAllocator)
+ // and map memory onto temporary file. Can be resized.
+
+ v.resize(valSize, 0);
+ byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+ }
+ else
+ {
+ // Map it directly on specified region of file "in" starting at valPos
+ // with length valSize * sizeof(ValueT). Mapped region cannot be resized.
+
+ size_t valPos = std::ftell(in);
+
+ Allocator<ValueT> alloc(in, valPos);
+ std::vector<ValueT, Allocator<ValueT> > vTemp(alloc);
+ vTemp.resize(valSize);
+ v.swap(vTemp);
+
+ std::fseek(in, valSize * sizeof(ValueT), SEEK_CUR);
+ byteSize += valSize * sizeof(ValueT);
+ }
+
+ return byteSize;
+ }
+
+ size_t save(std::FILE* out)
+ {
+ if(!m_final)
+ commit();
+
+ bool byteSize = 0;
+ byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool);
+ byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out) * sizeof(size_t);
+ byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT);
+
+ size_t size = m_diffs.size();
+ byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
+ byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out) * sizeof(unsigned int);
+
+ size = m_anchors.size();
+ byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
+ byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out) * sizeof(NumT);
+
+ return byteSize;
+ }
+
+ void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv)
+ {
+ if(!m_final)
+ commit();
+
+ m_diffs.swap(mv.m_diffs);
+ m_anchors.swap(mv.m_anchors);
+ }
+};
+
+}
+#endif
diff --git a/moses/src/CompactPT/MurmurHash3.cpp b/moses/src/CompactPT/MurmurHash3.cpp
new file mode 100644
index 000000000..0bf738662
--- /dev/null
+++ b/moses/src/CompactPT/MurmurHash3.cpp
@@ -0,0 +1,335 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE __forceinline
+
+#include <stdlib.h>
+
+#define ROTL32(x,y) _rotl(x,y)
+#define ROTL64(x,y) _rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#define FORCE_INLINE __attribute__((always_inline))
+
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+ return (x << r) | (x >> (32 - r));
+}
+
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+ return (x << r) | (x >> (64 - r));
+}
+
+#define ROTL32(x,y) rotl32(x,y)
+#define ROTL64(x,y) rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+ return p[i];
+}
+
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+ return p[i];
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+ h ^= h >> 16;
+ h *= 0x85ebca6b;
+ h ^= h >> 13;
+ h *= 0xc2b2ae35;
+ h ^= h >> 16;
+
+ return h;
+}
+
+//----------
+
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xff51afd7ed558ccd);
+ k ^= k >> 33;
+ k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+ k ^= k >> 33;
+
+ return k;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len,
+ uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 4;
+
+ uint32_t h1 = seed;
+
+ uint32_t c1 = 0xcc9e2d51;
+ uint32_t c2 = 0x1b873593;
+
+ //----------
+ // body
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+ for(int i = -nblocks; i; i++)
+ {
+ uint32_t k1 = getblock(blocks,i);
+
+ k1 *= c1;
+ k1 = ROTL32(k1,15);
+ k1 *= c2;
+
+ h1 ^= k1;
+ h1 = ROTL32(h1,13);
+ h1 = h1*5+0xe6546b64;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+ uint32_t k1 = 0;
+
+ switch(len & 3)
+ {
+ case 3: k1 ^= tail[2] << 16;
+ case 2: k1 ^= tail[1] << 8;
+ case 1: k1 ^= tail[0];
+ k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len;
+
+ h1 = fmix(h1);
+
+ *(uint32_t*)out = h1;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_128 ( const void * key, const int len,
+ uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint32_t h1 = seed;
+ uint32_t h2 = seed;
+ uint32_t h3 = seed;
+ uint32_t h4 = seed;
+
+ uint32_t c1 = 0x239b961b;
+ uint32_t c2 = 0xab0e9789;
+ uint32_t c3 = 0x38b34ae5;
+ uint32_t c4 = 0xa1e38b93;
+
+ //----------
+ // body
+
+ const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+ for(int i = -nblocks; i; i++)
+ {
+ uint32_t k1 = getblock(blocks,i*4+0);
+ uint32_t k2 = getblock(blocks,i*4+1);
+ uint32_t k3 = getblock(blocks,i*4+2);
+ uint32_t k4 = getblock(blocks,i*4+3);
+
+ k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+
+ h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
+
+ k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+ h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
+
+ k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+ h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
+
+ k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+ h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint32_t k1 = 0;
+ uint32_t k2 = 0;
+ uint32_t k3 = 0;
+ uint32_t k4 = 0;
+
+ switch(len & 15)
+ {
+ case 15: k4 ^= tail[14] << 16;
+ case 14: k4 ^= tail[13] << 8;
+ case 13: k4 ^= tail[12] << 0;
+ k4 *= c4; k4 = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
+
+ case 12: k3 ^= tail[11] << 24;
+ case 11: k3 ^= tail[10] << 16;
+ case 10: k3 ^= tail[ 9] << 8;
+ case 9: k3 ^= tail[ 8] << 0;
+ k3 *= c3; k3 = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
+
+ case 8: k2 ^= tail[ 7] << 24;
+ case 7: k2 ^= tail[ 6] << 16;
+ case 6: k2 ^= tail[ 5] << 8;
+ case 5: k2 ^= tail[ 4] << 0;
+ k2 *= c2; k2 = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
+
+ case 4: k1 ^= tail[ 3] << 24;
+ case 3: k1 ^= tail[ 2] << 16;
+ case 2: k1 ^= tail[ 1] << 8;
+ case 1: k1 ^= tail[ 0] << 0;
+ k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;
+
+ h1 += h2; h1 += h3; h1 += h4;
+ h2 += h1; h3 += h1; h4 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+ h3 = fmix(h3);
+ h4 = fmix(h4);
+
+ h1 += h2; h1 += h3; h1 += h4;
+ h2 += h1; h3 += h1; h4 += h1;
+
+ ((uint32_t*)out)[0] = h1;
+ ((uint32_t*)out)[1] = h2;
+ ((uint32_t*)out)[2] = h3;
+ ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x64_128 ( const void * key, const int len,
+ const uint32_t seed, void * out )
+{
+ const uint8_t * data = (const uint8_t*)key;
+ const int nblocks = len / 16;
+
+ uint64_t h1 = seed;
+ uint64_t h2 = seed;
+
+ uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+ uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+ //----------
+ // body
+
+ const uint64_t * blocks = (const uint64_t *)(data);
+
+ for(int i = 0; i < nblocks; i++)
+ {
+ uint64_t k1 = getblock(blocks,i*2+0);
+ uint64_t k2 = getblock(blocks,i*2+1);
+
+ k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+
+ h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
+
+ k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+ h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
+ }
+
+ //----------
+ // tail
+
+ const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+ uint64_t k1 = 0;
+ uint64_t k2 = 0;
+
+ switch(len & 15)
+ {
+ case 15: k2 ^= uint64_t(tail[14]) << 48;
+ case 14: k2 ^= uint64_t(tail[13]) << 40;
+ case 13: k2 ^= uint64_t(tail[12]) << 32;
+ case 12: k2 ^= uint64_t(tail[11]) << 24;
+ case 11: k2 ^= uint64_t(tail[10]) << 16;
+ case 10: k2 ^= uint64_t(tail[ 9]) << 8;
+ case 9: k2 ^= uint64_t(tail[ 8]) << 0;
+ k2 *= c2; k2 = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
+
+ case 8: k1 ^= uint64_t(tail[ 7]) << 56;
+ case 7: k1 ^= uint64_t(tail[ 6]) << 48;
+ case 6: k1 ^= uint64_t(tail[ 5]) << 40;
+ case 5: k1 ^= uint64_t(tail[ 4]) << 32;
+ case 4: k1 ^= uint64_t(tail[ 3]) << 24;
+ case 3: k1 ^= uint64_t(tail[ 2]) << 16;
+ case 2: k1 ^= uint64_t(tail[ 1]) << 8;
+ case 1: k1 ^= uint64_t(tail[ 0]) << 0;
+ k1 *= c1; k1 = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
+ };
+
+ //----------
+ // finalization
+
+ h1 ^= len; h2 ^= len;
+
+ h1 += h2;
+ h2 += h1;
+
+ h1 = fmix(h1);
+ h2 = fmix(h2);
+
+ h1 += h2;
+ h2 += h1;
+
+ ((uint64_t*)out)[0] = h1;
+ ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
diff --git a/moses/src/CompactPT/MurmurHash3.h b/moses/src/CompactPT/MurmurHash3.h
new file mode 100644
index 000000000..58e98204d
--- /dev/null
+++ b/moses/src/CompactPT/MurmurHash3.h
@@ -0,0 +1,37 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else // defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+void MurmurHash3_x86_32 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
diff --git a/moses/src/CompactPT/PackedArray.h b/moses/src/CompactPT/PackedArray.h
new file mode 100644
index 000000000..b74a98850
--- /dev/null
+++ b/moses/src/CompactPT/PackedArray.h
@@ -0,0 +1,201 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PackedArray_h
+#define moses_PackedArray_h
+
+#include <vector>
+#include <cmath>
+#include <cstring>
+#include <cstdio>
+
+#include "ThrowingFwrite.h"
+
+namespace Moses
+{
+
+template <typename T = size_t, typename D = unsigned char>
+class PackedArray
+{
+ protected:
+ static size_t m_dataBits;
+
+ size_t m_size;
+ size_t m_storageSize;
+ D* m_storage;
+
+ public:
+ PackedArray()
+ {
+ m_size = 0;
+ m_storageSize = 0;
+ m_storage = new D[0];
+ }
+
+ PackedArray(size_t size, size_t bits) : m_size(size)
+ {
+ m_storageSize = ceil(float(bits * size) / float(m_dataBits));
+ m_storage = new D[m_storageSize];
+ }
+
+ PackedArray(const PackedArray<T, D> &c)
+ {
+ m_size = c.m_size;
+
+ m_storageSize = c.m_storageSize;
+ m_storage = new D[m_storageSize];
+
+ std::memcpy(m_storage, c.m_storage, m_storageSize * sizeof(D));
+ }
+
+ ~PackedArray()
+ {
+ delete [] m_storage;
+ m_size = 0;
+ m_storageSize = 0;
+ m_storage = 0;
+ }
+
+ T Get(size_t i, size_t bits) const
+ {
+ T out = 0;
+
+ size_t bitstart = (i * bits);
+ size_t bitpos = bitstart;
+
+ size_t zero = ((1ul << (bits)) - 1);
+
+ while(bitpos - bitstart < bits) {
+ size_t pos = bitpos / m_dataBits;
+ size_t off = bitpos % m_dataBits;
+
+ out |= (T(m_storage[pos]) << (bitpos - bitstart)) >> off;
+
+ bitpos += (m_dataBits - off);
+ }
+
+ out &= zero;
+ return out;
+ }
+
+ void Set(size_t i, T v, size_t bits)
+ {
+ size_t bitstart = (i * bits);
+ size_t bitpos = bitstart;
+
+ while(bitpos - bitstart < bits) {
+ size_t pos = bitpos / m_dataBits;
+ size_t off = bitpos % m_dataBits;
+
+ size_t rest = bits - (bitpos - bitstart);
+ D zero = ~((1ul << (rest + off)) - 1) | ((1ul << off) - 1);
+
+ m_storage[pos] &= zero;
+ m_storage[pos] |= v << off;
+ v = v >> (m_dataBits - off);
+ bitpos += (m_dataBits - off);
+ }
+ }
+
+ virtual D*& GetStorage()
+ {
+ return m_storage;
+ }
+
+ virtual size_t GetStorageSize() const
+ {
+ return m_storageSize;
+ }
+
+ virtual size_t Size() const
+ {
+ return m_size;
+ }
+
+ virtual size_t Load(std::FILE* in)
+ {
+ size_t a1 = std::ftell(in);
+
+ size_t read = 0;
+ read += std::fread(&m_size, sizeof(m_size), 1, in);
+ read += std::fread(&m_storageSize, sizeof(m_storageSize), 1, in);
+ delete [] m_storage;
+ m_storage = new D[m_storageSize];
+ read += std::fread(m_storage, sizeof(D), m_storageSize, in);
+
+ size_t a2 = std::ftell(in);
+ return a2 - a1;
+ }
+
+ virtual size_t Save(std::FILE* out)
+ {
+ size_t a1 = std::ftell(out);
+
+ ThrowingFwrite(&m_size, sizeof(m_size), 1, out);
+ ThrowingFwrite(&m_storageSize, sizeof(m_storageSize), 1, out);
+ ThrowingFwrite(m_storage, sizeof(D), m_storageSize, out);
+
+ size_t a2 = std::ftell(out);
+ return a2 - a1;
+ }
+
+};
+
+template <typename T, typename D>
+size_t PackedArray<T, D>::m_dataBits = sizeof(D)*8;
+
+/**************************************************************************/
+
+template <typename T = size_t, typename D = unsigned char>
+class PairedPackedArray : public PackedArray<T,D>
+{
+ public:
+ PairedPackedArray() : PackedArray<T,D>() {}
+
+ PairedPackedArray(size_t size, size_t bits1, size_t bits2)
+ : PackedArray<T, D>(size, bits1 + bits2) { }
+
+ void Set(size_t i, T a, T b, size_t bits1, size_t bits2)
+ {
+ T c = 0;
+ c = a | (b << bits1);
+ PackedArray<T,D>::Set(i, c, bits1 + bits2);
+ }
+
+ void Set(size_t i, std::pair<T,T> p, size_t bits1, size_t bits2)
+ {
+ T c = 0;
+ c = p.second | (p.first << bits1);
+ PackedArray<T, D>::Set(i, c);
+ }
+
+ std::pair<T, T> Get(size_t i, size_t bits1, size_t bits2)
+ {
+ T v = PackedArray<T, D>::Get(i, bits1 + bits2);
+ T a = v & ((1 << bits1) - 1);
+ T b = v >> bits1;
+ return std::pair<T, T>(a, b);
+ }
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/PhraseDecoder.cpp b/moses/src/CompactPT/PhraseDecoder.cpp
new file mode 100644
index 000000000..9a4b83b62
--- /dev/null
+++ b/moses/src/CompactPT/PhraseDecoder.cpp
@@ -0,0 +1,491 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <deque>
+
+#include "PhraseDecoder.h"
+
+namespace Moses
+{
+
+PhraseDecoder::PhraseDecoder(
+ PhraseDictionaryCompact &phraseDictionary,
+ const std::vector<FactorType>* &input,
+ const std::vector<FactorType>* &output,
+ const PhraseDictionaryFeature* feature,
+ size_t numScoreComponent,
+ const std::vector<float>* weight,
+ float weightWP,
+ const LMList* languageModels
+)
+ : m_coding(None), m_numScoreComponent(numScoreComponent),
+ m_containsAlignmentInfo(true), m_maxRank(0),
+ m_symbolTree(0), m_multipleScoreTrees(false),
+ m_scoreTrees(1), m_alignTree(0),
+ m_phraseDictionary(phraseDictionary), m_input(input), m_output(output),
+ m_feature(feature), m_weight(weight),
+ m_weightWP(weightWP), m_languageModels(languageModels),
+ m_separator(" ||| ")
+{ }
+
+PhraseDecoder::~PhraseDecoder()
+{
+ if(m_symbolTree)
+ delete m_symbolTree;
+
+ for(size_t i = 0; i < m_scoreTrees.size(); i++)
+ if(m_scoreTrees[i])
+ delete m_scoreTrees[i];
+
+ if(m_alignTree)
+ delete m_alignTree;
+}
+
+inline unsigned PhraseDecoder::GetSourceSymbolId(std::string& symbol)
+{
+ boost::unordered_map<std::string, unsigned>::iterator it
+ = m_sourceSymbolsMap.find(symbol);
+ if(it != m_sourceSymbolsMap.end())
+ return it->second;
+
+ size_t idx = m_sourceSymbols.find(symbol);
+ m_sourceSymbolsMap[symbol] = idx;
+ return idx;
+}
+
+inline std::string PhraseDecoder::GetTargetSymbol(unsigned idx) const
+{
+ if(idx < m_targetSymbols.size())
+ return m_targetSymbols[idx];
+ return std::string("##ERROR##");
+}
+
+inline size_t PhraseDecoder::GetREncType(unsigned encodedSymbol)
+{
+ return (encodedSymbol >> 30) + 1;
+}
+
+inline size_t PhraseDecoder::GetPREncType(unsigned encodedSymbol)
+{
+ return (encodedSymbol >> 31) + 1;
+}
+
+inline unsigned PhraseDecoder::GetTranslation(unsigned srcIdx, size_t rank)
+{
+ size_t srcTrgIdx = m_lexicalTableIndex[srcIdx];
+ return m_lexicalTable[srcTrgIdx + rank].second;
+}
+
+size_t PhraseDecoder::GetMaxSourcePhraseLength()
+{
+ return m_maxPhraseLength;
+}
+
+inline unsigned PhraseDecoder::DecodeREncSymbol1(unsigned encodedSymbol)
+{
+ return encodedSymbol &= ~(3 << 30);
+}
+
+inline unsigned PhraseDecoder::DecodeREncSymbol2Rank(unsigned encodedSymbol)
+{
+ return encodedSymbol &= ~(255 << 24);
+}
+
+inline unsigned PhraseDecoder::DecodeREncSymbol2Position(unsigned encodedSymbol)
+{
+ encodedSymbol &= ~(3 << 30);
+ encodedSymbol >>= 24;
+ return encodedSymbol;
+}
+
+inline unsigned PhraseDecoder::DecodeREncSymbol3(unsigned encodedSymbol)
+{
+ return encodedSymbol &= ~(3 << 30);
+}
+
+inline unsigned PhraseDecoder::DecodePREncSymbol1(unsigned encodedSymbol)
+{
+ return encodedSymbol &= ~(1 << 31);
+}
+
+inline int PhraseDecoder::DecodePREncSymbol2Left(unsigned encodedSymbol)
+{
+ return ((encodedSymbol >> 25) & 63) - 32;
+}
+
+inline int PhraseDecoder::DecodePREncSymbol2Right(unsigned encodedSymbol)
+{
+ return ((encodedSymbol >> 19) & 63) - 32;
+}
+
+inline unsigned PhraseDecoder::DecodePREncSymbol2Rank(unsigned encodedSymbol)
+{
+ return (encodedSymbol & 524287);
+}
+
+size_t PhraseDecoder::Load(std::FILE* in)
+{
+ size_t start = std::ftell(in);
+ size_t read = 0;
+
+ read += std::fread(&m_coding, sizeof(m_coding), 1, in);
+ read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, in);
+ read += std::fread(&m_containsAlignmentInfo, sizeof(m_containsAlignmentInfo), 1, in);
+ read += std::fread(&m_maxRank, sizeof(m_maxRank), 1, in);
+ read += std::fread(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, in);
+
+ if(m_coding == REnc)
+ {
+ m_sourceSymbols.load(in);
+
+ size_t size;
+ read += std::fread(&size, sizeof(size_t), 1, in);
+ m_lexicalTableIndex.resize(size);
+ read += std::fread(&m_lexicalTableIndex[0], sizeof(size_t), size, in);
+
+ read += std::fread(&size, sizeof(size_t), 1, in);
+ m_lexicalTable.resize(size);
+ read += std::fread(&m_lexicalTable[0], sizeof(SrcTrg), size, in);
+ }
+
+ m_targetSymbols.load(in);
+
+ m_symbolTree = new CanonicalHuffman<unsigned>(in);
+
+ read += std::fread(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, in);
+ if(m_multipleScoreTrees)
+ {
+ m_scoreTrees.resize(m_numScoreComponent);
+ for(size_t i = 0; i < m_numScoreComponent; i++)
+ m_scoreTrees[i] = new CanonicalHuffman<float>(in);
+ }
+ else
+ {
+ m_scoreTrees.resize(1);
+ m_scoreTrees[0] = new CanonicalHuffman<float>(in);
+ }
+
+ if(m_containsAlignmentInfo)
+ m_alignTree = new CanonicalHuffman<AlignPoint>(in);
+
+ size_t end = std::ftell(in);
+ return end - start;
+}
+
+std::string PhraseDecoder::MakeSourceKey(std::string &source)
+{
+ return source + m_separator;
+}
+
+TargetPhraseVectorPtr PhraseDecoder::CreateTargetPhraseCollection(const Phrase &sourcePhrase, bool topLevel)
+{
+
+ // Not using TargetPhraseCollection, avoiding the "new" operator,
+ // which can introduce heavy locking with multiple threads
+ TargetPhraseVectorPtr tpv(new TargetPhraseVector());
+ size_t bitsLeft = 0;
+
+ if(m_coding == PREnc)
+ {
+ std::pair<TargetPhraseVectorPtr, size_t> cachedPhraseColl
+ = m_decodingCache.Retrieve(sourcePhrase);
+
+ // Has been cached and is complete or does not need to be completed
+ if(cachedPhraseColl.first != NULL && (!topLevel || cachedPhraseColl.second == 0))
+ return cachedPhraseColl.first;
+
+ // Has been cached, but is incomplete
+ else if(cachedPhraseColl.first != NULL)
+ {
+ bitsLeft = cachedPhraseColl.second;
+ tpv->resize(cachedPhraseColl.first->size());
+ std::copy(cachedPhraseColl.first->begin(),
+ cachedPhraseColl.first->end(),
+ tpv->begin());
+ }
+ }
+
+ // Retrieve source phrase identifier
+ std::string sourcePhraseString = sourcePhrase.GetStringRep(*m_input);
+ size_t sourcePhraseId = m_phraseDictionary.m_hash[MakeSourceKey(sourcePhraseString)];
+
+ if(sourcePhraseId != m_phraseDictionary.m_hash.GetSize())
+ {
+ // Retrieve compressed and encoded target phrase collection
+ std::string encodedPhraseCollection;
+ if(m_phraseDictionary.m_inMemory)
+ encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMemory[sourcePhraseId];
+ else
+ encodedPhraseCollection = m_phraseDictionary.m_targetPhrasesMapped[sourcePhraseId];
+
+ BitWrapper<> encodedBitStream(encodedPhraseCollection);
+ if(m_coding == PREnc && bitsLeft)
+ encodedBitStream.SeekFromEnd(bitsLeft);
+
+ // Decompress and decode target phrase collection
+ TargetPhraseVectorPtr decodedPhraseColl =
+ DecodeCollection(tpv, encodedBitStream, sourcePhrase, topLevel);
+
+ return decodedPhraseColl;
+ }
+ else
+ return TargetPhraseVectorPtr();
+}
+
+TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
+ TargetPhraseVectorPtr tpv, BitWrapper<> &encodedBitStream,
+ const Phrase &sourcePhrase, bool topLevel)
+{
+
+ bool extending = tpv->size();
+ size_t bitsLeft = encodedBitStream.TellFromEnd();
+
+ typedef std::pair<size_t, size_t> AlignPointSizeT;
+
+ std::vector<int> sourceWords;
+ if(m_coding == REnc)
+ {
+ for(size_t i = 0; i < sourcePhrase.GetSize(); i++)
+ {
+ std::string sourceWord
+ = sourcePhrase.GetWord(i).GetString(*m_input, false);
+ unsigned idx = GetSourceSymbolId(sourceWord);
+ sourceWords.push_back(idx);
+ }
+ }
+
+ unsigned phraseStopSymbol = 0;
+ AlignPoint alignStopSymbol(-1, -1);
+
+ std::vector<float> scores;
+ std::set<AlignPointSizeT> alignment;
+
+ enum DecodeState { New, Symbol, Score, Alignment, Add } state = New;
+
+ size_t srcSize = sourcePhrase.GetSize();
+
+ TargetPhrase* targetPhrase = NULL;
+ while(encodedBitStream.TellFromEnd())
+ {
+
+ if(state == New)
+ {
+ // Creating new TargetPhrase on the heap
+ tpv->push_back(TargetPhrase(Output));
+ targetPhrase = &tpv->back();
+
+ targetPhrase->SetSourcePhrase(sourcePhrase);
+ alignment.clear();
+ scores.clear();
+
+ state = Symbol;
+ }
+
+ if(state == Symbol)
+ {
+ unsigned symbol = m_symbolTree->Read(encodedBitStream);
+ if(symbol == phraseStopSymbol)
+ {
+ state = Score;
+ }
+ else
+ {
+ if(m_coding == REnc)
+ {
+ std::string wordString;
+ size_t type = GetREncType(symbol);
+
+ if(type == 1)
+ {
+ unsigned decodedSymbol = DecodeREncSymbol1(symbol);
+ wordString = GetTargetSymbol(decodedSymbol);
+ }
+ else if (type == 2)
+ {
+ size_t rank = DecodeREncSymbol2Rank(symbol);
+ size_t srcPos = DecodeREncSymbol2Position(symbol);
+
+ if(srcPos >= sourceWords.size())
+ return TargetPhraseVectorPtr();
+
+ wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
+ if(m_phraseDictionary.m_useAlignmentInfo)
+ {
+ size_t trgPos = targetPhrase->GetSize();
+ alignment.insert(AlignPoint(srcPos, trgPos));
+ }
+ }
+ else if(type == 3)
+ {
+ size_t rank = DecodeREncSymbol3(symbol);
+ size_t srcPos = targetPhrase->GetSize();
+
+ if(srcPos >= sourceWords.size())
+ return TargetPhraseVectorPtr();
+
+ wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
+ if(m_phraseDictionary.m_useAlignmentInfo)
+ {
+ size_t trgPos = srcPos;
+ alignment.insert(AlignPoint(srcPos, trgPos));
+ }
+ }
+
+ Word word;
+ word.CreateFromString(Output, *m_output, wordString, false);
+ targetPhrase->AddWord(word);
+ }
+ else if(m_coding == PREnc)
+ {
+ // if the symbol is just a word
+ if(GetPREncType(symbol) == 1)
+ {
+ unsigned decodedSymbol = DecodePREncSymbol1(symbol);
+
+ Word word;
+ word.CreateFromString(Output, *m_output,
+ GetTargetSymbol(decodedSymbol), false);
+ targetPhrase->AddWord(word);
+ }
+ // if the symbol is a subphrase pointer
+ else
+ {
+ int left = DecodePREncSymbol2Left(symbol);
+ int right = DecodePREncSymbol2Right(symbol);
+ unsigned rank = DecodePREncSymbol2Rank(symbol);
+
+ int srcStart = left + targetPhrase->GetSize();
+ int srcEnd = srcSize - right - 1;
+
+ // false positive consistency check
+ if(0 > srcStart || srcStart > srcEnd || unsigned(srcEnd) >= srcSize)
+ return TargetPhraseVectorPtr();
+
+ // false positive consistency check
+ if(m_maxRank && rank > m_maxRank)
+ return TargetPhraseVectorPtr();
+
+ // set subphrase by default to itself
+ TargetPhraseVectorPtr subTpv = tpv;
+
+ // if range smaller than source phrase retrieve subphrase
+ if(unsigned(srcEnd - srcStart + 1) != srcSize)
+ {
+ Phrase subPhrase = sourcePhrase.GetSubString(WordsRange(srcStart, srcEnd));
+ subTpv = CreateTargetPhraseCollection(subPhrase, false);
+ }
+
+ // false positive consistency check
+ if(subTpv != NULL && rank < subTpv->size())
+ {
+ // insert the subphrase into the main target phrase
+ TargetPhrase& subTp = subTpv->at(rank);
+ if(m_phraseDictionary.m_useAlignmentInfo)
+ {
+ // reconstruct the alignment data based on the alignment of the subphrase
+ for(AlignmentInfo::const_iterator it = subTp.GetAlignmentInfo().begin();
+ it != subTp.GetAlignmentInfo().end(); it++)
+ {
+ alignment.insert(AlignPointSizeT(srcStart + it->first,
+ targetPhrase->GetSize() + it->second));
+ }
+ }
+ targetPhrase->Append(subTp);
+ }
+ else
+ return TargetPhraseVectorPtr();
+ }
+ }
+ else
+ {
+ Word word;
+ word.CreateFromString(Output, *m_output,
+ GetTargetSymbol(symbol), false);
+ targetPhrase->AddWord(word);
+ }
+ }
+ }
+ else if(state == Score)
+ {
+ size_t idx = m_multipleScoreTrees ? scores.size() : 0;
+ float score = m_scoreTrees[idx]->Read(encodedBitStream);
+ scores.push_back(score);
+
+ if(scores.size() == m_numScoreComponent)
+ {
+ targetPhrase->SetScore(m_feature, scores, ScoreComponentCollection() /*sparse*/,*m_weight, m_weightWP, *m_languageModels);
+
+ if(m_containsAlignmentInfo)
+ state = Alignment;
+ else
+ state = Add;
+ }
+ }
+ else if(state == Alignment)
+ {
+ AlignPoint alignPoint = m_alignTree->Read(encodedBitStream);
+ if(alignPoint == alignStopSymbol)
+ {
+ state = Add;
+ }
+ else
+ {
+ if(m_phraseDictionary.m_useAlignmentInfo)
+ alignment.insert(AlignPointSizeT(alignPoint));
+ }
+ }
+
+ if(state == Add)
+ {
+ if(m_phraseDictionary.m_useAlignmentInfo)
+ targetPhrase->SetAlignmentInfo(alignment);
+
+ if(m_coding == PREnc)
+ {
+ if(!m_maxRank || tpv->size() <= m_maxRank)
+ bitsLeft = encodedBitStream.TellFromEnd();
+
+ if(!topLevel && m_maxRank && tpv->size() >= m_maxRank)
+ break;
+ }
+
+ if(encodedBitStream.TellFromEnd() <= 8)
+ break;
+
+ state = New;
+ }
+ }
+
+ if(m_coding == PREnc && !extending)
+ {
+ bitsLeft = bitsLeft > 8 ? bitsLeft : 0;
+ m_decodingCache.Cache(sourcePhrase, tpv, bitsLeft, m_maxRank);
+ }
+
+ return tpv;
+}
+
+void PhraseDecoder::PruneCache()
+{
+ m_decodingCache.Prune();
+}
+
+}
diff --git a/moses/src/CompactPT/PhraseDecoder.h b/moses/src/CompactPT/PhraseDecoder.h
new file mode 100644
index 000000000..197687025
--- /dev/null
+++ b/moses/src/CompactPT/PhraseDecoder.h
@@ -0,0 +1,153 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PhraseDecoder_h
+#define moses_PhraseDecoder_h
+
+#include <sstream>
+#include <vector>
+#include <boost/unordered_map.hpp>
+#include <boost/unordered_set.hpp>
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include <sys/stat.h>
+
+#include "TypeDef.h"
+#include "FactorCollection.h"
+#include "Word.h"
+#include "Util.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+#include "WordsRange.h"
+#include "UserMessage.h"
+
+#include "PhraseDictionaryCompact.h"
+#include "StringVector.h"
+#include "CanonicalHuffman.h"
+#include "TargetPhraseCollectionCache.h"
+
+namespace Moses
+{
+
+class PhraseDictionaryCompact;
+
+class PhraseDecoder
+{
+ protected:
+
+ friend class PhraseDictionaryCompact;
+
+ typedef std::pair<unsigned char, unsigned char> AlignPoint;
+ typedef std::pair<unsigned, unsigned> SrcTrg;
+
+ enum Coding { None, REnc, PREnc } m_coding;
+
+ size_t m_numScoreComponent;
+ bool m_containsAlignmentInfo;
+ size_t m_maxRank;
+ size_t m_maxPhraseLength;
+
+ boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
+ StringVector<unsigned char, unsigned, std::allocator> m_sourceSymbols;
+ StringVector<unsigned char, unsigned, std::allocator> m_targetSymbols;
+
+ std::vector<size_t> m_lexicalTableIndex;
+ std::vector<SrcTrg> m_lexicalTable;
+
+ CanonicalHuffman<unsigned>* m_symbolTree;
+
+ bool m_multipleScoreTrees;
+ std::vector<CanonicalHuffman<float>*> m_scoreTrees;
+
+ CanonicalHuffman<AlignPoint>* m_alignTree;
+
+ TargetPhraseCollectionCache m_decodingCache;
+
+ PhraseDictionaryCompact& m_phraseDictionary;
+
+ // ***********************************************
+
+ const std::vector<FactorType>* m_input;
+ const std::vector<FactorType>* m_output;
+ const PhraseDictionaryFeature* m_feature;
+ const std::vector<float>* m_weight;
+ float m_weightWP;
+ const LMList* m_languageModels;
+
+ std::string m_separator;
+
+ // ***********************************************
+
+ unsigned GetSourceSymbolId(std::string& s);
+ std::string GetTargetSymbol(unsigned id) const;
+
+ size_t GetREncType(unsigned encodedSymbol);
+ size_t GetPREncType(unsigned encodedSymbol);
+
+ unsigned GetTranslation(unsigned srcIdx, size_t rank);
+
+ size_t GetMaxSourcePhraseLength();
+
+ unsigned DecodeREncSymbol1(unsigned encodedSymbol);
+ unsigned DecodeREncSymbol2Rank(unsigned encodedSymbol);
+ unsigned DecodeREncSymbol2Position(unsigned encodedSymbol);
+ unsigned DecodeREncSymbol3(unsigned encodedSymbol);
+
+ unsigned DecodePREncSymbol1(unsigned encodedSymbol);
+ int DecodePREncSymbol2Left(unsigned encodedSymbol);
+ int DecodePREncSymbol2Right(unsigned encodedSymbol);
+ unsigned DecodePREncSymbol2Rank(unsigned encodedSymbol);
+
+ std::string MakeSourceKey(std::string &);
+
+ public:
+
+ PhraseDecoder(
+ PhraseDictionaryCompact &phraseDictionary,
+ const std::vector<FactorType>* &input,
+ const std::vector<FactorType>* &output,
+ const PhraseDictionaryFeature* feature,
+ size_t numScoreComponent,
+ const std::vector<float>* weight,
+ float weightWP,
+ const LMList* languageModels
+ );
+
+ ~PhraseDecoder();
+
+ size_t Load(std::FILE* in);
+
+ TargetPhraseVectorPtr CreateTargetPhraseCollection(const Phrase &sourcePhrase,
+ bool topLevel = false);
+
+ TargetPhraseVectorPtr DecodeCollection(TargetPhraseVectorPtr tpv,
+ BitWrapper<> &encodedBitStream,
+ const Phrase &sourcePhrase,
+ bool topLevel);
+
+ void PruneCache();
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/PhraseDictionaryCompact.cpp b/moses/src/CompactPT/PhraseDictionaryCompact.cpp
new file mode 100644
index 000000000..c5f963fd4
--- /dev/null
+++ b/moses/src/CompactPT/PhraseDictionaryCompact.cpp
@@ -0,0 +1,195 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <queue>
+#include <algorithm>
+#include <sys/stat.h>
+
+#include "PhraseDictionaryCompact.h"
+#include "FactorCollection.h"
+#include "Word.h"
+#include "Util.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+#include "WordsRange.h"
+#include "UserMessage.h"
+#include "ThreadPool.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+bool PhraseDictionaryCompact::Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const string &filePath
+ , const vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP)
+{
+ m_input = &input;
+ m_output = &output;
+ m_weight = &weight;
+ m_tableLimit = tableLimit;
+ m_languageModels = &languageModels;
+ m_weightWP = weightWP;
+
+ std::string tFilePath = filePath;
+
+ if(!FileExists(tFilePath))
+ {
+ if(FileExists(tFilePath + ".minphr"))
+ {
+ tFilePath += ".minphr";
+ }
+ else
+ {
+ std::cerr << "Error: File " + tFilePath + " (.minphr) does not exist." << std::endl;
+ exit(1);
+ }
+ }
+
+ m_phraseDecoder = new PhraseDecoder(*this, m_input, m_output, m_feature,
+ m_numScoreComponent, m_weight, m_weightWP,
+ m_languageModels);
+
+ std::FILE* pFile = std::fopen(tFilePath.c_str() , "r");
+
+ size_t indexSize;
+ if(m_inMemory)
+ // Load source phrase index into memory
+ indexSize = m_hash.Load(pFile);
+ else
+ // Keep source phrase index on disk
+ indexSize = m_hash.LoadIndex(pFile);
+
+ size_t coderSize = m_phraseDecoder->Load(pFile);
+
+ size_t phraseSize;
+ if(m_inMemory)
+ // Load target phrase collections into memory
+ phraseSize = m_targetPhrasesMemory.load(pFile, false);
+ else
+ // Keep target phrase collections on disk
+ phraseSize = m_targetPhrasesMapped.load(pFile, true);
+
+ return indexSize && coderSize && phraseSize;
+}
+
+struct CompareTargetPhrase {
+ bool operator() (const TargetPhrase &a, const TargetPhrase &b) {
+ return a.GetFutureScore() > b.GetFutureScore();
+ }
+};
+
+const TargetPhraseCollection*
+PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) const {
+
+ // There is no such source phrase if source phrase is longer than longest
+ // observed source phrase during compilation
+ if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
+ return NULL;
+
+ // Retrieve target phrase collection from phrase table
+ TargetPhraseVectorPtr decodedPhraseColl
+ = m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+
+ if(decodedPhraseColl != NULL && decodedPhraseColl->size()) {
+ TargetPhraseVectorPtr tpv(new TargetPhraseVector(*decodedPhraseColl));
+ TargetPhraseCollection* phraseColl = new TargetPhraseCollection();
+
+ // Score phrases and if possible apply ttable_limit
+ TargetPhraseVector::iterator nth =
+ (m_tableLimit == 0 || tpv->size() < m_tableLimit) ?
+ tpv->end() : tpv->begin() + m_tableLimit;
+ std::nth_element(tpv->begin(), nth, tpv->end(), CompareTargetPhrase());
+ for(TargetPhraseVector::iterator it = tpv->begin(); it != nth; it++)
+ phraseColl->Add(new TargetPhrase(*it));
+
+ // Cache phrase pair for clean-up or retrieval with PREnc
+ const_cast<PhraseDictionaryCompact*>(this)->CacheForCleanup(phraseColl);
+
+ return phraseColl;
+ }
+ else
+ return NULL;
+}
+
+TargetPhraseVectorPtr
+PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const {
+
+ // There is no such source phrase if source phrase is longer than longest
+ // observed source phrase during compilation
+ if(sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength())
+ return TargetPhraseVectorPtr();
+
+ // Retrieve target phrase collection from phrase table
+ return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
+}
+
+PhraseDictionaryCompact::~PhraseDictionaryCompact() {
+ if(m_phraseDecoder)
+ delete m_phraseDecoder;
+}
+
+//TO_STRING_BODY(PhraseDictionaryCompact)
+
+void PhraseDictionaryCompact::CacheForCleanup(TargetPhraseCollection* tpc) {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_sentenceMutex);
+ PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
+#else
+ PhraseCache &ref = m_sentenceCache;
+#endif
+ ref.push_back(tpc);
+}
+
+void PhraseDictionaryCompact::InitializeForInput(const Moses::InputType&) {}
+
+void PhraseDictionaryCompact::AddEquivPhrase(const Phrase &source,
+ const TargetPhrase &targetPhrase) { }
+
+void PhraseDictionaryCompact::CleanUp(const InputType &source) {
+ if(!m_inMemory)
+ m_hash.KeepNLastRanges(0.01, 0.2);
+
+ m_phraseDecoder->PruneCache();
+
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_sentenceMutex);
+ PhraseCache &ref = m_sentenceCache[boost::this_thread::get_id()];
+#else
+ PhraseCache &ref = m_sentenceCache;
+#endif
+
+ for(PhraseCache::iterator it = ref.begin(); it != ref.end(); it++)
+ delete *it;
+
+ PhraseCache temp;
+ temp.swap(ref);
+}
+
+}
+
diff --git a/moses/src/CompactPT/PhraseDictionaryCompact.h b/moses/src/CompactPT/PhraseDictionaryCompact.h
new file mode 100644
index 000000000..6090967d2
--- /dev/null
+++ b/moses/src/CompactPT/PhraseDictionaryCompact.h
@@ -0,0 +1,124 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PhraseDictionaryCompact_h
+#define moses_PhraseDictionaryCompact_h
+
+#include <boost/unordered_map.hpp>
+
+#ifdef WITH_THREADS
+#ifdef BOOST_HAS_PTHREADS
+#include <boost/thread/mutex.hpp>
+#endif
+#endif
+
+#include "PhraseDictionary.h"
+#include "ThreadPool.h"
+
+#include "BlockHashIndex.h"
+#include "StringVector.h"
+#include "PhraseDecoder.h"
+#include "TargetPhraseCollectionCache.h"
+
+namespace Moses
+{
+
+class PhraseDecoder;
+
+class PhraseDictionaryCompact : public PhraseDictionary
+{
+protected:
+ friend class PhraseDecoder;
+
+ PhraseTableImplementation m_implementation;
+ bool m_inMemory;
+ bool m_useAlignmentInfo;
+
+ typedef std::vector<TargetPhraseCollection*> PhraseCache;
+#ifdef WITH_THREADS
+ boost::mutex m_sentenceMutex;
+ typedef std::map<boost::thread::id, PhraseCache> SentenceCache;
+#else
+ typedef PhraseCache SentenceCache;
+#endif
+ SentenceCache m_sentenceCache;
+
+ BlockHashIndex m_hash;
+ PhraseDecoder* m_phraseDecoder;
+
+ StringVector<unsigned char, size_t, MmapAllocator> m_targetPhrasesMapped;
+ StringVector<unsigned char, size_t, std::allocator> m_targetPhrasesMemory;
+
+ const std::vector<FactorType>* m_input;
+ const std::vector<FactorType>* m_output;
+
+ const std::vector<float>* m_weight;
+ const LMList* m_languageModels;
+ float m_weightWP;
+
+public:
+ PhraseDictionaryCompact(size_t numScoreComponent,
+ PhraseTableImplementation implementation,
+ PhraseDictionaryFeature* feature,
+ bool inMemory = StaticData::Instance().UseMinphrInMemory(),
+ bool useAlignmentInfo = StaticData::Instance().UseAlignmentInfo())
+ : PhraseDictionary(numScoreComponent, feature),
+ m_implementation(implementation),
+ m_inMemory(inMemory),
+ m_useAlignmentInfo(useAlignmentInfo),
+ m_hash(10, 16),
+ m_phraseDecoder(0)
+ {}
+
+ virtual ~PhraseDictionaryCompact();
+
+ bool Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &filePath
+ , const std::vector<float> &weight
+ , size_t tableLimit
+ , const LMList &languageModels
+ , float weightWP);
+
+ const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &source) const;
+ TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
+
+ void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
+
+ void InitializeForInput(const Moses::InputType&);
+
+ void CacheForCleanup(TargetPhraseCollection* tpc);
+ void CleanUp(const InputType &source);
+
+ virtual ChartRuleLookupManager *CreateRuleLookupManager(
+ const InputType &,
+ const ChartCellCollection &)
+ {
+ assert(false);
+ return 0;
+ }
+
+ TO_STRING();
+
+};
+
+}
+#endif
diff --git a/moses/src/CompactPT/PhraseTableCreator.cpp b/moses/src/CompactPT/PhraseTableCreator.cpp
new file mode 100644
index 000000000..c62305b99
--- /dev/null
+++ b/moses/src/CompactPT/PhraseTableCreator.cpp
@@ -0,0 +1,1283 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include <cstdio>
+
+#include "PhraseTableCreator.h"
+#include "ConsistentPhrases.h"
+#include "ThrowingFwrite.h"
+
+namespace Moses
+{
+
+// Orders PackedItems for the flush priority queues: the queue must pop the
+// item with the SMALLEST line number first, so "less than" is deliberately
+// inverted to mean "has a larger line number".
+// Fixed: the original returned true when the line numbers were equal, which
+// violates the strict weak ordering std::priority_queue requires of its
+// comparator (x < x must be false).
+bool operator<(const PackedItem &pi1, const PackedItem &pi2)
+{
+  return pi1.GetLine() > pi2.GetLine();
+}
+
+std::string PhraseTableCreator::m_phraseStopSymbol = "__SPECIAL_STOP_SYMBOL__";
+std::string PhraseTableCreator::m_separator = " ||| ";
+
+// Drives the whole phrase-table compression pipeline from the constructor:
+// optional 0th pass (lexicon load for REnc, or rank-hash creation for PREnc),
+// 1st pass building the source index and encoding target phrases, Huffman
+// code computation, 2nd pass compressing, then saving everything to disk.
+// NOTE(review): fopen() result is not checked here; a bad output path is
+// presumably caught later by ThrowingFwrite — confirm.
+PhraseTableCreator::PhraseTableCreator(std::string inPath,
+                                       std::string outPath,
+                                       size_t numScoreComponent,
+                                       size_t sortScoreIndex,
+                                       Coding coding,
+                                       size_t orderBits,
+                                       size_t fingerPrintBits,
+                                       bool useAlignmentInfo,
+                                       bool multipleScoreTrees,
+                                       size_t quantize,
+                                       size_t maxRank,
+                                       bool warnMe
+#ifdef WITH_THREADS
+                                       , size_t threads
+#endif
+                                       )
+  : m_inPath(inPath), m_outPath(outPath),
+    m_outFile(std::fopen(m_outPath.c_str(), "w")), m_numScoreComponent(numScoreComponent),
+    m_sortScoreIndex(sortScoreIndex), m_warnMe(warnMe),
+    m_coding(coding), m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
+    m_useAlignmentInfo(useAlignmentInfo),
+    m_multipleScoreTrees(multipleScoreTrees),
+    m_quantize(quantize), m_maxRank(maxRank),
+    #ifdef WITH_THREADS
+    m_threads(threads),
+    m_srcHash(m_orderBits, m_fingerPrintBits, 1),
+    m_rnkHash(10, 24, m_threads),
+    #else
+    m_srcHash(m_orderBits, m_fingerPrintBits),
+    m_rnkHash(m_orderBits, m_fingerPrintBits),
+    #endif
+    m_maxPhraseLength(0),
+    m_lastFlushedLine(-1), m_lastFlushedSourceNum(0),
+    m_lastFlushedSourcePhrase("")
+{
+  PrintInfo();
+
+  // The stop symbol must get target-symbol id 0: it is registered first.
+  AddTargetSymbolId(m_phraseStopSymbol);
+
+  size_t cur_pass = 1;
+  size_t all_passes = 2;
+  if(m_coding == PREnc)
+    all_passes = 3;
+
+  // One score counter/tree per component, or a single shared one.
+  m_scoreCounters.resize(m_multipleScoreTrees ? m_numScoreComponent : 1);
+  for(std::vector<ScoreCounter*>::iterator it = m_scoreCounters.begin();
+      it != m_scoreCounters.end(); it++)
+    *it = new ScoreCounter();
+  m_scoreTrees.resize(m_multipleScoreTrees ? m_numScoreComponent : 1);
+
+  // 0th pass
+  if(m_coding == REnc)
+  {
+    // REnc needs the lexical translation table, expected next to the input.
+    size_t found = inPath.find_last_of("/\\");
+    std::string path;
+    if(found != std::string::npos)
+      path = inPath.substr(0, found);
+    else
+      path = ".";
+    LoadLexicalTable(path + "/lex.f2e");
+  }
+  else if(m_coding == PREnc)
+  {
+    std::cerr << "Pass " << cur_pass << "/" << all_passes << ": Creating hash function for rank assignment" << std::endl;
+    cur_pass++;
+    CreateRankHash();
+  }
+
+  // 1st pass
+  std::cerr << "Pass " << cur_pass << "/" << all_passes << ": Creating source phrase index + Encoding target phrases" << std::endl;
+  m_srcHash.BeginSave(m_outFile);
+  EncodeTargetPhrases();
+
+  cur_pass++;
+
+  std::cerr << "Intermezzo: Calculating Huffman code sets" << std::endl;
+  CalcHuffmanCodes();
+
+  // 2nd pass
+  std::cerr << "Pass " << cur_pass << "/" << all_passes << ": Compressing target phrases" << std::endl;
+  CompressTargetPhrases();
+
+  std::cerr << "Saving to " << m_outPath << std::endl;
+  Save();
+  std::cerr << "Done" << std::endl;
+  std::fclose(m_outFile);
+}
+
+// Releases the Huffman trees and counters created during the passes.
+// m_alignTree is only allocated (in CalcHuffmanCodes) when alignment info
+// is used, so it is only deleted under the same condition.
+PhraseTableCreator::~PhraseTableCreator()
+{
+  delete m_symbolTree;
+  if(m_useAlignmentInfo)
+    delete m_alignTree;
+  // m_scoreTrees and m_scoreCounters always have the same size.
+  for(size_t i = 0; i < m_scoreTrees.size(); i++) {
+    delete m_scoreTrees[i];
+    delete m_scoreCounters[i];
+  }
+}
+
+// Prints a human-readable summary of all configuration options to stderr
+// before processing starts. Pure reporting; no state is modified.
+void PhraseTableCreator::PrintInfo()
+{
+  // Indexed by the Coding enum: None, REnc, PREnc.
+  std::string encodings[3] = {"Huffman", "Huffman + REnc", "Huffman + PREnc"};
+
+  std::cerr << "Used options:" << std::endl;
+  std::cerr << "\tText phrase table will be read from: " << m_inPath << std::endl;
+  std::cerr << "\tOutput phrase table will be written to: " << m_outPath << std::endl;
+  std::cerr << "\tStep size for source landmark phrases: 2^" << m_orderBits << "=" << (1ul << m_orderBits) << std::endl;
+  std::cerr << "\tSource phrase fingerprint size: " << m_fingerPrintBits << " bits / P(fp)=" << (float(1)/(1ul << m_fingerPrintBits)) << std::endl;
+  std::cerr << "\tSelected target phrase encoding: " << encodings[m_coding] << std::endl;
+  if(m_coding == PREnc)
+  {
+    std::cerr << "\tMaxiumum allowed rank for PREnc: ";
+    if(!m_maxRank)
+      std::cerr << "unlimited" << std::endl;
+    else
+      std::cerr << m_maxRank << std::endl;
+  }
+  std::cerr << "\tNumber of score components in phrase table: " << m_numScoreComponent << std::endl;
+  std::cerr << "\tSingle Huffman code set for score components: " << (m_multipleScoreTrees ? "no" : "yes") << std::endl;
+  std::cerr << "\tUsing score quantization: ";
+  if(m_quantize)
+    std::cerr << m_quantize << " best" << std::endl;
+  else
+    std::cerr << "no" << std::endl;
+  std::cerr << "\tExplicitly included alignment information: " << (m_useAlignmentInfo ? "yes" : "no") << std::endl;
+
+#ifdef WITH_THREADS
+  std::cerr << "\tRunning with " << m_threads << " threads" << std::endl;
+#endif
+  std::cerr << std::endl;
+}
+
+// Writes everything except the source index (already streamed via
+// m_srcHash.BeginSave/SaveLastRange) to m_outFile: header fields, symbol
+// vocabularies, Huffman code sets and the compressed phrase collections.
+// The write order here must match the read order in the decoder.
+void PhraseTableCreator::Save()
+{
+  // Save type of encoding
+  ThrowingFwrite(&m_coding, sizeof(m_coding), 1, m_outFile);
+  ThrowingFwrite(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, m_outFile);
+  ThrowingFwrite(&m_useAlignmentInfo, sizeof(m_useAlignmentInfo), 1, m_outFile);
+  ThrowingFwrite(&m_maxRank, sizeof(m_maxRank), 1, m_outFile);
+  ThrowingFwrite(&m_maxPhraseLength, sizeof(m_maxPhraseLength), 1, m_outFile);
+
+  if(m_coding == REnc)
+  {
+    // Save source language symbols for REnc
+    std::vector<std::string> temp1;
+    temp1.resize(m_sourceSymbolsMap.size());
+    for(boost::unordered_map<std::string, unsigned>::iterator it
+      = m_sourceSymbolsMap.begin(); it != m_sourceSymbolsMap.end(); it++)
+      temp1[it->second] = it->first;
+    // Ids were assigned lexicographically in LoadLexicalTable, so sorting
+    // the strings reproduces the id order.
+    std::sort(temp1.begin(), temp1.end());
+    StringVector<unsigned char, unsigned, std::allocator> sourceSymbols;
+    for(std::vector<std::string>::iterator it = temp1.begin();
+      it != temp1.end(); it++)
+      sourceSymbols.push_back(*it);
+    sourceSymbols.save(m_outFile);
+
+    // Save lexical translation table for REnc
+    size_t size = m_lexicalTableIndex.size();
+    ThrowingFwrite(&size, sizeof(size_t), 1, m_outFile);
+    ThrowingFwrite(&m_lexicalTableIndex[0], sizeof(size_t), size, m_outFile);
+    size = m_lexicalTable.size();
+    ThrowingFwrite(&size, sizeof(size_t), 1, m_outFile);
+    ThrowingFwrite(&m_lexicalTable[0], sizeof(SrcTrg), size, m_outFile);
+  }
+
+  // Save target language symbols, positioned by their assigned ids
+  // (index in temp2 == symbol id).
+  std::vector<std::string> temp2;
+  temp2.resize(m_targetSymbolsMap.size());
+  for(boost::unordered_map<std::string, unsigned>::iterator it
+    = m_targetSymbolsMap.begin(); it != m_targetSymbolsMap.end(); it++)
+    temp2[it->second] = it->first;
+  StringVector<unsigned char, unsigned, std::allocator> targetSymbols;
+  for(std::vector<std::string>::iterator it = temp2.begin();
+    it != temp2.end(); it++)
+    targetSymbols.push_back(*it);
+  targetSymbols.save(m_outFile);
+
+  // Save Huffman codes for target language symbols
+  m_symbolTree->Save(m_outFile);
+
+  // Save number of Huffman code sets for scores and
+  // save Huffman code sets
+  ThrowingFwrite(&m_multipleScoreTrees, sizeof(m_multipleScoreTrees), 1, m_outFile);
+  size_t numScoreTrees = m_scoreTrees.size();
+  for(size_t i = 0; i < numScoreTrees; i++)
+    m_scoreTrees[i]->Save(m_outFile);
+
+  // Save Huffman codes for alignments
+  if(m_useAlignmentInfo)
+    m_alignTree->Save(m_outFile);
+
+  // Save compressed target phrase collections
+  m_compressedTargetPhrases.save(m_outFile);
+}
+
+// Reads a "trg src prob" lexicon file and builds the REnc rank tables:
+// m_lexicalTableIndex[srcId] points at the first (srcId, trgId) pair in
+// m_lexicalTable, whose pairs per source word are ordered by decreasing
+// translation probability (rank 0 = most probable).
+// NOTE(review): no error is reported if the file cannot be opened — the
+// loop simply reads nothing; presumably lex.f2e is guaranteed by the caller.
+void PhraseTableCreator::LoadLexicalTable(std::string filePath)
+{
+  std::vector<SrcTrgProb> t_lexTable;
+
+  std::cerr << "Reading in lexical table for Rank Encoding" << std::endl;
+  std::ifstream lexIn(filePath.c_str(), std::ifstream::in);
+  std::string src, trg;
+  float prob;
+
+  // Reading in the translation probability lexicon
+
+  std::cerr << "\tLoading from " << filePath << std::endl;
+  while(lexIn >> trg >> src >> prob)
+  {
+    t_lexTable.push_back(SrcTrgProb(SrcTrgString(src, trg), prob));
+    AddSourceSymbolId(src);
+    AddTargetSymbolId(trg);
+  }
+
+  // Sorting lexicon by source words by lexicographical order, corresponding
+  // target words by decreasing probability.
+
+  std::cerr << "\tSorting according to translation rank" << std::endl;
+  std::sort(t_lexTable.begin(), t_lexTable.end(), SrcTrgProbSorter());
+
+  // Re-assigning source word ids in lexicographical order
+
+  std::vector<std::string> temp1;
+  temp1.resize(m_sourceSymbolsMap.size());
+  for(boost::unordered_map<std::string, unsigned>::iterator it
+    = m_sourceSymbolsMap.begin(); it != m_sourceSymbolsMap.end(); it++)
+    temp1[it->second] = it->first;
+
+  std::sort(temp1.begin(), temp1.end());
+
+  for(size_t i = 0; i < temp1.size(); i++)
+    m_sourceSymbolsMap[temp1[i]] = i;
+
+  // Building the lexicon based on source and target word ids
+
+  std::string srcWord = "";
+  size_t srcIdx = 0;
+  for(std::vector<SrcTrgProb>::iterator it = t_lexTable.begin();
+    it != t_lexTable.end(); it++)
+  {
+    // If we encounter a new source word
+    if(it->first.first != srcWord)
+    {
+      srcIdx = GetSourceSymbolId(it->first.first);
+
+      // Store position of first translation
+      if(srcIdx >= m_lexicalTableIndex.size())
+        m_lexicalTableIndex.resize(srcIdx + 1);
+      m_lexicalTableIndex[srcIdx] = m_lexicalTable.size();
+    }
+
+    // Store pair of source word and target word
+    size_t trgIdx = GetTargetSymbolId(it->first.second);
+    m_lexicalTable.push_back(SrcTrg(srcIdx, trgIdx));
+
+    srcWord = it->first.first;
+  }
+  std::cerr << "\tLoaded " << m_lexicalTable.size() << " lexical pairs" << std::endl;
+  std::cerr << std::endl;
+}
+
+// PREnc 0th pass: runs RankingTasks over the input file to assign each line
+// a rank among the translations of its source phrase, then flushes the rank
+// hash. Fixed: the tasks were allocated with new and never deleted
+// (boost::thread_group::create_thread copies the functor, so the heap
+// originals leaked); stack objects are sufficient.
+void PhraseTableCreator::CreateRankHash()
+{
+  InputFileStream inFile(m_inPath);
+
+#ifdef WITH_THREADS
+  boost::thread_group threads;
+  for (size_t i = 0; i < m_threads; ++i)
+  {
+    RankingTask rt(inFile, *this);
+    threads.create_thread(rt);
+  }
+  threads.join_all();
+#else
+  RankingTask rt(inFile, *this);
+  rt();
+#endif
+  // Force-flush whatever is still queued after all tasks finished.
+  FlushRankedQueue(true);
+}
+
+// Builds the lookup key for a source phrase: the phrase text followed by
+// the field separator " ||| ".
+inline std::string PhraseTableCreator::MakeSourceKey(std::string &source)
+{
+  std::string key(source);
+  key += m_separator;
+  return key;
+}
+
+// Builds the lookup key for a source/target phrase pair:
+// "<source> ||| <target> ||| ".
+inline std::string PhraseTableCreator::MakeSourceTargetKey(std::string &source, std::string &target)
+{
+  std::string key(source);
+  key += m_separator;
+  key += target;
+  key += m_separator;
+  return key;
+}
+
+// 1st pass: runs EncodingTasks over the input file, producing the encoded
+// (pre-Huffman) target phrase collections and the source phrase index.
+// Fixed: the tasks were allocated with new and leaked in the threaded path
+// (create_thread copies the functor); stack objects are sufficient.
+void PhraseTableCreator::EncodeTargetPhrases()
+{
+  InputFileStream inFile(m_inPath);
+
+#ifdef WITH_THREADS
+  boost::thread_group threads;
+  for (size_t i = 0; i < m_threads; ++i)
+  {
+    EncodingTask et(inFile, *this);
+    threads.create_thread(et);
+  }
+  threads.join_all();
+#else
+  EncodingTask et(inFile, *this);
+  et();
+#endif
+  // Force-flush the remaining queued lines and finalize the source hash.
+  FlushEncodedQueue(true);
+}
+
+
+// 2nd pass: runs CompressionTasks that Huffman-compress every encoded
+// target phrase collection. Fixed: the tasks were allocated with new and
+// leaked in the threaded path (create_thread copies the functor); stack
+// objects are sufficient.
+void PhraseTableCreator::CompressTargetPhrases()
+{
+#ifdef WITH_THREADS
+  boost::thread_group threads;
+  for (size_t i = 0; i < m_threads; ++i) {
+    CompressionTask ct(m_encodedTargetPhrases, *this);
+    threads.create_thread(ct);
+  }
+  threads.join_all();
+#else
+  CompressionTask ct(m_encodedTargetPhrases, *this);
+  ct();
+#endif
+  FlushCompressedQueue(true);
+}
+
+// Builds the canonical Huffman trees from the frequency counters collected
+// during the encoding pass: one tree for target symbols, one per score
+// component (or a single shared one), and optionally one for alignments.
+void PhraseTableCreator::CalcHuffmanCodes()
+{
+  std::cerr << "\tCreating Huffman codes for " << m_symbolCounter.Size()
+    << " target phrase symbols" << std::endl;
+
+  m_symbolTree = new SymbolTree(m_symbolCounter.Begin(),
+                                m_symbolCounter.End());
+
+  // m_scoreTrees and m_scoreCounters have the same size; fill trees in step.
+  std::vector<ScoreTree*>::iterator treeIt = m_scoreTrees.begin();
+  for(std::vector<ScoreCounter*>::iterator it = m_scoreCounters.begin();
+      it != m_scoreCounters.end(); it++)
+  {
+    // Quantization collapses the score alphabet before the tree is built.
+    if(m_quantize)
+        (*it)->Quantize(m_quantize);
+
+    std::cerr << "\tCreating Huffman codes for " << (*it)->Size()
+      << " scores" << std::endl;
+
+    *treeIt = new ScoreTree((*it)->Begin(), (*it)->End());
+    treeIt++;
+  }
+
+  if(m_useAlignmentInfo)
+  {
+    std::cerr << "\tCreating Huffman codes for " << m_alignCounter.Size()
+      << " alignment points" << std::endl;
+    m_alignTree = new AlignTree(m_alignCounter.Begin(), m_alignCounter.End());
+  }
+  std::cerr << std::endl;
+}
+
+
+// Registers a source symbol, assigning it the next free id on first sight;
+// later calls for the same symbol are no-ops. Not locked — presumably only
+// called from the single-threaded lexicon-loading phase; verify callers.
+void PhraseTableCreator::AddSourceSymbolId(std::string& symbol)
+{
+  boost::unordered_map<std::string, unsigned>::iterator found
+    = m_sourceSymbolsMap.find(symbol);
+  if(found == m_sourceSymbolsMap.end()) {
+    unsigned newId = m_sourceSymbolsMap.size();
+    m_sourceSymbolsMap[symbol] = newId;
+  }
+}
+
+// Registers a target symbol, assigning it the next free id on first sight;
+// later calls for the same symbol are no-ops. For concurrent use see the
+// locked variant GetOrAddTargetSymbolId.
+void PhraseTableCreator::AddTargetSymbolId(std::string& symbol)
+{
+  boost::unordered_map<std::string, unsigned>::iterator found
+    = m_targetSymbolsMap.find(symbol);
+  if(found == m_targetSymbolsMap.end()) {
+    unsigned newId = m_targetSymbolsMap.size();
+    m_targetSymbolsMap[symbol] = newId;
+  }
+}
+
+// Returns the id of a known source symbol; an unknown symbol maps to the
+// out-of-range sentinel m_sourceSymbolsMap.size().
+unsigned PhraseTableCreator::GetSourceSymbolId(std::string& symbol)
+{
+  boost::unordered_map<std::string, unsigned>::const_iterator match
+    = m_sourceSymbolsMap.find(symbol);
+  return (match == m_sourceSymbolsMap.end())
+         ? m_sourceSymbolsMap.size() : match->second;
+}
+
+// Returns the id of a known target symbol; an unknown symbol maps to the
+// out-of-range sentinel m_targetSymbolsMap.size().
+unsigned PhraseTableCreator::GetTargetSymbolId(std::string& symbol)
+{
+  boost::unordered_map<std::string, unsigned>::const_iterator match
+    = m_targetSymbolsMap.find(symbol);
+  return (match == m_targetSymbolsMap.end())
+         ? m_targetSymbolsMap.size() : match->second;
+}
+
+// Thread-safe lookup-or-insert for target symbols: returns the existing id
+// or assigns the next free one. The whole operation holds m_mutex so the
+// size() read and the insert are atomic with respect to other tasks.
+unsigned PhraseTableCreator::GetOrAddTargetSymbolId(std::string& symbol)
+{
+#ifdef WITH_THREADS
+  boost::mutex::scoped_lock lock(m_mutex);
+#endif
+  boost::unordered_map<std::string, unsigned>::iterator it
+    = m_targetSymbolsMap.find(symbol);
+
+  if(it != m_targetSymbolsMap.end())
+    return it->second;
+  else
+  {
+    unsigned value = m_targetSymbolsMap.size();
+    m_targetSymbolsMap[symbol] = value;
+    return value;
+  }
+}
+
+// Returns the rank of target word trgIdx among the translations of source
+// word srcIdx: a linear scan of the lexical-table slice that starts at
+// m_lexicalTableIndex[srcIdx] (pairs are sorted by decreasing probability).
+// If the pair is not found, the out-of-range sentinel m_lexicalTable.size()
+// is returned (callers compare against the same value as "badRank").
+unsigned PhraseTableCreator::GetRank(unsigned srcIdx, unsigned trgIdx)
+{
+  size_t srcTrgIdx = m_lexicalTableIndex[srcIdx];
+  // Stay within this source word's slice while looking for trgIdx.
+  while(srcTrgIdx < m_lexicalTable.size()
+    && srcIdx == m_lexicalTable[srcTrgIdx].first
+    && m_lexicalTable[srcTrgIdx].second != trgIdx)
+    srcTrgIdx++;
+
+  if(srcTrgIdx < m_lexicalTable.size()
+    && m_lexicalTable[srcTrgIdx].second == trgIdx)
+    return srcTrgIdx - m_lexicalTableIndex[srcIdx];
+  else
+    return m_lexicalTable.size();
+}
+
+// Type-1 REnc symbol: a plain target-symbol id, flag bits "00" on top.
+// Fixed: the original assert computed ~(1 << 31) on a signed int — shifting
+// 1 into the sign bit is undefined behaviour; use an unsigned literal.
+unsigned PhraseTableCreator::EncodeREncSymbol1(unsigned trgIdx)
+{
+  assert(trgIdx < ~(1u << 31));
+  return trgIdx;
+}
+
+// Type-2 REnc symbol: flag bits "01" (bit 30), source position in bits
+// 24-29, translation rank in the low 24 bits.
+// NOTE(review): unlike EncodePREncSymbol2, there are no asserts that
+// pos < 64 and rank < 2^24 — a larger pos would clobber the flag bits;
+// presumably guaranteed by phrase-length limits — confirm.
+unsigned PhraseTableCreator::EncodeREncSymbol2(unsigned pos, unsigned rank)
+{
+  unsigned symbol = rank;
+  symbol |= 1 << 30;
+  symbol |= pos << 24;
+  return symbol;
+}
+
+// Type-3 REnc symbol (source position equals target position): flag bits
+// "10" on top, translation rank below. Fixed: 2 << 30 on a signed int
+// shifts into the sign bit, which is undefined behaviour; use an unsigned
+// literal for a well-defined 0x80000000.
+unsigned PhraseTableCreator::EncodeREncSymbol3(unsigned rank)
+{
+  unsigned symbol = rank;
+  symbol |= 2u << 30;
+  return symbol;
+}
+
+// Type-1 PREnc symbol: a plain target-symbol id, top bit clear.
+// Fixed: the original assert computed ~(1 << 31) on a signed int — shifting
+// 1 into the sign bit is undefined behaviour; use an unsigned literal.
+unsigned PhraseTableCreator::EncodePREncSymbol1(unsigned trgIdx)
+{
+  assert(trgIdx < ~(1u << 31));
+  return trgIdx;
+}
+
+// Type-2 PREnc symbol: top bit set, then 6 bits for the left offset, 6 bits
+// for the right offset (both biased by +32 to encode the signed range
+// [-32, 31]), and the sub-phrase rank in the low 19 bits.
+// Fixed: 1 << 31 on a signed int is undefined behaviour (shift into the
+// sign bit); use an unsigned literal.
+unsigned PhraseTableCreator::EncodePREncSymbol2(int left, int right, unsigned rank)
+{
+  // "left" and "right" must be smaller than 2^5
+  // "rank" must be smaller than 2^19
+  left = left + 32;
+  right = right + 32;
+
+  assert(64 > left);
+  assert(64 > right);
+  assert(524288 > rank);
+
+  unsigned symbol = 0;
+  symbol |= 1u << 31;
+  symbol |= left << 25;
+  symbol |= right << 19;
+  symbol |= rank;
+  return symbol;
+}
+
+// Plain (no REnc/PREnc) encoding: emit one raw target-symbol id per word,
+// terminated by the stop symbol; every id is counted for the Huffman tree.
+void PhraseTableCreator::EncodeTargetPhraseNone(std::vector<std::string>& t,
+                                                std::ostream& os)
+{
+  for(size_t pos = 0; pos < t.size(); pos++)
+  {
+    unsigned id = GetOrAddTargetSymbolId(t[pos]);
+    m_symbolCounter.Increase(id);
+    os.write((char*)&id, sizeof(id));
+  }
+
+  // End-of-phrase marker (registered in the constructor, so it is known).
+  unsigned stopId = GetTargetSymbolId(m_phraseStopSymbol);
+  os.write((char*)&stopId, sizeof(stopId));
+  m_symbolCounter.Increase(stopId);
+}
+
+// REnc encoding of one target phrase: for each target word, try to replace
+// its raw id by a (position, rank) reference into the lexical table via the
+// word alignment; fall back to the raw id (type-1 symbol). Alignment points
+// consumed by a rank symbol are erased from "a" so they are not also stored
+// explicitly.
+// NOTE(review): abs(*it-i) subtracts two size_t values before the (int)
+// abs() call — correct only while phrase positions fit in int after the
+// unsigned wrap-around; appears to hold for normal phrase lengths.
+void PhraseTableCreator::EncodeTargetPhraseREnc(std::vector<std::string>& s,
+                                                std::vector<std::string>& t,
+                                                std::set<AlignPoint>& a,
+                                                std::ostream& os)
+{
+  std::stringstream encodedTargetPhrase;
+
+  // a2[targetPos] = list of aligned source positions.
+  std::vector<std::vector<size_t> > a2(t.size());
+  for(std::set<AlignPoint>::iterator it = a.begin(); it != a.end(); it++)
+    a2[it->second].push_back(it->first);
+
+  for(size_t i = 0; i < t.size(); i++)
+  {
+    unsigned idxTarget = GetOrAddTargetSymbolId(t[i]);
+    unsigned encodedSymbol = -1;
+
+    // Among all aligned source words, pick the one giving the lowest rank;
+    // break ties by smallest distance |sourcePos - targetPos|.
+    unsigned bestSrcPos = s.size();
+    unsigned bestDiff = s.size();
+    unsigned bestRank = m_lexicalTable.size();
+    unsigned badRank = m_lexicalTable.size();
+
+    for(std::vector<size_t>::iterator it = a2[i].begin(); it != a2[i].end(); it++)
+    {
+      unsigned idxSource = GetSourceSymbolId(s[*it]);
+      size_t r = GetRank(idxSource, idxTarget);
+      if(r != badRank)
+      {
+        if(r < bestRank)
+        {
+          bestRank = r;
+          bestSrcPos = *it;
+          bestDiff = abs(*it-i);
+        }
+        else if(r == bestRank && unsigned(abs(*it-i)) < bestDiff)
+        {
+          bestSrcPos = *it;
+          bestDiff = abs(*it-i);
+        }
+      }
+    }
+
+    if(bestRank != badRank && bestSrcPos < s.size())
+    {
+      // Type-3 if the source position is implicit (diagonal), else type-2.
+      if(bestSrcPos == i)
+        encodedSymbol = EncodeREncSymbol3(bestRank);
+      else
+        encodedSymbol = EncodeREncSymbol2(bestSrcPos, bestRank);
+      // This alignment point is now encoded implicitly; drop it.
+      a.erase(AlignPoint(bestSrcPos, i));
+    }
+    else
+    {
+      // No usable rank: store the raw target symbol id.
+      encodedSymbol = EncodeREncSymbol1(idxTarget);
+    }
+
+    os.write((char*)&encodedSymbol, sizeof(encodedSymbol));
+    m_symbolCounter.Increase(encodedSymbol);
+  }
+
+  // Terminate with the stop symbol, encoded as a type-1 symbol.
+  unsigned stopSymbolId = GetTargetSymbolId(m_phraseStopSymbol);
+  unsigned encodedSymbol = EncodeREncSymbol1(stopSymbolId);
+  os.write((char*)&encodedSymbol, sizeof(encodedSymbol));
+  m_symbolCounter.Increase(encodedSymbol);
+}
+
+// PREnc encoding of one target phrase: consistent sub-phrase pairs that
+// exist elsewhere in the table with a good rank are replaced by a single
+// type-2 symbol (offsets + rank); remaining words become type-1 symbols.
+// "ownRank" is this line's own rank — a full-cover sub-phrase is only used
+// if it is strictly better ranked than the phrase itself.
+void PhraseTableCreator::EncodeTargetPhrasePREnc(std::vector<std::string>& s,
+                                                 std::vector<std::string>& t,
+                                                 std::set<AlignPoint>& a,
+                                                 size_t ownRank,
+                                                 std::ostream& os)
+{
+  // encodedSymbols[j] holds the symbol starting at target position j;
+  // encodedSymbolsLengths[j] > 0 marks j as the start of a sub-phrase
+  // covering that many target words.
+  std::vector<unsigned> encodedSymbols(t.size());
+  std::vector<unsigned> encodedSymbolsLengths(t.size(), 0);
+
+  ConsistentPhrases cp(s.size(), t.size(), a);
+  while(!cp.Empty()) {
+    ConsistentPhrases::Phrase p = cp.Pop();
+
+    // Rebuild the source side (key1) and target side (key2) of the pair.
+    std::stringstream key1;
+    key1 << s[p.i];
+    for(int i = p.i+1; i < p.i+p.m; i++)
+      key1 << " " << s[i];
+
+    std::stringstream key2;
+    key2 << t[p.j];
+    for(int i = p.j+1; i < p.j+p.n; i++)
+      key2 << " " << t[i];
+
+    // Look up the pair's rank; m_rnkHash returns GetSize() for misses.
+    int rank = -1;
+    std::string key1Str = key1.str(), key2Str = key2.str();
+    size_t idx = m_rnkHash[MakeSourceTargetKey(key1Str, key2Str)];
+    if(idx != m_rnkHash.GetSize())
+      rank = m_ranks[idx];
+
+    if(rank >= 0 && (m_maxRank == 0 || unsigned(rank) < m_maxRank))
+    {
+      // Avoid self-reference: a sub-phrase spanning the whole source is
+      // only allowed if it ranks strictly better than this phrase itself.
+      if(unsigned(p.m) != s.size() || unsigned(rank) < ownRank)
+      {
+        std::stringstream encodedSymbol;
+        encodedSymbols[p.j] = EncodePREncSymbol2(p.i-p.j, s.size()-(p.i+p.m), rank);
+        encodedSymbolsLengths[p.j] = p.n;
+
+        // Alignment points inside the covered rectangle become implicit.
+        std::set<AlignPoint> tAlignment;
+        for(std::set<AlignPoint>::iterator it = a.begin();
+            it != a.end(); it++)
+          if(it->first < p.i || it->first >= p.i + p.m
+             || it->second < p.j || it->second >= p.j + p.n)
+            tAlignment.insert(*it);
+        a = tAlignment;
+        cp.RemoveOverlap(p);
+      }
+    }
+  }
+
+  std::stringstream encodedTargetPhrase;
+
+  // Emit the symbol stream: sub-phrase symbols skip their covered span,
+  // uncovered positions get plain type-1 symbols.
+  size_t j = 0;
+  while(j < t.size())
+  {
+    if(encodedSymbolsLengths[j] > 0)
+    {
+      unsigned encodedSymbol = encodedSymbols[j];
+      m_symbolCounter.Increase(encodedSymbol);
+      os.write((char*)&encodedSymbol, sizeof(encodedSymbol));
+      j += encodedSymbolsLengths[j];
+    }
+    else
+    {
+      unsigned targetSymbolId = GetOrAddTargetSymbolId(t[j]);
+      unsigned encodedSymbol = EncodePREncSymbol1(targetSymbolId);
+      m_symbolCounter.Increase(encodedSymbol);
+      os.write((char*)&encodedSymbol, sizeof(encodedSymbol));
+      j++;
+    }
+  }
+
+  // Terminate with the stop symbol, encoded as a type-1 symbol.
+  unsigned stopSymbolId = GetTargetSymbolId(m_phraseStopSymbol);
+  unsigned encodedSymbol = EncodePREncSymbol1(stopSymbolId);
+  os.write((char*)&encodedSymbol, sizeof(encodedSymbol));
+  m_symbolCounter.Increase(encodedSymbol);
+}
+
+// Writes each score as a raw float after log-transform and flooring, and
+// counts it for the Huffman tree of its component (or the shared tree when
+// a single code set is used).
+void PhraseTableCreator::EncodeScores(std::vector<float>& scores, std::ostream& os)
+{
+  for(size_t i = 0; i < scores.size(); i++)
+  {
+    float transformed = FloorScore(TransformScore(scores[i]));
+    os.write((char*)&transformed, sizeof(transformed));
+    m_scoreCounters[m_multipleScoreTrees ? i : 0]->Increase(transformed);
+  }
+}
+
+// Writes all alignment points in set order followed by a (-1,-1) sentinel;
+// every point (including the sentinel) is counted for the alignment tree.
+void PhraseTableCreator::EncodeAlignment(std::set<AlignPoint>& alignment,
+                                         std::ostream& os)
+{
+  std::set<AlignPoint>::iterator point = alignment.begin();
+  for(; point != alignment.end(); ++point)
+  {
+    os.write((char*)&(*point), sizeof(AlignPoint));
+    m_alignCounter.Increase(*point);
+  }
+  AlignPoint sentinel(-1, -1);
+  os.write((char*) &sentinel, sizeof(AlignPoint));
+  m_alignCounter.Increase(sentinel);
+}
+
+// Encodes one phrase-table line (source ||| target ||| scores [||| align])
+// into the intermediate binary form: target symbols per the selected
+// coding, then raw scores, then (optionally) alignment points.
+// Fixed: the wrong-score-count error message printed tokens[3], which is
+// out of bounds when no alignment field is present (tokens.size() == 3)
+// and is the alignment, not the scores, anyway — it now prints tokens[2].
+// Also guards against an odd number of alignment indices.
+std::string PhraseTableCreator::EncodeLine(std::vector<std::string>& tokens, size_t ownRank)
+{
+  std::string sourcePhraseStr = tokens[0];
+  std::string targetPhraseStr = tokens[1];
+  std::string scoresStr = tokens[2];
+
+  std::string alignmentStr = "";
+  if(tokens.size() > 3)
+    alignmentStr = tokens[3];
+
+  std::vector<std::string> s = Tokenize(sourcePhraseStr);
+
+  // Track the longest source phrase seen; stored in the file header.
+  size_t phraseLength = s.size();
+  if(m_maxPhraseLength < phraseLength)
+    m_maxPhraseLength = phraseLength;
+
+  std::vector<std::string> t = Tokenize(targetPhraseStr);
+  std::vector<float> scores = Tokenize<float>(scoresStr);
+
+  if(scores.size() != m_numScoreComponent) {
+    std::cerr << "Error: Wrong number of scores detected ("
+      << scores.size() << " != " << m_numScoreComponent << ") :" << std::endl;
+    std::cerr << "Line: " << tokens[0] << " ||| " << tokens[1] << " ||| " << tokens[2] << " ..." << std::endl;
+    abort();
+  }
+
+  // Parse "i-j" alignment pairs; only needed when the coding or the
+  // explicit alignment output uses them.
+  std::set<AlignPoint> a;
+  if(m_coding != None || m_useAlignmentInfo)
+  {
+    std::vector<size_t> positions = Tokenize<size_t>(alignmentStr, " \t-");
+    for(size_t i = 0; i + 1 < positions.size(); i += 2)
+    {
+      a.insert(AlignPoint(positions[i], positions[i+1]));
+    }
+  }
+
+  std::stringstream encodedTargetPhrase;
+
+  if(m_coding == PREnc)
+  {
+    EncodeTargetPhrasePREnc(s, t, a, ownRank, encodedTargetPhrase);
+  }
+  else if(m_coding == REnc)
+  {
+    EncodeTargetPhraseREnc(s, t, a, encodedTargetPhrase);
+  }
+  else
+  {
+    EncodeTargetPhraseNone(t, encodedTargetPhrase);
+  }
+
+  EncodeScores(scores, encodedTargetPhrase);
+
+  if(m_useAlignmentInfo)
+    EncodeAlignment(a, encodedTargetPhrase);
+
+  return encodedTargetPhrase.str();
+}
+
+// Re-reads one encoded target phrase collection and emits it as a Huffman
+// bit stream. Implemented as a small state machine that alternates between
+// reading a raw value and encoding it, switching from symbols to scores at
+// the stop symbol and from alignments back to symbols at the (-1,-1)
+// sentinel. The read/encode split exists because the stream test in the
+// while condition must happen between the read and the encode.
+std::string PhraseTableCreator::CompressEncodedCollection(std::string encodedCollection)
+{
+  enum EncodeState {
+    ReadSymbol, ReadScore, ReadAlignment,
+    EncodeSymbol, EncodeScore, EncodeAlignment };
+  EncodeState state = ReadSymbol;
+
+  // The stop symbol's on-disk form depends on the coding scheme.
+  unsigned phraseStopSymbolId;
+  if(m_coding == REnc)
+    phraseStopSymbolId = EncodeREncSymbol1(GetTargetSymbolId(m_phraseStopSymbol));
+  else if(m_coding == PREnc)
+    phraseStopSymbolId = EncodePREncSymbol1(GetTargetSymbolId(m_phraseStopSymbol));
+  else
+    phraseStopSymbolId = GetTargetSymbolId(m_phraseStopSymbol);
+  AlignPoint alignStopSymbol(-1, -1);
+
+  std::stringstream encodedStream(encodedCollection);
+  encodedStream.unsetf(std::ios::skipws);
+
+  std::string compressedEncodedCollection;
+  BitWrapper<> bitStream(compressedEncodedCollection);
+
+  unsigned symbol;
+  float score;
+  size_t currScore = 0;
+  AlignPoint alignPoint;
+
+  while(encodedStream)
+  {
+    switch(state)
+    {
+      case ReadSymbol:
+        encodedStream.read((char*) &symbol, sizeof(unsigned));
+        state = EncodeSymbol;
+        break;
+      case ReadScore:
+        // All score components read: continue with alignment or the next
+        // phrase's symbols.
+        if(currScore == m_numScoreComponent)
+        {
+          currScore = 0;
+          if(m_useAlignmentInfo)
+            state = ReadAlignment;
+          else
+            state = ReadSymbol;
+        }
+        else
+        {
+          encodedStream.read((char*) &score, sizeof(float));
+          currScore++;
+          state = EncodeScore;
+        }
+        break;
+      case ReadAlignment:
+        encodedStream.read((char*) &alignPoint, sizeof(AlignPoint));
+        state = EncodeAlignment;
+        break;
+
+      case EncodeSymbol:
+        // The stop symbol ends the symbol section of this phrase.
+        state = (symbol == phraseStopSymbolId) ? ReadScore : ReadSymbol;
+        m_symbolTree->Put(bitStream, symbol);
+        break;
+      case EncodeScore:
+        {
+          state = ReadScore;
+          size_t idx = m_multipleScoreTrees ? currScore-1 : 0;
+          // Quantized trees only contain representative values; snap to one.
+          if(m_quantize)
+            score = m_scoreCounters[idx]->LowerBound(score);
+          m_scoreTrees[idx]->Put(bitStream, score);
+        }
+        break;
+      case EncodeAlignment:
+        // The (-1,-1) sentinel ends this phrase's alignment section.
+        state = (alignPoint == alignStopSymbol) ? ReadSymbol : ReadAlignment;
+        m_alignTree->Put(bitStream, alignPoint);
+        break;
+    }
+  }
+
+  return compressedEncodedCollection;
+}
+
+// Queues one ranked line for in-order flushing; caller (RankingTask) must
+// hold its result mutex, as m_queue itself is not synchronized.
+void PhraseTableCreator::AddRankedLine(PackedItem& pi)
+{
+  m_queue.push(pi);
+}
+
+// Drains m_queue in strict line order (items arrive out of order from the
+// worker threads; the priority queue surfaces the next expected line).
+// Whenever the source phrase changes, the scores buffered in m_rankQueue
+// are converted into per-line ranks in m_ranks. With force=true the
+// remaining state is flushed and counters reset for the next pass.
+void PhraseTableCreator::FlushRankedQueue(bool force)
+{
+  // Batch size for feeding source/target keys into the rank hash.
+  size_t step = 1ul << 10;
+
+  // Only consume while the next line number is exactly the expected one.
+  while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
+  {
+    m_lastFlushedLine++;
+
+    PackedItem pi = m_queue.top();
+    m_queue.pop();
+
+    if(m_lastSourceRange.size() == step)
+    {
+      m_rnkHash.AddRange(m_lastSourceRange);
+      m_lastSourceRange.clear();
+    }
+
+    // Source phrase changed: assign ranks for the previous phrase's lines.
+    if(m_lastFlushedSourcePhrase != pi.GetSrc())
+    {
+      if(m_rankQueue.size()) {
+        m_lastFlushedSourceNum++;
+        if(m_lastFlushedSourceNum % 100000 == 0) {
+          std::cerr << ".";
+        }
+        if(m_lastFlushedSourceNum % 5000000 == 0)
+        {
+          std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl;
+        }
+
+        // m_rankQueue pops by descending score; r is the resulting rank.
+        m_ranks.resize(m_lastFlushedLine + 1);
+        int r = 0;
+        while(!m_rankQueue.empty()) {
+          m_ranks[m_rankQueue.top().second] = r++;
+          m_rankQueue.pop();
+        }
+      }
+    }
+
+    m_lastSourceRange.push_back(pi.GetTrg());
+
+    m_rankQueue.push(std::make_pair(pi.GetScore(), pi.GetLine()));
+    m_lastFlushedSourcePhrase = pi.GetSrc();
+  }
+
+  if(force)
+  {
+    m_rnkHash.AddRange(m_lastSourceRange);
+    m_lastSourceRange.clear();
+
+#ifdef WITH_THREADS
+    m_rnkHash.WaitAll();
+#endif
+
+    // Flush ranks for the final source phrase.
+    m_ranks.resize(m_lastFlushedLine + 1);
+    int r = 0;
+    while(!m_rankQueue.empty())
+    {
+      m_ranks[m_rankQueue.top().second] = r++;
+      m_rankQueue.pop();
+    }
+
+    // Reset shared counters so the next pass starts clean.
+    m_lastFlushedLine = -1;
+    m_lastFlushedSourceNum = 0;
+
+    std::cerr << std::endl << std::endl;
+  }
+}
+
+
+// Queues one encoded line for in-order flushing; caller (EncodingTask)
+// must hold its result mutex, as m_queue itself is not synchronized.
+void PhraseTableCreator::AddEncodedLine(PackedItem& pi)
+{
+  m_queue.push(pi);
+}
+
+// Drains m_queue in strict line order, grouping consecutive lines with the
+// same source phrase into one target phrase collection. Completed
+// collections go to m_encodedTargetPhrases and their source keys feed the
+// source hash in landmark-sized batches. With force=true the last
+// collection is flushed and the source hash is finalized.
+void PhraseTableCreator::FlushEncodedQueue(bool force)
+{
+  while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
+  {
+    PackedItem pi = m_queue.top();
+    m_queue.pop();
+    m_lastFlushedLine++;
+
+    // Source phrase changed: the previous collection is complete.
+    if(m_lastFlushedSourcePhrase != pi.GetSrc())
+    {
+      if(m_lastCollection.size())
+      {
+        // Concatenate all encoded target phrases of one source phrase.
+        std::stringstream targetPhraseCollection;
+        for(std::vector<std::string>::iterator it =
+          m_lastCollection.begin(); it != m_lastCollection.end(); it++)
+          targetPhraseCollection << *it;
+
+        m_lastSourceRange.push_back(MakeSourceKey(m_lastFlushedSourcePhrase));
+        m_encodedTargetPhrases.push_back(targetPhraseCollection.str());
+
+        m_lastFlushedSourceNum++;
+        if(m_lastFlushedSourceNum % 100000 == 0)
+          std::cerr << ".";
+        if(m_lastFlushedSourceNum % 5000000 == 0)
+          std::cerr << "[" << m_lastFlushedSourceNum << "]" << std::endl;
+
+        m_lastCollection.clear();
+      }
+    }
+
+    // One landmark's worth of source keys: push into the hash and stream
+    // the finished range straight to disk to bound memory use.
+    if(m_lastSourceRange.size() == (1ul << m_orderBits))
+    {
+      m_srcHash.AddRange(m_lastSourceRange);
+      m_srcHash.SaveLastRange();
+      m_srcHash.DropLastRange();
+      m_lastSourceRange.clear();
+    }
+
+    m_lastFlushedSourcePhrase = pi.GetSrc();
+    // For PREnc the collection is ordered by rank, not arrival order.
+    if(m_coding == PREnc)
+    {
+      if(m_lastCollection.size() <= pi.GetRank())
+        m_lastCollection.resize(pi.GetRank() + 1);
+      m_lastCollection[pi.GetRank()] = pi.GetTrg();
+    }
+    else
+    {
+      m_lastCollection.push_back(pi.GetTrg());
+    }
+  }
+
+  if(force)
+  {
+    // Flush the final collection.
+    if(m_lastCollection.size())
+    {
+      std::stringstream targetPhraseCollection;
+      for(std::vector<std::string>::iterator it =
+        m_lastCollection.begin(); it != m_lastCollection.end(); it++)
+        targetPhraseCollection << *it;
+
+      m_encodedTargetPhrases.push_back(targetPhraseCollection.str());
+      m_lastCollection.clear();
+    }
+
+    m_srcHash.AddRange(m_lastSourceRange);
+    m_lastSourceRange.clear();
+
+#ifdef WITH_THREADS
+    m_srcHash.WaitAll();
+#endif
+
+    m_srcHash.SaveLastRange();
+    m_srcHash.DropLastRange();
+    m_srcHash.FinalizeSave();
+
+    // Reset shared counters so the next pass starts clean.
+    m_lastFlushedLine = -1;
+    m_lastFlushedSourceNum = 0;
+
+    std::cerr << std::endl << std::endl;
+  }
+}
+
+// Queues one compressed collection for in-order flushing; caller
+// (CompressionTask) must hold its mutex, as m_queue is not synchronized.
+void PhraseTableCreator::AddCompressedCollection(PackedItem& pi)
+{
+  m_queue.push(pi);
+}
+
+// Appends compressed collections to m_compressedTargetPhrases in strict
+// line order. Work is only done when forced or the queue has grown large,
+// to amortize the cost across many Add calls.
+void PhraseTableCreator::FlushCompressedQueue(bool force)
+{
+  if(force || m_queue.size() > 10000)
+  {
+    while(!m_queue.empty() && m_lastFlushedLine + 1 == m_queue.top().GetLine())
+    {
+      PackedItem pi = m_queue.top();
+      m_queue.pop();
+      m_lastFlushedLine++;
+
+      m_compressedTargetPhrases.push_back(pi.GetTrg());
+
+      if((pi.GetLine()+1) % 100000 == 0)
+        std::cerr << ".";
+      if((pi.GetLine()+1) % 5000000 == 0)
+        std::cerr << "[" << (pi.GetLine()+1) << "]" << std::endl;
+    }
+  }
+
+  if(force)
+  {
+    // Reset for any subsequent pass.
+    m_lastFlushedLine = -1;
+    std::cerr << std::endl << std::endl;
+  }
+}
+
+//****************************************************************************//
+
+size_t RankingTask::m_lineNum = 0;
+#ifdef WITH_THREADS
+boost::mutex RankingTask::m_mutex;
+boost::mutex RankingTask::m_fileMutex;
+#endif
+
+// Binds the task to the shared input stream and the owning creator; the
+// references must outlive the task (both live in CreateRankHash's scope).
+RankingTask::RankingTask(InputFileStream& inFile, PhraseTableCreator& creator)
+  : m_inFile(inFile), m_creator(creator) {}
+
+// Worker body for the PREnc 0th pass: repeatedly grabs a batch of up to
+// 1000 lines (under the file mutex, recording the global start line),
+// parses each into source/target keys plus its sort score, then hands the
+// batch to the creator (under the result mutex) for in-order ranking.
+// NOTE(review): the batch-refill code at the end of the loop intentionally
+// duplicates the initial read block; the file lock taken there is released
+// when the loop iteration ends.
+void RankingTask::operator()()
+{
+  size_t lineNum = 0;
+
+  std::vector<std::string> lines;
+  size_t max_lines = 1000;
+  lines.reserve(max_lines);
+
+  {
+#ifdef WITH_THREADS
+    boost::mutex::scoped_lock lock(m_fileMutex);
+#endif
+    std::string line;
+    while(lines.size() < max_lines && std::getline(m_inFile, line))
+      lines.push_back(line);
+    lineNum = m_lineNum;
+    m_lineNum += lines.size();
+  }
+
+  std::vector<PackedItem> result;
+  result.reserve(max_lines);
+
+  while(lines.size())
+  {
+    for(size_t i = 0; i < lines.size(); i++)
+    {
+      std::vector<std::string> tokens;
+      Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
+
+      if(tokens.size() < 3)
+      {
+        std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
+        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+        abort();
+      }
+      if(tokens.size() == 3 && m_creator.m_warnMe) {
+        std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
+        std::cerr << "but you are using PREnc encoding which makes use of alignment data. " << std::endl;
+        std::cerr << "Better use -encoding None or disable this warning with -no-warnings ." << std::endl;
+        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+      }
+
+      std::vector<float> scores = Tokenize<float>(tokens[2]);
+      if(scores.size() != m_creator.m_numScoreComponent) {
+        std::cerr << "Error: It seems the following line has a wrong number of scores ("
+          << scores.size() << " != " << m_creator.m_numScoreComponent << ") :" << std::endl;
+        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+        abort();
+      }
+
+      // The rank is determined by this one configured score component.
+      float sortScore = scores[m_creator.m_sortScoreIndex];
+
+      std::string key1 = m_creator.MakeSourceKey(tokens[0]);
+      std::string key2 = m_creator.MakeSourceTargetKey(tokens[0], tokens[1]);
+
+      PackedItem packedItem(lineNum + i, key1, key2, 0, sortScore);
+      result.push_back(packedItem);
+    }
+    lines.clear();
+
+    {
+#ifdef WITH_THREADS
+      boost::mutex::scoped_lock lock(m_mutex);
+#endif
+      for(size_t i = 0; i < result.size(); i++)
+        m_creator.AddRankedLine(result[i]);
+      m_creator.FlushRankedQueue();
+    }
+
+    result.clear();
+    lines.reserve(max_lines);
+    result.reserve(max_lines);
+
+#ifdef WITH_THREADS
+    boost::mutex::scoped_lock lock(m_fileMutex);
+#endif
+    std::string line;
+    while(lines.size() < max_lines && std::getline(m_inFile, line))
+      lines.push_back(line);
+    lineNum = m_lineNum;
+    m_lineNum += lines.size();
+  }
+}
+
+size_t EncodingTask::m_lineNum = 0;
+#ifdef WITH_THREADS
+boost::mutex EncodingTask::m_mutex;
+boost::mutex EncodingTask::m_fileMutex;
+#endif
+
+// Binds the task to the shared input stream and the owning creator; the
+// references must outlive the task (both live in EncodeTargetPhrases).
+EncodingTask::EncodingTask(InputFileStream& inFile, PhraseTableCreator& creator)
+  : m_inFile(inFile), m_creator(creator) {}
+
+// Worker body for the 1st pass: grabs batches of up to 1000 lines under
+// the file mutex (recording the global start line), encodes each line via
+// PhraseTableCreator::EncodeLine (looking up its PREnc rank if needed),
+// and hands the batch back under the result mutex for in-order flushing.
+// NOTE(review): the batch-refill code at the end of the loop intentionally
+// duplicates the initial read block; the file lock taken there is released
+// when the loop iteration ends.
+void EncodingTask::operator()()
+{
+  size_t lineNum = 0;
+
+  std::vector<std::string> lines;
+  size_t max_lines = 1000;
+  lines.reserve(max_lines);
+
+  {
+#ifdef WITH_THREADS
+    boost::mutex::scoped_lock lock(m_fileMutex);
+#endif
+    std::string line;
+    while(lines.size() < max_lines && std::getline(m_inFile, line))
+      lines.push_back(line);
+    lineNum = m_lineNum;
+    m_lineNum += lines.size();
+  }
+
+  std::vector<PackedItem> result;
+  result.reserve(max_lines);
+
+  while(lines.size())
+  {
+    for(size_t i = 0; i < lines.size(); i++)
+    {
+      std::vector<std::string> tokens;
+      Moses::TokenizeMultiCharSeparator(tokens, lines[i], m_creator.m_separator);
+
+      if(tokens.size() < 3)
+      {
+        std::cerr << "Error: It seems the following line has a wrong format:" << std::endl;
+        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+        abort();
+      }
+      if(tokens.size() == 3 && m_creator.m_coding != PhraseTableCreator::None && m_creator.m_warnMe) {
+        std::cerr << "Warning: It seems the following line contains no alignment information, " << std::endl;
+        std::cerr << "but you are using ";
+        std::cerr << (m_creator.m_coding == PhraseTableCreator::PREnc ? "PREnc" : "REnc");
+        std::cerr << " encoding which makes use of alignment data. " << std::endl;
+        std::cerr << "Better use -encoding None or disable this warning with -no-warnings." << std::endl;
+        std::cerr << "Line " << i << ": " << lines[i] << std::endl;
+      }
+
+      // PREnc: the line's own rank was computed in the 0th pass.
+      size_t ownRank = 0;
+      if(m_creator.m_coding == PhraseTableCreator::PREnc)
+        ownRank = m_creator.m_ranks[lineNum + i];
+
+      std::string encodedLine = m_creator.EncodeLine(tokens, ownRank);
+
+      PackedItem packedItem(lineNum + i, tokens[0], encodedLine, ownRank);
+      result.push_back(packedItem);
+    }
+    lines.clear();
+
+    {
+#ifdef WITH_THREADS
+      boost::mutex::scoped_lock lock(m_mutex);
+#endif
+      for(size_t i = 0; i < result.size(); i++)
+        m_creator.AddEncodedLine(result[i]);
+      m_creator.FlushEncodedQueue();
+    }
+
+    result.clear();
+    lines.reserve(max_lines);
+    result.reserve(max_lines);
+
+#ifdef WITH_THREADS
+    boost::mutex::scoped_lock lock(m_fileMutex);
+#endif
+    std::string line;
+    while(lines.size() < max_lines && std::getline(m_inFile, line))
+      lines.push_back(line);
+    lineNum = m_lineNum;
+    m_lineNum += lines.size();
+  }
+}
+
+//****************************************************************************//
+
+size_t CompressionTask::m_collectionNum = 0;
+#ifdef WITH_THREADS
+boost::mutex CompressionTask::m_mutex;
+#endif
+
+// Binds the task to the shared vector of encoded collections and the
+// owning creator; both references must outlive the task.
+CompressionTask::CompressionTask(StringVector<unsigned char, unsigned long,
+                                 MmapAllocator>& encodedCollections,
+                                 PhraseTableCreator& creator)
+  : m_encodedCollections(encodedCollections), m_creator(creator) {}
+
+// Worker body for the 2nd pass: claims collection indices from the shared
+// counter (under the mutex), compresses each collection, and pushes the
+// result back for in-order flushing. The lock taken inside the loop also
+// covers claiming the next index, and is released at the iteration's end.
+void CompressionTask::operator()()
+{
+  size_t collectionNum;
+  {
+#ifdef WITH_THREADS
+    boost::mutex::scoped_lock lock(m_mutex);
+#endif
+    collectionNum = m_collectionNum;
+    m_collectionNum++;
+  }
+
+  while(collectionNum < m_encodedCollections.size())
+  {
+    std::string collection = m_encodedCollections[collectionNum];
+    std::string compressedCollection
+      = m_creator.CompressEncodedCollection(collection);
+
+    // PackedItem's "source" field is unused at this stage.
+    std::string dummy;
+    PackedItem packedItem(collectionNum, dummy, compressedCollection, 0);
+
+#ifdef WITH_THREADS
+    boost::mutex::scoped_lock lock(m_mutex);
+#endif
+    m_creator.AddCompressedCollection(packedItem);
+    m_creator.FlushCompressedQueue();
+
+    collectionNum = m_collectionNum;
+    m_collectionNum++;
+  }
+}
+
+//****************************************************************************//
+
+// A line's worth of work-in-flight: its global line number (used for
+// in-order flushing), source phrase (or key), payload string (target key,
+// encoded line or compressed collection depending on the pass), PREnc rank
+// and sort score (only meaningful in the ranking pass).
+PackedItem::PackedItem(long line, std::string sourcePhrase,
+                       std::string packedTargetPhrase, size_t rank,
+                       float score)
+  : m_line(line), m_sourcePhrase(sourcePhrase),
+    m_packedTargetPhrase(packedTargetPhrase), m_rank(rank),
+    m_score(score) {}
+
+// Trivial accessors.
+long PackedItem::GetLine() const { return m_line; }
+
+const std::string& PackedItem::GetSrc() const { return m_sourcePhrase; }
+
+const std::string& PackedItem::GetTrg() const { return m_packedTargetPhrase; }
+
+size_t PackedItem::GetRank() const { return m_rank; }
+
+float PackedItem::GetScore() const { return m_score; }
+
+}
diff --git a/moses/src/CompactPT/PhraseTableCreator.h b/moses/src/CompactPT/PhraseTableCreator.h
new file mode 100644
index 000000000..f1ab3c772
--- /dev/null
+++ b/moses/src/CompactPT/PhraseTableCreator.h
@@ -0,0 +1,425 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_PhraseTableCreator_h
+#define moses_PhraseTableCreator_h
+
+#include <sstream>
+#include <iostream>
+#include <queue>
+#include <vector>
+#include <set>
+#include <boost/unordered_map.hpp>
+
+#include "InputFileStream.h"
+#include "ThreadPool.h"
+#include "UserMessage.h"
+#include "Util.h"
+
+#include "BlockHashIndex.h"
+#include "StringVector.h"
+#include "CanonicalHuffman.h"
+
+namespace Moses
+{
+
+typedef std::pair<unsigned char, unsigned char> AlignPoint;
+
+template <typename DataType>
+class Counter
+{
+ public:
+ typedef boost::unordered_map<DataType, size_t> FreqMap;
+ typedef typename FreqMap::iterator iterator;
+ typedef typename FreqMap::mapped_type mapped_type;
+ typedef typename FreqMap::value_type value_type;
+
+ private:
+#ifdef WITH_THREADS
+ boost::mutex m_mutex;
+#endif
+ FreqMap m_freqMap;
+ size_t m_maxSize;
+ std::vector<DataType> m_bestVec;
+
+ struct FreqSorter
+ {
+ bool operator()(const value_type& a, const value_type& b) const
+ {
+ if(a.second > b.second)
+ return true;
+ // Check impact on translation quality!
+ if(a.second == b.second && a.first > b.first)
+ return true;
+ return false;
+ }
+ };
+
+ public:
+ Counter() : m_maxSize(0) {}
+
+ iterator Begin()
+ {
+ return m_freqMap.begin();
+ }
+
+ iterator End()
+ {
+ return m_freqMap.end();
+ }
+
+ void Increase(DataType data)
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ m_freqMap[data]++;
+ }
+
+ void IncreaseBy(DataType data, size_t num)
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ m_freqMap[data] += num;
+ }
+
+ mapped_type& operator[](DataType data)
+ {
+ return m_freqMap[data];
+ }
+
+ size_t Size()
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ return m_freqMap.size();
+ }
+
+ void Quantize(size_t maxSize)
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ m_maxSize = maxSize;
+ std::vector<std::pair<DataType, mapped_type> > freqVec;
+ freqVec.insert(freqVec.begin(), m_freqMap.begin(), m_freqMap.end());
+ std::sort(freqVec.begin(), freqVec.end(), FreqSorter());
+
+ for(size_t i = 0; i < freqVec.size() && i < m_maxSize; i++)
+ m_bestVec.push_back(freqVec[i].first);
+
+ std::sort(m_bestVec.begin(), m_bestVec.end());
+
+ FreqMap t_freqMap;
+ for(typename std::vector<std::pair<DataType, mapped_type> >::iterator it
+ = freqVec.begin(); it != freqVec.end(); it++)
+ {
+ DataType closest = LowerBound(it->first);
+ t_freqMap[closest] += it->second;
+ }
+
+ m_freqMap.swap(t_freqMap);
+ }
+
+ void Clear()
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ m_freqMap.clear();
+ }
+
+ DataType LowerBound(DataType data)
+ {
+ if(m_maxSize == 0 || m_bestVec.size() == 0)
+ return data;
+ else
+ {
+ typename std::vector<DataType>::iterator it
+ = std::lower_bound(m_bestVec.begin(), m_bestVec.end(), data);
+ if(it != m_bestVec.end())
+ return *it;
+ else
+ return m_bestVec.back();
+ }
+ }
+};
+
+class PackedItem
+{
+ private:
+ long m_line;
+ std::string m_sourcePhrase;
+ std::string m_packedTargetPhrase;
+ size_t m_rank;
+ float m_score;
+
+ public:
+ PackedItem(long line, std::string sourcePhrase,
+ std::string packedTargetPhrase, size_t rank,
+ float m_score = 0);
+
+ long GetLine() const;
+ const std::string& GetSrc() const;
+ const std::string& GetTrg() const;
+ size_t GetRank() const;
+ float GetScore() const;
+};
+
+bool operator<(const PackedItem &pi1, const PackedItem &pi2);
+
+class PhraseTableCreator
+{
+ public:
+ enum Coding { None, REnc, PREnc };
+
+ private:
+ std::string m_inPath;
+ std::string m_outPath;
+
+ std::FILE* m_outFile;
+
+ size_t m_numScoreComponent;
+ size_t m_sortScoreIndex;
+ size_t m_warnMe;
+
+ Coding m_coding;
+ size_t m_orderBits;
+ size_t m_fingerPrintBits;
+ bool m_useAlignmentInfo;
+ bool m_multipleScoreTrees;
+ size_t m_quantize;
+ size_t m_maxRank;
+
+ static std::string m_phraseStopSymbol;
+ static std::string m_separator;
+
+#ifdef WITH_THREADS
+ size_t m_threads;
+ boost::mutex m_mutex;
+#endif
+
+ BlockHashIndex m_srcHash;
+ BlockHashIndex m_rnkHash;
+
+ size_t m_maxPhraseLength;
+
+ std::vector<unsigned> m_ranks;
+
+ typedef std::pair<unsigned, unsigned> SrcTrg;
+ typedef std::pair<std::string, std::string> SrcTrgString;
+ typedef std::pair<SrcTrgString, float> SrcTrgProb;
+
+ struct SrcTrgProbSorter
+ {
+ bool operator()(const SrcTrgProb& a, const SrcTrgProb& b) const
+ {
+ if(a.first.first < b.first.first)
+ return true;
+
+ if(a.first.first == b.first.first && a.second > b.second)
+ return true;
+
+ if(a.first.first == b.first.first
+ && a.second == b.second
+ && a.first.second < b.first.second)
+ return true;
+
+ return false;
+ }
+ };
+
+ std::vector<size_t> m_lexicalTableIndex;
+ std::vector<SrcTrg> m_lexicalTable;
+
+ StringVector<unsigned char, unsigned long, MmapAllocator>
+ m_encodedTargetPhrases;
+
+ StringVector<unsigned char, unsigned long, MmapAllocator>
+ m_compressedTargetPhrases;
+
+ boost::unordered_map<std::string, unsigned> m_targetSymbolsMap;
+ boost::unordered_map<std::string, unsigned> m_sourceSymbolsMap;
+
+ typedef Counter<unsigned> SymbolCounter;
+ typedef Counter<float> ScoreCounter;
+ typedef Counter<AlignPoint> AlignCounter;
+
+ typedef CanonicalHuffman<unsigned> SymbolTree;
+ typedef CanonicalHuffman<float> ScoreTree;
+ typedef CanonicalHuffman<AlignPoint> AlignTree;
+
+ SymbolCounter m_symbolCounter;
+ SymbolTree* m_symbolTree;
+
+ AlignCounter m_alignCounter;
+ AlignTree* m_alignTree;
+
+ std::vector<ScoreCounter*> m_scoreCounters;
+ std::vector<ScoreTree*> m_scoreTrees;
+
+ std::priority_queue<PackedItem> m_queue;
+ long m_lastFlushedLine;
+ long m_lastFlushedSourceNum;
+ std::string m_lastFlushedSourcePhrase;
+ std::vector<std::string> m_lastSourceRange;
+ std::priority_queue<std::pair<float, size_t> > m_rankQueue;
+ std::vector<std::string> m_lastCollection;
+
+ void Save();
+ void PrintInfo();
+
+ void AddSourceSymbolId(std::string& symbol);
+ unsigned GetSourceSymbolId(std::string& symbol);
+
+ void AddTargetSymbolId(std::string& symbol);
+ unsigned GetTargetSymbolId(std::string& symbol);
+ unsigned GetOrAddTargetSymbolId(std::string& symbol);
+
+ unsigned GetRank(unsigned srcIdx, unsigned trgIdx);
+
+ unsigned EncodeREncSymbol1(unsigned symbol);
+ unsigned EncodeREncSymbol2(unsigned position, unsigned rank);
+ unsigned EncodeREncSymbol3(unsigned rank);
+
+ unsigned EncodePREncSymbol1(unsigned symbol);
+ unsigned EncodePREncSymbol2(int lOff, int rOff, unsigned rank);
+
+ void EncodeTargetPhraseNone(std::vector<std::string>& t,
+ std::ostream& os);
+
+ void EncodeTargetPhraseREnc(std::vector<std::string>& s,
+ std::vector<std::string>& t,
+ std::set<AlignPoint>& a,
+ std::ostream& os);
+
+ void EncodeTargetPhrasePREnc(std::vector<std::string>& s,
+ std::vector<std::string>& t,
+ std::set<AlignPoint>& a, size_t ownRank,
+ std::ostream& os);
+
+ void EncodeScores(std::vector<float>& scores, std::ostream& os);
+ void EncodeAlignment(std::set<AlignPoint>& alignment, std::ostream& os);
+
+ std::string MakeSourceKey(std::string&);
+ std::string MakeSourceTargetKey(std::string&, std::string&);
+
+ void LoadLexicalTable(std::string filePath);
+
+ void CreateRankHash();
+ void EncodeTargetPhrases();
+ void CalcHuffmanCodes();
+ void CompressTargetPhrases();
+
+ void AddRankedLine(PackedItem& pi);
+ void FlushRankedQueue(bool force = false);
+
+ std::string EncodeLine(std::vector<std::string>& tokens, size_t ownRank);
+ void AddEncodedLine(PackedItem& pi);
+ void FlushEncodedQueue(bool force = false);
+
+ std::string CompressEncodedCollection(std::string encodedCollection);
+ void AddCompressedCollection(PackedItem& pi);
+ void FlushCompressedQueue(bool force = false);
+
+ public:
+
+ PhraseTableCreator(std::string inPath,
+ std::string outPath,
+ size_t numScoreComponent = 5,
+ size_t sortScoreIndex = 2,
+ Coding coding = PREnc,
+ size_t orderBits = 10,
+ size_t fingerPrintBits = 16,
+ bool useAlignmentInfo = false,
+ bool multipleScoreTrees = true,
+ size_t quantize = 0,
+ size_t maxRank = 100,
+ bool warnMe = true
+#ifdef WITH_THREADS
+ , size_t threads = 2
+#endif
+ );
+
+ ~PhraseTableCreator();
+
+ friend class RankingTask;
+ friend class EncodingTask;
+ friend class CompressionTask;
+};
+
+class RankingTask
+{
+ private:
+#ifdef WITH_THREADS
+ static boost::mutex m_mutex;
+ static boost::mutex m_fileMutex;
+#endif
+ static size_t m_lineNum;
+ InputFileStream& m_inFile;
+ PhraseTableCreator& m_creator;
+
+ public:
+ RankingTask(InputFileStream& inFile, PhraseTableCreator& creator);
+ void operator()();
+};
+
+class EncodingTask
+{
+ private:
+#ifdef WITH_THREADS
+ static boost::mutex m_mutex;
+ static boost::mutex m_fileMutex;
+#endif
+ static size_t m_lineNum;
+ static size_t m_sourcePhraseNum;
+ static std::string m_lastSourcePhrase;
+
+ InputFileStream& m_inFile;
+ PhraseTableCreator& m_creator;
+
+ public:
+ EncodingTask(InputFileStream& inFile, PhraseTableCreator& creator);
+ void operator()();
+};
+
+class CompressionTask
+{
+ private:
+#ifdef WITH_THREADS
+ static boost::mutex m_mutex;
+#endif
+ static size_t m_collectionNum;
+ StringVector<unsigned char, unsigned long, MmapAllocator>&
+ m_encodedCollections;
+ PhraseTableCreator& m_creator;
+
+ public:
+ CompressionTask(StringVector<unsigned char, unsigned long, MmapAllocator>&
+ encodedCollections, PhraseTableCreator& creator);
+ void operator()();
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/StringVector.h b/moses/src/CompactPT/StringVector.h
new file mode 100644
index 000000000..76146176d
--- /dev/null
+++ b/moses/src/CompactPT/StringVector.h
@@ -0,0 +1,622 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_StringVector_h
+#define moses_StringVector_h
+
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <iterator>
+#include <cstdio>
+#include <cassert>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+#include "ThrowingFwrite.h"
+#include "MonotonicVector.h"
+#include "MmapAllocator.h"
+
+namespace Moses
+{
+
+// ********** ValueIteratorRange **********
+
+template <typename ValueIteratorT>
+class ValueIteratorRange
+{
+ private:
+ ValueIteratorT m_begin;
+ ValueIteratorT m_end;
+
+ public:
+ ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end);
+
+ const ValueIteratorT& begin() const;
+ const ValueIteratorT& end() const;
+ const std::string str() const;
+ operator const std::string()
+ {
+ return str();
+ }
+
+ size_t size()
+ {
+ return std::distance(m_begin, m_end);
+ }
+
+ template <typename StringT>
+ bool operator==(const StringT& o) const;
+ bool operator==(const char* c) const;
+
+ template <typename StringT>
+ bool operator<(const StringT& o) const;
+ bool operator<(const char* c) const;
+};
+
+// ********** StringVector **********
+
+template <typename ValueT = unsigned char, typename PosT = unsigned int,
+ template <typename> class Allocator = std::allocator>
+class StringVector
+{
+ protected:
+ std::vector<ValueT, Allocator<ValueT> > m_charArray;
+ MonotonicVector<PosT, unsigned int, 32, Allocator> m_positions;
+ bool m_sorted;
+ bool m_memoryMapped;
+
+ virtual const ValueT* value_ptr(PosT i) const;
+
+ public:
+ typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
+
+ // ********** RangeIterator **********
+
+ class RangeIterator : public boost::iterator_facade<RangeIterator,
+ range, std::random_access_iterator_tag, range, PosT>
+ {
+
+ private:
+ PosT m_index;
+ StringVector<ValueT, PosT, Allocator>* m_container;
+
+ public:
+ RangeIterator();
+ RangeIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+ PosT get_index();
+
+ private:
+ friend class boost::iterator_core_access;
+
+ range dereference() const;
+ bool equal(RangeIterator const& other) const;
+ void increment();
+ void decrement();
+ void advance(PosT n);
+
+ PosT distance_to(RangeIterator const& other) const;
+ };
+
+ // ********** StringIterator **********
+
+ class StringIterator : public boost::iterator_facade<StringIterator,
+ std::string, std::random_access_iterator_tag, const std::string, PosT>
+ {
+
+ private:
+ PosT m_index;
+ StringVector<ValueT, PosT, Allocator>* m_container;
+
+ public:
+ StringIterator();
+ StringIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+ PosT get_index();
+
+ private:
+ friend class boost::iterator_core_access;
+
+ const std::string dereference() const;
+ bool equal(StringIterator const& other) const;
+ void increment();
+ void decrement();
+ void advance(PosT n);
+ PosT distance_to(StringIterator const& other) const;
+ };
+
+ typedef RangeIterator iterator;
+ typedef StringIterator string_iterator;
+
+ StringVector();
+
+ void swap(StringVector<ValueT, PosT, Allocator> &c)
+ {
+ m_positions.commit();
+ m_positions.swap(c.m_positions);
+ m_charArray.swap(c.m_charArray);
+
+ bool temp = m_sorted;
+ m_sorted = c.m_sorted;
+ c.m_sorted = temp;
+ }
+
+ bool is_sorted() const;
+ PosT size() const;
+ virtual PosT size2() const;
+
+ template<class Iterator> Iterator begin() const;
+ template<class Iterator> Iterator end() const;
+
+ iterator begin() const;
+ iterator end() const;
+
+ PosT length(PosT i) const;
+ typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
+ typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
+
+ void clear()
+ {
+ m_charArray.clear();
+ m_sorted = true;
+ m_positions = MonotonicVector<PosT, unsigned int, 32>();
+ }
+
+ range at(PosT i) const;
+ range operator[](PosT i) const;
+ range back() const;
+
+ template <typename StringT>
+ void push_back(StringT s);
+ void push_back(const char* c);
+
+ template <typename StringT>
+ PosT find(StringT &s) const;
+ PosT find(const char* c) const;
+
+ virtual size_t load(std::FILE* in, bool memoryMapped = false)
+ {
+ size_t size = 0;
+ m_memoryMapped = memoryMapped;
+
+ size += std::fread(&m_sorted, sizeof(bool), 1, in) * sizeof(bool);
+ size += m_positions.load(in, m_memoryMapped);
+
+ size += loadCharArray(m_charArray, in, m_memoryMapped);
+ return size;
+ }
+
+ size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >& c,
+ std::FILE* in, bool map = false)
+ {
+ // Can only be read into memory. Mapping not possible with std:allocator.
+ assert(map == false);
+
+ size_t byteSize = 0;
+
+ size_t valSize;
+ byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+ c.resize(valSize, 0);
+ byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+
+ return byteSize;
+ }
+
+ size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >& c,
+ std::FILE* in, bool map = false)
+ {
+ size_t byteSize = 0;
+
+ size_t valSize;
+ byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+ if(map == false)
+ {
+ // Read data into temporary file (default constructor of MmapAllocator)
+ // and map memory onto temporary file. Can be resized.
+
+ c.resize(valSize, 0);
+ byteSize += std::fread(&c[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+ }
+ else
+ {
+ // Map it directly on specified region of file "in" starting at valPos
+ // with length valSize * sizeof(ValueT). Mapped region cannot be resized.
+
+ size_t valPos = std::ftell(in);
+ Allocator<ValueT> alloc(in, valPos);
+ std::vector<ValueT, Allocator<ValueT> > charArrayTemp(alloc);
+ charArrayTemp.resize(valSize);
+ c.swap(charArrayTemp);
+
+ byteSize += valSize * sizeof(ValueT);
+ }
+
+ return byteSize;
+ }
+
+ size_t load(std::string filename, bool memoryMapped = false)
+ {
+ std::FILE* pFile = fopen(filename.c_str(), "r");
+ size_t byteSize = load(pFile, memoryMapped);
+ fclose(pFile);
+ return byteSize;
+ }
+
+ size_t save(std::FILE* out)
+ {
+ size_t byteSize = 0;
+ byteSize += ThrowingFwrite(&m_sorted, sizeof(bool), 1, out) * sizeof(bool);
+
+ byteSize += m_positions.save(out);
+
+ size_t valSize = size2();
+ byteSize += ThrowingFwrite(&valSize, sizeof(size_t), 1, out) * sizeof(size_t);
+ byteSize += ThrowingFwrite(&m_charArray[0], sizeof(ValueT), valSize, out) * sizeof(ValueT);
+
+ return byteSize;
+ }
+
+ size_t save(std::string filename)
+ {
+ std::FILE* pFile = fopen(filename.c_str(), "w");
+ size_t byteSize = save(pFile);
+ fclose(pFile);
+ return byteSize;
+ }
+
+};
+
+// ********** Implementation **********
+
+// ValueIteratorRange
+
+template <typename ValueIteratorT>
+ValueIteratorRange<ValueIteratorT>::ValueIteratorRange(ValueIteratorT begin,
+ ValueIteratorT end) : m_begin(begin), m_end(end) { }
+
+template <typename ValueIteratorT>
+const ValueIteratorT& ValueIteratorRange<ValueIteratorT>::begin() const
+{
+ return m_begin;
+}
+
+template <typename ValueIteratorT>
+const ValueIteratorT& ValueIteratorRange<ValueIteratorT>::end() const
+{
+ return m_end;
+}
+
+template <typename ValueIteratorT>
+const std::string ValueIteratorRange<ValueIteratorT>::str() const
+{
+ std::string dummy;
+ for(ValueIteratorT it = m_begin; it != m_end; it++)
+ dummy.push_back(*it);
+ return dummy;
+}
+
+template <typename ValueIteratorT>
+template <typename StringT>
+bool ValueIteratorRange<ValueIteratorT>::operator==(const StringT& o) const
+{
+ if(std::distance(m_begin, m_end) == std::distance(o.begin(), o.end()))
+ return std::equal(m_begin, m_end, o.begin());
+ else
+ return false;
+}
+
+template <typename ValueIteratorT>
+bool ValueIteratorRange<ValueIteratorT>::operator==(const char* c) const
+{
+ return *this == std::string(c);
+}
+
+template <typename ValueIteratorT>
+template <typename StringT>
+bool ValueIteratorRange<ValueIteratorT>::operator<(const StringT &s2) const
+{
+ return std::lexicographical_compare(m_begin, m_end, s2.begin(), s2.end(),
+ std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
+}
+
+template <typename ValueIteratorT>
+bool ValueIteratorRange<ValueIteratorT>::operator<(const char* c) const
+{
+ return *this < std::string(c);
+}
+
+template <typename StringT, typename ValueIteratorT>
+bool operator<(const StringT &s1, const ValueIteratorRange<ValueIteratorT> &s2)
+{
+ return std::lexicographical_compare(s1.begin(), s1.end(), s2.begin(), s2.end(),
+ std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
+}
+
+template <typename ValueIteratorT>
+bool operator<(const char* c, const ValueIteratorRange<ValueIteratorT> &s2)
+{
+ size_t len = std::char_traits<char>::length(c);
+ return std::lexicographical_compare(c, c + len, s2.begin(), s2.end(),
+ std::less<typename std::iterator_traits<ValueIteratorT>::value_type>());
+}
+
+template <typename OStream, typename ValueIteratorT>
+OStream& operator<<(OStream &os, ValueIteratorRange<ValueIteratorT> cr)
+{
+ ValueIteratorT it = cr.begin();
+ while(it != cr.end())
+ os << *(it++);
+ return os;
+}
+
+// StringVector
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringVector()
+ : m_sorted(true), m_memoryMapped(false) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+void StringVector<ValueT, PosT, Allocator>::push_back(StringT s)
+{
+ if(is_sorted() && size() && !(back() < s))
+ m_sorted = false;
+
+ m_positions.push_back(size2());
+ std::copy(s.begin(), s.end(), std::back_inserter(m_charArray));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::push_back(const char* c)
+{
+ std::string dummy(c);
+ push_back(dummy);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVector<ValueT, PosT, Allocator>::begin() const
+{
+ return Iterator(const_cast<StringVector<ValueT, PosT, Allocator>&>(*this), 0);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVector<ValueT, PosT, Allocator>::end() const
+{
+ return Iterator(const_cast<StringVector<ValueT, PosT, Allocator>&>(*this), size());
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT, PosT, Allocator>::begin() const
+{
+ return begin<iterator>();
+};
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::iterator StringVector<ValueT, PosT, Allocator>::end() const
+{
+ return end<iterator>();
+};
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVector<ValueT, PosT, Allocator>::is_sorted() const
+{
+ return m_sorted;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::size() const
+{
+ return m_positions.size();
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::size2() const
+{
+ return m_charArray.size();
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT, Allocator>::at(PosT i) const
+{
+ return range(begin(i), end(i));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT, Allocator>::operator[](PosT i) const
+{
+ return at(i);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range StringVector<ValueT, PosT, Allocator>::back() const
+{
+ return at(size()-1);
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::length(PosT i) const
+{
+ if(i+1 < size())
+ return m_positions[i+1] - m_positions[i];
+ else
+ return size2() - m_positions[i];
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVector<ValueT, PosT, Allocator>::value_ptr(PosT i) const
+{
+ return &m_charArray[m_positions[i]];
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
+{
+ return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename std::vector<ValueT, Allocator<ValueT> >::const_iterator StringVector<ValueT, PosT, Allocator>::end(PosT i) const
+{
+ return typename std::vector<ValueT, Allocator<ValueT> >::const_iterator(value_ptr(i) + length(i));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+PosT StringVector<ValueT, PosT, Allocator>::find(StringT &s) const
+{
+ if(m_sorted)
+ return std::distance(begin(), std::lower_bound(begin(), end(), s));
+ return std::distance(begin(), std::find(begin(), end(), s));
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::find(const char* c) const
+{
+ std::string s(c);
+ return find(s);
+}
+
+// RangeIterator
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator() : m_index(0), m_container(0) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index)
+ : m_index(index), m_container(&sv) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::get_index()
+{
+ return m_index;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range
+ StringVector<ValueT, PosT, Allocator>::RangeIterator::dereference() const
+{
+ return typename StringVector<ValueT, PosT, Allocator>::range(
+ m_container->begin(m_index),
+ m_container->end(m_index)
+ );
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVector<ValueT, PosT, Allocator>::RangeIterator::equal(
+ StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
+{
+ return m_index == other.m_index && m_container == other.m_container;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::RangeIterator::increment()
+{
+ m_index++;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::RangeIterator::decrement()
+{
+ m_index--;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
+{
+ m_index += n;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::distance_to(
+ StringVector<ValueT, PosT, Allocator>::RangeIterator const& other) const
+{
+ return other.m_index - m_index;
+}
+
+// StringIterator
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator()
+ : m_index(0), m_container(0) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator(
+ StringVector<ValueT, PosT, Allocator> &sv, PosT index) : m_index(index),
+ m_container(&sv) { }
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::StringIterator::get_index()
+{
+ return m_index;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const std::string StringVector<ValueT, PosT, Allocator>::StringIterator::dereference() const
+{
+ return StringVector<ValueT, PosT, Allocator>::range(m_container->begin(m_index),
+ m_container->end(m_index)).str();
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVector<ValueT, PosT, Allocator>::StringIterator::equal(
+ StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
+{
+ return m_index == other.m_index && m_container == other.m_container;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::StringIterator::increment()
+{
+ m_index++;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::StringIterator::decrement()
+{
+ m_index--;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
+{
+ m_index += n;
+}
+
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::StringIterator::distance_to(
+ StringVector<ValueT, PosT, Allocator>::StringIterator const& other) const
+{
+ return other.m_index - m_index;
+}
+
+// ********** Some typedefs **********
+
+typedef StringVector<unsigned char, unsigned int> MediumStringVector;
+typedef StringVector<unsigned char, unsigned long> LongStringVector;
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/TargetPhraseCollectionCache.h b/moses/src/CompactPT/TargetPhraseCollectionCache.h
new file mode 100644
index 000000000..7861afe8d
--- /dev/null
+++ b/moses/src/CompactPT/TargetPhraseCollectionCache.h
@@ -0,0 +1,182 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_TargetPhraseCollectionCache_h
+#define moses_TargetPhraseCollectionCache_h
+
+#include <map>
+#include <set>
+#include <vector>
+
+#ifdef WITH_THREADS
+#ifdef BOOST_HAS_PTHREADS
+#include <boost/thread/mutex.hpp>
+#endif
+#endif
+
+#include <boost/shared_ptr.hpp>
+
+#include "Phrase.h"
+#include "TargetPhraseCollection.h"
+
+namespace Moses
+{
+
+// Avoid using new due to locking
+typedef std::vector<TargetPhrase> TargetPhraseVector;
+typedef boost::shared_ptr<TargetPhraseVector> TargetPhraseVectorPtr;
+
+class TargetPhraseCollectionCache
+{
+ private:
+ size_t m_max;
+ float m_tolerance;
+
+ struct LastUsed {
+ clock_t m_clock;
+ TargetPhraseVectorPtr m_tpv;
+ size_t m_bitsLeft;
+
+ LastUsed() : m_clock(0), m_bitsLeft(0) {}
+
+ LastUsed(clock_t clock, TargetPhraseVectorPtr tpv, size_t bitsLeft = 0)
+ : m_clock(clock), m_tpv(tpv), m_bitsLeft(bitsLeft) {}
+ };
+
+ typedef std::map<Phrase, LastUsed> CacheMap;
+
+ CacheMap m_phraseCache;
+
+#ifdef WITH_THREADS
+ boost::mutex m_mutex;
+#endif
+
+ public:
+
+ typedef CacheMap::iterator iterator;
+ typedef CacheMap::const_iterator const_iterator;
+
+ TargetPhraseCollectionCache(size_t max = 5000, float tolerance = 0.2)
+ : m_max(max), m_tolerance(tolerance)
+ {}
+
+ iterator Begin()
+ {
+ return m_phraseCache.begin();
+ }
+
+ const_iterator Begin() const
+ {
+ return m_phraseCache.begin();
+ }
+
+ iterator End()
+ {
+ return m_phraseCache.end();
+ }
+
+ const_iterator End() const
+ {
+ return m_phraseCache.end();
+ }
+
+ void Cache(const Phrase &sourcePhrase, TargetPhraseVectorPtr tpv,
+ size_t bitsLeft = 0, size_t maxRank = 0)
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+ iterator it = m_phraseCache.find(sourcePhrase);
+ if(it != m_phraseCache.end())
+ it->second.m_clock = clock();
+ else
+ {
+ if(maxRank && tpv->size() > maxRank)
+ {
+ TargetPhraseVectorPtr tpv_temp(new TargetPhraseVector());
+ tpv_temp->resize(maxRank);
+ std::copy(tpv->begin(), tpv->begin() + maxRank, tpv_temp->begin());
+ m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv_temp, bitsLeft);
+ }
+ else
+ m_phraseCache[sourcePhrase] = LastUsed(clock(), tpv, bitsLeft);
+ }
+ }
+
+ std::pair<TargetPhraseVectorPtr, size_t> Retrieve(const Phrase &sourcePhrase)
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+ iterator it = m_phraseCache.find(sourcePhrase);
+ if(it != m_phraseCache.end())
+ {
+ LastUsed &lu = it->second;
+ lu.m_clock = clock();
+ return std::make_pair(lu.m_tpv, lu.m_bitsLeft);
+ }
+ else
+ return std::make_pair(TargetPhraseVectorPtr(), 0);
+ }
+
+ void Prune()
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+ if(m_phraseCache.size() > m_max * (1 + m_tolerance))
+ {
+ typedef std::set<std::pair<clock_t, Phrase> > Cands;
+ Cands cands;
+ for(CacheMap::iterator it = m_phraseCache.begin();
+ it != m_phraseCache.end(); it++)
+ {
+ LastUsed &lu = it->second;
+ cands.insert(std::make_pair(lu.m_clock, it->first));
+ }
+
+ for(Cands::iterator it = cands.begin(); it != cands.end(); it++)
+ {
+ const Phrase& p = it->second;
+ m_phraseCache.erase(p);
+
+ if(m_phraseCache.size() < (m_max * (1 - m_tolerance)))
+ break;
+ }
+ }
+ }
+
+ void CleanUp()
+ {
+#ifdef WITH_THREADS
+ boost::mutex::scoped_lock lock(m_mutex);
+#endif
+ m_phraseCache.clear();
+ }
+
+};
+
+}
+
+#endif
diff --git a/moses/src/CompactPT/ThrowingFwrite.cpp b/moses/src/CompactPT/ThrowingFwrite.cpp
new file mode 100644
index 000000000..35e8e3122
--- /dev/null
+++ b/moses/src/CompactPT/ThrowingFwrite.cpp
@@ -0,0 +1,29 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#include "ThrowingFwrite.h"
+
+size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream) {
+ assert(size);
+ size_t returnValue = std::fwrite(ptr, size, count, stream);
+ UTIL_THROW_IF(count != returnValue, util::ErrnoException, "Short fwrite; requested size " << size);
+ return returnValue;
+}
diff --git a/moses/src/CompactPT/ThrowingFwrite.h b/moses/src/CompactPT/ThrowingFwrite.h
new file mode 100644
index 000000000..4f45ae8f5
--- /dev/null
+++ b/moses/src/CompactPT/ThrowingFwrite.h
@@ -0,0 +1,31 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+***********************************************************************/
+
+#ifndef moses_ThrowingFwrite_h
+#define moses_ThrowingFwrite_h
+
+#include <cassert>
+#include <cstdio>
+#include "util/exception.hh"
+
+size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream);
+
+#endif
diff --git a/moses/src/ConfusionNet.h b/moses/src/ConfusionNet.h
index 25b5a021d..fdb97a423 100644
--- a/moses/src/ConfusionNet.h
+++ b/moses/src/ConfusionNet.h
@@ -16,6 +16,9 @@ class TranslationOptionCollection;
class Sentence;
class TranslationSystem;
+/** An input to the decoder where each position can be 1 of a number of words,
+ * each with an associated probability. Compared with a sentence, where each position is a word
+ */
class ConfusionNet : public InputType
{
public:
diff --git a/moses/src/DecodeFeature.h b/moses/src/DecodeFeature.h
index afa1771ba..2d338979d 100644
--- a/moses/src/DecodeFeature.h
+++ b/moses/src/DecodeFeature.h
@@ -33,6 +33,7 @@ namespace Moses
/**
* A feature on the decoding path (Generation or Translation)
+ * @todo don't quite understand what this is
**/
class DecodeFeature : public StatelessFeatureFunction {
diff --git a/moses/src/DecodeGraph.h b/moses/src/DecodeGraph.h
index 770cb9958..ad5eb8ace 100644
--- a/moses/src/DecodeGraph.h
+++ b/moses/src/DecodeGraph.h
@@ -33,7 +33,7 @@ namespace Moses
class DecodeStep;
-//! list of DecodeStep s which factorizes the translation
+//! list of DecodeSteps which factorizes the translation
class DecodeGraph
{
protected:
diff --git a/moses/src/DecodeStep.h b/moses/src/DecodeStep.h
index 4d2f2280b..26ae84a51 100644
--- a/moses/src/DecodeStep.h
+++ b/moses/src/DecodeStep.h
@@ -39,7 +39,7 @@ class FactorCollection;
class InputType;
class TranslationSystem;
-/*! Specification for a decoding step.
+/** Specification for a decoding step.
* The factored translation model consists of Translation and Generation
* steps, which consult a Dictionary of phrase translations or word
* generations. This class implements the specification for one of these
diff --git a/moses/src/Dictionary.cpp b/moses/src/Dictionary.cpp
index 05b4f6bb4..464df6af6 100644
--- a/moses/src/Dictionary.cpp
+++ b/moses/src/Dictionary.cpp
@@ -31,7 +31,7 @@ Dictionary::Dictionary(size_t numScoreComponent)
Dictionary::~Dictionary() {}
-void Dictionary::CleanUp() {}
+void Dictionary::CleanUp(const InputType& source) {}
}
diff --git a/moses/src/Dictionary.h b/moses/src/Dictionary.h
index 7bb4d09b1..539af282e 100644
--- a/moses/src/Dictionary.h
+++ b/moses/src/Dictionary.h
@@ -28,10 +28,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
-
-/** Abstract class from which PhraseDictionary and GenerationDictionary
- * are inherited.
-*/
+class InputType;
+
+/** Abstract class from which PhraseDictionary and GenerationDictionary are inherited.
+ */
class Dictionary
{
protected:
@@ -49,7 +49,7 @@ public:
virtual DecodeType GetDecodeType() const = 0;
// clean up temporary memory, called after processing each sentence
- virtual void CleanUp();
+ virtual void CleanUp(const InputType& source);
};
}
diff --git a/moses/src/DummyScoreProducers.cpp b/moses/src/DummyScoreProducers.cpp
index a78e6a231..5808363c8 100644
--- a/moses/src/DummyScoreProducers.cpp
+++ b/moses/src/DummyScoreProducers.cpp
@@ -47,28 +47,45 @@ std::string DistortionScoreProducer::GetScoreProducerWeightShortName(unsigned) c
float DistortionScoreProducer::CalculateDistortionScore(const Hypothesis& hypo,
const WordsRange &prev, const WordsRange &curr, const int FirstGap) const
{
- const int USE_OLD = 1;
- if (USE_OLD) {
+ if(!StaticData::Instance().UseEarlyDistortionCost()) {
return - (float) hypo.GetInput().ComputeDistortionDistance(prev, curr);
}
-
- // Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
-
- int prefixEndPos = FirstGap-1;
- if ((int) curr.GetStartPos() == prefixEndPos+1) {
- return 0;
- }
-
- if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
- return (float) -2*curr.GetNumWordsCovered();
- }
-
- if ((int) prev.GetEndPos() <= prefixEndPos) {
- int z = curr.GetStartPos()-prefixEndPos;
- return (float) -2*(z + curr.GetNumWordsCovered());
+ else {
+ /* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
+ Definitions:
+ S : current source range
+ S' : last translated source phrase range
+ S'' : longest fully-translated initial segment
+ */
+
+ int prefixEndPos = (int)FirstGap-1;
+ if((int)FirstGap==-1)
+ prefixEndPos = -1;
+
+ // case1: S is adjacent to S'' => return 0
+ if ((int) curr.GetStartPos() == prefixEndPos+1) {
+ IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
+ return 0;
+ }
+
+ // case2: S is to the left of S' => return 2(length(S))
+ if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
+ IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
+ return (float) -2*(int)curr.GetNumWordsCovered();
+ }
+
+ // case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
+ if ((int) prev.GetEndPos() <= prefixEndPos) {
+ IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
+ int z = (int)curr.GetStartPos()-prefixEndPos - 1;
+ return (float) -2*(z + (int)curr.GetNumWordsCovered());
+ }
+
+ // case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
+ IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
+ return (float) -2*((int)curr.GetNumWordsBetween(prev) + (int)curr.GetNumWordsCovered());
+
}
-
- return (float) -2*(curr.GetNumWordsBetween(prev) + curr.GetNumWordsCovered());
}
@@ -86,7 +103,7 @@ FFState* DistortionScoreProducer::Evaluate(
out->PlusEquals(this, distortionScore);
DistortionState_traditional* res = new DistortionState_traditional(
hypo.GetCurrSourceWordsRange(),
- hypo.GetPrevHypo()->GetWordsBitmap().GetFirstGapPos());
+ hypo.GetWordsBitmap().GetFirstGapPos());
return res;
}
@@ -96,9 +113,11 @@ std::string WordPenaltyProducer::GetScoreProducerWeightShortName(unsigned) const
return "w";
}
-void WordPenaltyProducer::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* out) const
+void WordPenaltyProducer::Evaluate(
+ const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* out) const
{
- const TargetPhrase& tp = cur_hypo.GetCurrTargetPhrase();
+ const TargetPhrase& tp = context.GetTargetPhrase();
out->PlusEquals(this, -static_cast<float>(tp.GetSize()));
}
diff --git a/moses/src/DummyScoreProducers.h b/moses/src/DummyScoreProducers.h
index db7e60d50..612d63926 100644
--- a/moses/src/DummyScoreProducers.h
+++ b/moses/src/DummyScoreProducers.h
@@ -29,8 +29,8 @@ public:
ScoreComponentCollection* accumulator) const;
virtual FFState* EvaluateChart(
- const ChartHypothesis&,
- int /* featureID */,
+ const ChartHypothesis& /* cur_hypo */,
+ int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection*) const {
CHECK(0); // feature function not valid in chart decoder
return NULL;
@@ -38,7 +38,7 @@ public:
};
/** Doesn't do anything but provide a key into the global
- * score array to store the word penalty in.
+ * score array to store the word penalty in.
*/
class WordPenaltyProducer : public StatelessFeatureFunction
{
@@ -48,15 +48,18 @@ public:
std::string GetScoreProducerWeightShortName(unsigned) const;
virtual void Evaluate(
- const Hypothesis& cur_hypo,
+ const PhraseBasedFeatureContext& context,
ScoreComponentCollection* accumulator) const;
virtual void EvaluateChart(
- const ChartHypothesis&,
- int /* featureID */,
- ScoreComponentCollection*) const {
- // needs to be implemented but does nothing
- }
+ const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ //required but does nothing.
+ }
+
+
+
};
/** unknown word penalty */
@@ -68,6 +71,18 @@ public:
std::string GetScoreProducerWeightShortName(unsigned) const;
virtual bool ComputeValueInTranslationOption() const;
+ void Evaluate( const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ //do nothing - not a real feature
+ }
+
+ void EvaluateChart(
+ const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ //do nothing - not a real feature
+ }
};
@@ -80,14 +95,14 @@ class MetaFeatureProducer : public StatelessFeatureFunction
std::string GetScoreProducerWeightShortName(unsigned) const;
- virtual void Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const {
+ void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const {
+ //do nothing - not a real feature
}
- virtual void EvaluateChart(const ChartHypothesis&,
- int /* featureID */,
- ScoreComponentCollection*) const {
- // needs to be implemented but does nothing
+ void EvaluateChart(const ChartBasedFeatureContext& context,
+ ScoreComponentCollection*) const {
+ //do nothing - not a real feature
}
};
diff --git a/moses/src/DynSAInclude/file.cpp b/moses/src/DynSAInclude/FileHandler.cpp
index d2901c1ae..33c2524f9 100644
--- a/moses/src/DynSAInclude/file.cpp
+++ b/moses/src/DynSAInclude/FileHandler.cpp
@@ -1,4 +1,10 @@
-#include "file.h"
+#include "FileHandler.h"
+#include <stdio.h>
+
+#ifdef WIN32
+#define popen(A, B) _popen(A, B)
+#define pclose(A) _pclose(A)
+#endif
namespace Moses
{
@@ -17,7 +23,7 @@ const std::string FileHandler::kBzip2Command = "bzip2 -f";
const std::string FileHandler::kBunzip2Command = "bunzip2 -f";
FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags, bool /* checkExists */)
- : std::fstream(NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL)
+ : std::fstream((const char*) NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL)
{
if( !(flags^(std::ios::in|std::ios::out)) ) {
fprintf(stderr, "ERROR: FileHandler does not support bidirectional files (%s).\n", path_.c_str());
@@ -31,8 +37,10 @@ FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags
FileHandler::~FileHandler()
{
+#ifndef NO_PIPES
if( fp_ != 0 )
pclose(fp_);
+#endif
if( path_ != FileHandler::kStdInDescriptor &&
path_ != FileHandler::kStdOutDescriptor )
delete buffer_;
@@ -45,7 +53,11 @@ fdstreambuf * FileHandler::openCompressedFile(const char * cmd)
//bool isInput = (flags_ & std::ios::in);
//open pipe to file with compression/decompression command
const char * p_type = (flags_ & std::ios::in ? "r" : "w");
+#ifndef NO_PIPES
fp_ = popen(cmd, p_type);
+#else
+ fp_ = NULL;
+#endif
if( fp_ == NULL ) {
//fprintf(stderr, "ERROR:Failed to open compressed file at %s\n", path_.c_str());
perror("openCompressedFile: ");
@@ -152,6 +164,7 @@ bool FileHandler::getCompressionCmds(const std::string & filepath, std::string &
bool FileHandler::reset()
{
+#ifndef NO_PIPES
// move to beginning of file
if (fp_ != 0) {
//can't seek on a pipe so reopen
@@ -162,6 +175,7 @@ bool FileHandler::reset()
//reinitialize
this->init(buffer_);
} else
+#endif
buffer_->pubseekoff(0, std::ios_base::beg); //sets both get and put pointers to beginning of stream
return true;
}
diff --git a/moses/src/DynSAInclude/file.h b/moses/src/DynSAInclude/FileHandler.h
index 3157f918b..0c32bb725 100644
--- a/moses/src/DynSAInclude/file.h
+++ b/moses/src/DynSAInclude/FileHandler.h
@@ -15,7 +15,7 @@ namespace Moses
{
typedef std::string FileExtension;
-
+//! @todo ask abby2
class FileHandler: public std::fstream
{
public:
diff --git a/moses/src/DynSAInclude/RandLMCache.h b/moses/src/DynSAInclude/RandLMCache.h
index 3f38cae02..b92a2a164 100644
--- a/moses/src/DynSAInclude/RandLMCache.h
+++ b/moses/src/DynSAInclude/RandLMCache.h
@@ -24,6 +24,7 @@
namespace randlm {
+ //! @todo ask abby2
template<typename T>
class CacheNode {
public:
diff --git a/moses/src/DynSAInclude/RandLMFilter.h b/moses/src/DynSAInclude/RandLMFilter.h
index 556bbe44a..298464693 100644
--- a/moses/src/DynSAInclude/RandLMFilter.h
+++ b/moses/src/DynSAInclude/RandLMFilter.h
@@ -18,18 +18,23 @@
#define INC_RANDLM_FILTER_H
#include <cmath>
-#include "file.h"
+#include "FileHandler.h"
+
+#ifdef WIN32
+#define log2(X) (log((double)X)/log((double)2))
+#endif
namespace randlm {
- // Class Filter wraps a contiguous array of data. Filter and its subclasses
- // implement read/write/increment functionality on arrays with arbitrary sized addresses
- // (i.e. an address may not use a full number of bytes). When converting to byte-based
- // representation we assume "unused" bits are to left.
- // E.g. if the underlying data is stored in units T = uint16 and the 'width' = 11
- // to read 'address' = 3 we extract bits at indices [33,42] (i.e. [11*3, 11*4 - 1])
- // and store in a uint16 in positions 0000 0001 111111 where the first 7 bits have
- // been masked out.
+ /* Class Filter wraps a contiguous array of data. Filter and its subclasses
+ * implement read/write/increment functionality on arrays with arbitrary sized addresses
+ * (i.e. an address may not use a full number of bytes). When converting to byte-based
+ * representation we assume "unused" bits are to left.
+ * E.g. if the underlying data is stored in units T = uint16 and the 'width' = 11
+ * to read 'address' = 3 we extract bits at indices [33,42] (i.e. [11*3, 11*4 - 1])
+ * and store in a uint16 in positions 0000 0001 111111 where the first 7 bits have
+ * been masked out.
+ */
template<typename T>
class Filter {
public:
@@ -39,7 +44,7 @@ namespace randlm {
// current implementation has following constraints
CHECK(cell_width_ > 0 && cell_width_ <= 64 && cell_width_ >= width);
// used for >> division
- log_cell_width_ = static_cast<int>(floor(log(cell_width_)/log(2) + 0.000001));
+ log_cell_width_ = static_cast<int>(floor(log((double)cell_width_)/log((double)2) + 0.000001));
// size of underlying data in Ts
cells_ = ((addresses * width) + cell_width_ - 1) >> log_cell_width_;
// instantiate underlying data
diff --git a/moses/src/DynSAInclude/hash.h b/moses/src/DynSAInclude/hash.h
index a73b3de96..03669845e 100644
--- a/moses/src/DynSAInclude/hash.h
+++ b/moses/src/DynSAInclude/hash.h
@@ -5,10 +5,11 @@
#include <cmath>
#include "types.h"
#include "utils.h"
-#include "file.h"
+#include "FileHandler.h"
using namespace Moses;
typedef uint64_t P; // largest input range is 2^64
+//! @todo ask abby2
template <typename T>
class HashBase {
protected:
@@ -38,6 +39,8 @@ class HashBase {
fin->read((char*)&H_, sizeof(H_));
}
};
+
+//! @todo ask abby2
template <typename T>
class UnivHash_linear: public HashBase<T> {
public:
@@ -64,12 +67,12 @@ class UnivHash_linear: public HashBase<T> {
void freeSeeds();
};
-/* UnivHash_noPrimes:
+/** UnivHash_noPrimes:
* From Dietzfelbinger 2008
* p = input domain range = 2^l
* m = output range = 2^k
* # of hash function = 2^(l-1)
-*/
+ */
template <typename T>
class UnivHash_noPrimes: public HashBase<T> {
public:
@@ -96,6 +99,7 @@ class UnivHash_noPrimes: public HashBase<T> {
void freeSeeds() {delete[] a_;}
};
+//! @todo ask abby2
template <typename T>
class Hash_shiftAddXOR: public HashBase<T> {
public:
@@ -113,6 +117,7 @@ class Hash_shiftAddXOR: public HashBase<T> {
void freeSeeds() {delete[] v_;}
};
+//! @todo ask abby2
template <typename T>
class UnivHash_tableXOR: public HashBase<T> {
public:
diff --git a/moses/src/DynSAInclude/onlineRLM.h b/moses/src/DynSAInclude/onlineRLM.h
index 4bfd7b118..b47cfdd0e 100644
--- a/moses/src/DynSAInclude/onlineRLM.h
+++ b/moses/src/DynSAInclude/onlineRLM.h
@@ -2,6 +2,7 @@
#define INC_DYNAMICLM_H
#include <algorithm>
+#include <vector>
#include "perfectHash.h"
#include "RandLMCache.h"
#include "types.h"
@@ -15,6 +16,7 @@ using randlm::Cache;
const bool strict_checks_ = false;
+//! @todo ask abby2
template<typename T>
class OnlineRLM: public PerfectHash<T> {
public:
@@ -86,6 +88,7 @@ private:
BitFilter* bPrefix_;
BitFilter* bHit_;
};
+
template<typename T>
bool OnlineRLM<T>::insert(const std::vector<string>& ngram, const int value) {
int len = ngram.size();
@@ -103,10 +106,11 @@ bool OnlineRLM<T>::insert(const std::vector<string>& ngram, const int value) {
markQueried(index);
return true;
}
+
template<typename T>
bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value) {
int len = ngram.size();
- wordID_t wrdIDs[len];
+ std::vector<wordID_t> wrdIDs(len);
uint64_t index(this->cells_ + 1);
hpdEntry_t hpdItr;
vocab_->MakeOpen();
@@ -115,14 +119,15 @@ bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value) {
// if updating, minimize false positives by pre-checking if context already in model
bool bIncluded(true);
if(value > 1 && len < (int)order_)
- bIncluded = markPrefix(wrdIDs, ngram.size(), true); // mark context
+ bIncluded = markPrefix(&wrdIDs[0], ngram.size(), true); // mark context
if(bIncluded) { // if context found
- bIncluded = PerfectHash<T>::update2(wrdIDs, len, value, hpdItr, index);
+ bIncluded = PerfectHash<T>::update2(&wrdIDs[0], len, value, hpdItr, index);
if(index < this->cells_) {
markQueried(index);
}
else if(hpdItr != this->dict_.end()) markQueried(hpdItr);
}
+
return bIncluded;
}
template<typename T>
@@ -143,6 +148,7 @@ int OnlineRLM<T>::query(const wordID_t* IDs, int len) {
}
return value > 0 ? value : 0;
}
+
template<typename T>
bool OnlineRLM<T>::markPrefix(const wordID_t* IDs, const int len, bool bSet) {
if(len <= 1) return true; // only do this for for ngrams with context
@@ -171,16 +177,19 @@ bool OnlineRLM<T>::markPrefix(const wordID_t* IDs, const int len, bool bSet) {
}
return true;
}
+
template<typename T>
void OnlineRLM<T>::markQueried(const uint64_t& index) {
bHit_->setBit(index);
//cerr << "filter[" << index << "] = " << this->filter_->read(index) << endl;
}
+
template<typename T>
void OnlineRLM<T>::markQueried(hpdEntry_t& value) {
// set high bit of counter to indicate "hit" status
value->second |= this->hitMask_;
}
+
template<typename T>
void OnlineRLM<T>::remove(const std::vector<string>& ngram) {
wordID_t IDs[ngram.size()];
@@ -188,6 +197,7 @@ void OnlineRLM<T>::remove(const std::vector<string>& ngram) {
IDs[i] = vocab_->GetWordID(ngram[i]);
PerfectHash<T>::remove(IDs, ngram.size());
}
+
template<typename T>
count_t OnlineRLM<T>::heurDelete(count_t num2del, count_t order) {
count_t deleted = 0;
@@ -218,6 +228,7 @@ count_t OnlineRLM<T>::heurDelete(count_t num2del, count_t order) {
cerr << "Total deleted = " << deleted << endl;
return deleted;
}
+
template<typename T>
int OnlineRLM<T>::sbsqQuery(const std::vector<string>& ngram, int* codes,
bool bStrict) {
@@ -226,6 +237,7 @@ int OnlineRLM<T>::sbsqQuery(const std::vector<string>& ngram, int* codes,
IDs[i] = vocab_->GetWordID(ngram[i]);
return sbsqQuery(IDs, ngram.size(), codes, bStrict);
}
+
template<typename T>
int OnlineRLM<T>::sbsqQuery(const wordID_t* IDs, const int len, int* codes,
bool bStrict) {
@@ -252,8 +264,10 @@ int OnlineRLM<T>::sbsqQuery(const wordID_t* IDs, const int len, int* codes,
if(val != -1) break; // if anything found
else --fnd; // else decrement found
}
+
return fnd;
}
+
template<typename T>
float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
const void** state) {
@@ -264,7 +278,7 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
if(!cache_->checkCacheNgram(ngram, len, &logprob, &context)) {
// get full prob and put in cache
int num_fnd(0), den_val(0);
- int in[len]; // in[] keeps counts of increasing order numerator
+ int *in = new int[len]; // in[] keeps counts of increasing order numerator
for(int i = 0; i < len; ++i) in[i] = 0;
for(int i = len - 1; i >= 0; --i) {
if(ngram[i] == vocab_->GetkOOVWordID()) break; // no need to query if OOV
@@ -309,14 +323,19 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
} // end checkCache
return logprob;
}
+
template<typename T>
const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len) {
int dummy(0);
- float* addresses[len]; // only interested in addresses of cache
+ float* *addresses = new float*[len]; // only interested in addresses of cache
CHECK(cache_->getCache2(ngram, len, &addresses[0], &dummy) == len);
// return address of cache node
- return (const void*)addresses[0];
+
+ float *addr0 = addresses[0];
+ free( addresses );
+ return (const void*)addr0;
}
+
template<typename T>
void OnlineRLM<T>::randDelete(int num2del) {
int deleted = 0;
@@ -328,6 +347,7 @@ void OnlineRLM<T>::randDelete(int num2del) {
if(deleted >= num2del) break;
}
}
+
template<typename T>
int OnlineRLM<T>::countHits() {
int hit(0);
@@ -339,6 +359,7 @@ int OnlineRLM<T>::countHits() {
cerr << "Hit count = " << hit << endl;
return hit;
}
+
template<typename T>
int OnlineRLM<T>::countPrefixes() {
int pfx(0);
@@ -348,6 +369,7 @@ int OnlineRLM<T>::countPrefixes() {
cerr << "Prefix count (in filter) = " << pfx << endl;
return pfx;
}
+
template<typename T>
int OnlineRLM<T>::cleanUpHPD() {
cerr << "HPD size before = " << this->dict_.size() << endl;
@@ -363,6 +385,7 @@ int OnlineRLM<T>::cleanUpHPD() {
cerr << "HPD size after = " << this->dict_.size() << endl;
return vDel.size();
}
+
template<typename T>
void OnlineRLM<T>::clearMarkings() {
cerr << "clearing all event hits\n";
@@ -373,6 +396,7 @@ void OnlineRLM<T>::clearMarkings() {
*value -= ((*value & this->hitMask_) != 0) ? this->hitMask_ : 0;
}
}
+
template<typename T>
void OnlineRLM<T>::save(FileHandler* fout) {
cerr << "Saving ORLM...\n";
@@ -386,6 +410,7 @@ void OnlineRLM<T>::save(FileHandler* fout) {
PerfectHash<T>::save(fout);
cerr << "Finished saving ORLM." << endl;
}
+
template<typename T>
void OnlineRLM<T>::load(FileHandler* fin) {
cerr << "Loading ORLM...\n";
@@ -401,6 +426,7 @@ void OnlineRLM<T>::load(FileHandler* fin) {
// load everything else
PerfectHash<T>::load(fin);
}
+
template<typename T>
void OnlineRLM<T>::removeNonMarked() {
cerr << "deleting all unused events\n";
@@ -415,6 +441,7 @@ void OnlineRLM<T>::removeNonMarked() {
deleted += cleanUpHPD();
cerr << "total removed from ORLM = " << deleted << endl;
}
+
/*
template<typename T>
float OnlineRLM<T>::getProb2(const wordID_t* ngram, int len, const void** state) {
diff --git a/moses/src/DynSAInclude/params.h b/moses/src/DynSAInclude/params.h
index e0aab6135..d5af6331d 100644
--- a/moses/src/DynSAInclude/params.h
+++ b/moses/src/DynSAInclude/params.h
@@ -6,7 +6,7 @@
#include <set>
#include <vector>
#include "util/check.hh"
-#include "file.h"
+#include "FileHandler.h"
#include "utils.h"
#include "types.h"
@@ -21,6 +21,7 @@ typedef struct ParamDefs {
std::string description;
} ParamDefs;
+ //! @todo ask abby2
class Parameters {
public:
static const std::string kNotSetValue;
diff --git a/moses/src/DynSAInclude/perfectHash.h b/moses/src/DynSAInclude/perfectHash.h
index 83112197b..f445e063a 100644
--- a/moses/src/DynSAInclude/perfectHash.h
+++ b/moses/src/DynSAInclude/perfectHash.h
@@ -7,7 +7,8 @@
#include "hash.h"
#include "RandLMFilter.h"
#include "quantizer.h"
-/*
+
+/**
* PerfectHash handles setting up hash functions and storage
* for LM data.
*/
@@ -16,6 +17,7 @@ using randlm::BitFilter;
typedef std::map<string, count_t> hpDict_t;
typedef hpDict_t::iterator hpdEntry_t;
static count_t collisions_ = 0;
+
/* Based on Mortenson et. al. 2006 */
template<typename T>
class PerfectHash {
@@ -60,6 +62,7 @@ private:
UnivHash_linear<T>* fingerHash_;
LogQtizer* qtizer_;
};
+
template<typename T>
PerfectHash<T>::PerfectHash(uint16_t MBs, int width, int bucketRange,
float qBase): hitMask_(1 << 31), memBound_(MBs * (1ULL << 20)),
@@ -84,6 +87,7 @@ PerfectHash<T>::PerfectHash(uint16_t MBs, int width, int bucketRange,
bucketHash_ = new UnivHash_linear<count_t>(totBuckets_, 1, PRIME);
fingerHash_ = new UnivHash_linear<T>(pow(2.0f, cellWidth_), MAX_HASH_FUNCS, PRIME);
}
+
template<typename T>
PerfectHash<T>::~PerfectHash() {
delete[] idxTracker_;
@@ -94,6 +98,7 @@ PerfectHash<T>::~PerfectHash() {
delete qtizer_;
delete values_;
}
+
template<typename T>
uint64_t PerfectHash<T>::insert(const wordID_t* IDs, const int len,
const count_t value) {
@@ -128,6 +133,7 @@ uint64_t PerfectHash<T>::insert(const wordID_t* IDs, const int len,
return cells_ + 1;
}
}
+
template<typename T>
bool PerfectHash<T>::update(const wordID_t* IDs, const int len,
const count_t value, hpdEntry_t& hpdAddr, uint64_t& filterIdx) {
@@ -157,6 +163,7 @@ bool PerfectHash<T>::update(const wordID_t* IDs, const int len,
// could add if it gets here.
return false;
}
+
template<typename T>
int PerfectHash<T>::query(const wordID_t* IDs, const int len,
hpdEntry_t& hpdAddr, uint64_t& filterIdx) {
@@ -187,6 +194,7 @@ int PerfectHash<T>::query(const wordID_t* IDs, const int len,
}
return -1;
}
+
template<typename T>
void PerfectHash<T>::remove(const wordID_t* IDs, const int len) {
// delete key if in high perf. dictionary
@@ -212,6 +220,7 @@ void PerfectHash<T>::remove(const wordID_t* IDs, const int len) {
}
}
}
+
template<typename T> // clear filter index
void PerfectHash<T>::remove(uint64_t index) {
CHECK(index < cells_);
@@ -222,6 +231,7 @@ void PerfectHash<T>::remove(uint64_t index) {
count_t bucket = index / bucketRange_;
--idxTracker_[bucket];
}
+
template<typename T>
T PerfectHash<T>::nonZeroSignature(const wordID_t* IDs, const int len,
count_t bucket) {
@@ -235,6 +245,7 @@ T PerfectHash<T>::nonZeroSignature(const wordID_t* IDs, const int len,
cerr << "WARNING: Unable to find non-zero signature for ngram\n" << endl;
return fingerprint;
}
+
template<typename T>
string PerfectHash<T>::hpDictKeyValue(const wordID_t* IDs, const int len) {
string skey(" ");
@@ -243,16 +254,19 @@ string PerfectHash<T>::hpDictKeyValue(const wordID_t* IDs, const int len) {
Utils::trim(skey);
return skey;
}
+
template<typename T>
count_t PerfectHash<T>::hpDictMemUse() {
// return hpDict memory usage in MBs
return (count_t) sizeof(hpDict_t::value_type)* dict_.size() >> 20;
}
+
template<typename T>
count_t PerfectHash<T>::bucketsMemUse() {
// return bucket memory usage in MBs
return (count_t) (filter_->size() + values_->size());
}
+
template<typename T>
void PerfectHash<T>::save(FileHandler* fout) {
CHECK(fout != 0);
@@ -278,6 +292,7 @@ void PerfectHash<T>::save(FileHandler* fout) {
iterate(dict_, t)
*fout << t->first << "\t" << t->second << "\n";
}
+
template<typename T>
void PerfectHash<T>::load(FileHandler* fin) {
CHECK(fin != 0);
@@ -314,6 +329,7 @@ void PerfectHash<T>::load(FileHandler* fin) {
cerr << "\tHPD size=" << dict_.size() << endl;
cerr << "Finished loading ORLM." << endl;
}
+
template<typename T>
void PerfectHash<T>::analyze() {
cerr << "Analyzing Dynamic Bloomier Filter...\n";
@@ -373,6 +389,7 @@ void PerfectHash<T>::analyze() {
cerr << "values MBs= " << values_->size() << endl;
delete[] bucketCnt;
}
+
template<typename T>
bool PerfectHash<T>::update2(const wordID_t* IDs, const int len,
const count_t value, hpdEntry_t& hpdAddr, uint64_t& filterIdx) {
@@ -404,4 +421,6 @@ bool PerfectHash<T>::update2(const wordID_t* IDs, const int len,
insert(IDs, len, value);
return false;
}
+
#endif
+
diff --git a/moses/src/DynSAInclude/quantizer.h b/moses/src/DynSAInclude/quantizer.h
index c12189615..6c6850fa6 100644
--- a/moses/src/DynSAInclude/quantizer.h
+++ b/moses/src/DynSAInclude/quantizer.h
@@ -8,6 +8,12 @@
#include "types.h"
static const float kFloatErr = 0.00001f;
+
+#ifdef WIN32
+#define log2(X) (log((double)X)/log((double)2))
+#endif
+
+//! @todo ask abby2
class LogQtizer {
public:
LogQtizer(float i): base_(pow(2, 1 / i)) {
diff --git a/moses/src/DynSAInclude/types.h b/moses/src/DynSAInclude/types.h
index 47d142666..cb714dab4 100644
--- a/moses/src/DynSAInclude/types.h
+++ b/moses/src/DynSAInclude/types.h
@@ -7,10 +7,16 @@
#include <vector>
#include <typeinfo>
#include <stdint.h>
+
+#ifdef WIN32
+#define iterate(c, i) for(decltype(c.begin()) i = c.begin(); i != c.end(); ++i)
+#define piterate(c, i) for(decltype(c->begin()) i = c->begin(); i != c->end(); ++i)
+#define riterate(c, i) for(decltype(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)
+#else
#define iterate(c, i) for(__typeof__(c.begin()) i = c.begin(); i != c.end(); ++i)
#define piterate(c, i) for(__typeof__(c->begin()) i = c->begin(); i != c->end(); ++i)
#define riterate(c, i) for(__typeof__(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)
-
+#endif
#define THREADED false
#define THREAD_MAX 2
diff --git a/moses/src/DynSAInclude/utils.h b/moses/src/DynSAInclude/utils.h
index 74703863d..e2f24fd4f 100644
--- a/moses/src/DynSAInclude/utils.h
+++ b/moses/src/DynSAInclude/utils.h
@@ -9,6 +9,7 @@
#include <cmath>
#include <cstring>
+//! @todo ask abby2
class Utils
{
public:
diff --git a/moses/src/DynSAInclude/vocab.h b/moses/src/DynSAInclude/vocab.h
index 467d16fdb..77faa8ec5 100644
--- a/moses/src/DynSAInclude/vocab.h
+++ b/moses/src/DynSAInclude/vocab.h
@@ -4,15 +4,15 @@
#include <map>
#include <string>
#include "types.h"
-#include "file.h"
+#include "FileHandler.h"
#include "utils.h"
#include "../TypeDef.h"
#include "../Word.h"
namespace Moses
{
-// Vocab maps between strings and uint32 ids.
-
+
+//! Vocab maps between strings and uint32 ids.
class Vocab
{
public:
diff --git a/moses/src/DynSuffixArray.h b/moses/src/DynSuffixArray.h
index 05ea0596a..5969195d9 100644
--- a/moses/src/DynSuffixArray.h
+++ b/moses/src/DynSuffixArray.h
@@ -14,6 +14,8 @@ namespace Moses
typedef std::vector<unsigned> vuint_t;
+/** @todo ask Abbey Levenberg
+ */
class DynSuffixArray
{
diff --git a/moses/src/FFState.h b/moses/src/FFState.h
index 49b0e55a8..4b1b145a3 100644
--- a/moses/src/FFState.h
+++ b/moses/src/FFState.h
@@ -8,6 +8,8 @@
namespace Moses
{
+/** @todo What is the difference between this and the classes in FeatureFunction?
+ */
class FFState
{
public:
diff --git a/moses/src/Factor.h b/moses/src/Factor.h
index 9cf3fd799..ac1b591ed 100644
--- a/moses/src/Factor.h
+++ b/moses/src/Factor.h
@@ -34,7 +34,6 @@ struct FactorFriend;
class FactorCollection;
/** Represents a factor (word, POS, etc).
- *
* A Factor has a contiguous identifier and string value.
*/
class Factor
diff --git a/moses/src/FactorCollection.cpp b/moses/src/FactorCollection.cpp
index 2ef55b6d1..a0b1142b1 100644
--- a/moses/src/FactorCollection.cpp
+++ b/moses/src/FactorCollection.cpp
@@ -20,6 +20,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/version.hpp>
+#include <boost/thread/locks.hpp>
#include <ostream>
#include <string>
#include "FactorCollection.h"
diff --git a/moses/src/FactorCollection.h b/moses/src/FactorCollection.h
index d68d183da..9a01766f4 100644
--- a/moses/src/FactorCollection.h
+++ b/moses/src/FactorCollection.h
@@ -38,7 +38,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
-/* We don't want Factor to be copyable by anybody. But we also want to store
+/** We don't want Factor to be copyable by anybody. But we also want to store
* it in an STL container. The solution is that Factor's copy constructor is
* private and friended to FactorFriend. The STL containers can delegate
* copying, so friending the container isn't sufficient. STL containers see
diff --git a/moses/src/FactorTypeSet.h b/moses/src/FactorTypeSet.h
index b0db0502c..5464cee40 100644
--- a/moses/src/FactorTypeSet.h
+++ b/moses/src/FactorTypeSet.h
@@ -32,7 +32,7 @@ namespace Moses
{
/** set of unique FactorTypes. Used to store what factor types are used in phrase tables etc
-*/
+ */
class FactorMask : public std::bitset<MAX_NUM_FACTORS>
{
friend std::ostream& operator<<(std::ostream&, const FactorMask&);
diff --git a/moses/src/FeatureFunction.cpp b/moses/src/FeatureFunction.cpp
index d435df5af..0d70bd70f 100644
--- a/moses/src/FeatureFunction.cpp
+++ b/moses/src/FeatureFunction.cpp
@@ -1,33 +1,88 @@
-#include "FeatureFunction.h"
+#include <stdexcept>
#include "util/check.hh"
+#include "ChartHypothesis.h"
+#include "ChartManager.h"
+#include "FeatureFunction.h"
+#include "Hypothesis.h"
+#include "Manager.h"
+#include "TranslationOption.h"
+
+
namespace Moses
{
-FeatureFunction::~FeatureFunction() {}
+PhraseBasedFeatureContext::PhraseBasedFeatureContext(const Hypothesis* hypothesis) :
+ m_hypothesis(hypothesis),
+ m_translationOption(m_hypothesis->GetTranslationOption()),
+ m_source(m_hypothesis->GetManager().GetSource()) {}
-bool StatelessFeatureFunction::IsStateless() const
+PhraseBasedFeatureContext::PhraseBasedFeatureContext
+ (const TranslationOption& translationOption, const InputType& source) :
+ m_hypothesis(NULL),
+ m_translationOption(translationOption),
+ m_source(source) {}
+
+const TranslationOption& PhraseBasedFeatureContext::GetTranslationOption() const
{
- return true;
+ return m_translationOption;
}
-bool StatelessFeatureFunction::ComputeValueInTranslationOption() const
+const InputType& PhraseBasedFeatureContext::GetSource() const
{
- return false;
+ return m_source;
+}
+
+const TargetPhrase& PhraseBasedFeatureContext::GetTargetPhrase() const
+{
+ return m_translationOption.GetTargetPhrase();
}
-void StatelessFeatureFunction::Evaluate(const Hypothesis& /* cur_hypo */,
- ScoreComponentCollection* /* accumulator */) const
+const WordsBitmap& PhraseBasedFeatureContext::GetWordsBitmap() const
{
- CHECK(!"Please implement Evaluate or set ComputeValueInTranslationOption to true");
+ if (!m_hypothesis) {
+ throw std::logic_error("Coverage vector not available during pre-calculation");
+ }
+ return m_hypothesis->GetWordsBitmap();
}
-void StatelessFeatureFunction::EvaluateChart(const ChartHypothesis& /*cur_hypo*/,
- int /*featureID*/,
- ScoreComponentCollection* /*accumulator*/) const
+
+ChartBasedFeatureContext::ChartBasedFeatureContext
+ (const ChartHypothesis* hypothesis):
+ m_hypothesis(hypothesis),
+ m_targetPhrase(hypothesis->GetCurrTargetPhrase()),
+ m_source(hypothesis->GetManager().GetSource()) {}
+
+ChartBasedFeatureContext::ChartBasedFeatureContext(
+ const TargetPhrase& targetPhrase,
+ const InputType& source):
+ m_hypothesis(NULL),
+ m_targetPhrase(targetPhrase),
+ m_source(source) {}
+
+const InputType& ChartBasedFeatureContext::GetSource() const
{
- CHECK(!"Please implement EvaluateChart or set ComputeValueInTranslationOption to true");
+ return m_source;
+}
+
+const TargetPhrase& ChartBasedFeatureContext::GetTargetPhrase() const
+{
+ return m_targetPhrase;
+}
+
+
+
+FeatureFunction::~FeatureFunction() {}
+
+bool StatelessFeatureFunction::IsStateless() const
+{
+ return true;
+}
+
+bool StatelessFeatureFunction::ComputeValueInTranslationOption() const
+{
+ return false;
}
bool StatefulFeatureFunction::IsStateless() const
diff --git a/moses/src/FeatureFunction.h b/moses/src/FeatureFunction.h
index 1a66a2857..b1b97bd7e 100644
--- a/moses/src/FeatureFunction.h
+++ b/moses/src/FeatureFunction.h
@@ -9,12 +9,71 @@ namespace Moses
{
class TargetPhrase;
+class TranslationOption;
class Hypothesis;
class ChartHypothesis;
class FFState;
class InputType;
class ScoreComponentCollection;
+class WordsBitmap;
+class WordsRange;
+
+
+/**
+ * Contains all that a feature function can access without affecting recombination.
+ * For stateless features, this is all that it can access. Currently this is not
+ * used for stateful features, as it would need to be retro-fitted to the LM feature.
+ * TODO: Expose source segmentation,lattice path.
+ * XXX Don't add anything to the context that would break recombination XXX
+ **/
+class PhraseBasedFeatureContext
+{
+ // The context either has a hypothesis (during search), or a TranslationOption and
+ // source sentence (during pre-calculation).
+ const Hypothesis* m_hypothesis;
+ const TranslationOption& m_translationOption;
+ const InputType& m_source;
+
+public:
+ PhraseBasedFeatureContext(const Hypothesis* hypothesis);
+ PhraseBasedFeatureContext(const TranslationOption& translationOption,
+ const InputType& source);
+
+ const TranslationOption& GetTranslationOption() const;
+ const InputType& GetSource() const;
+ const TargetPhrase& GetTargetPhrase() const; //convenience method
+ const WordsBitmap& GetWordsBitmap() const;
+
+};
+
+/**
+ * Same as PhraseBasedFeatureContext, but for chart-based Moses.
+ **/
+class ChartBasedFeatureContext
+{
+ //The context either has a hypothesis (during search) or a
+ //TargetPhrase and source sentence (during pre-calculation)
+ //TODO: should the context also include some info on where the TargetPhrase
+ //is anchored (assuming it's lexicalised), which is available at pre-calc?
+ const ChartHypothesis* m_hypothesis;
+ const TargetPhrase& m_targetPhrase;
+ const InputType& m_source;
+
+public:
+ ChartBasedFeatureContext(const ChartHypothesis* hypothesis);
+ ChartBasedFeatureContext(const TargetPhrase& targetPhrase,
+ const InputType& source);
+ const InputType& GetSource() const;
+ const TargetPhrase& GetTargetPhrase() const;
+
+};
+
+
+/** base class for all feature functions.
+ * @todo is this for pb & hiero too?
+ * @todo what's the diff between FeatureFunction and ScoreProducer?
+ */
class FeatureFunction: public ScoreProducer
{
@@ -27,29 +86,41 @@ public:
float GetSparseProducerWeight() const { return 1; }
};
+/** base class for all stateless feature functions.
+ * eg. phrase table, word penalty, phrase penalty
+ */
class StatelessFeatureFunction: public FeatureFunction
{
public:
StatelessFeatureFunction(const std::string& description, size_t numScoreComponents) :
FeatureFunction(description, numScoreComponents) {}
- //! Evaluate for stateless feature functions. Implement this.
- virtual void Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
-
- virtual void EvaluateChart(const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const;
-
- // If true, this value is expected to be included in the
- // ScoreBreakdown in the TranslationOption once it has been
- // constructed.
- // Default: false
+ /**
+ * This should be implemented for features that apply to phrase-based models.
+ **/
+ virtual void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const = 0;
+
+ /**
+ * Same for chart-based features.
+ **/
+ virtual void EvaluateChart(const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const = 0;
+
+ //If true, then the feature is evaluated before search begins, and stored in
+ //the TranslationOptionCollection.
virtual bool ComputeValueInTranslationOption() const;
+ //!If true, the feature is stored in the ttable, so gets copied into the
+ //TargetPhrase and does not need cached in the TranslationOption
+ virtual bool ComputeValueInTranslationTable() const {return false;}
+
bool IsStateless() const;
};
+/** base class for all stateful feature functions.
+ * eg. LM, distortion penalty
+ */
class StatefulFeatureFunction: public FeatureFunction
{
@@ -71,7 +142,7 @@ public:
virtual FFState* EvaluateChart(
const ChartHypothesis& /* cur_hypo */,
- int /* featureID */,
+ int /* featureID - used to index the state in the previous hypotheses */,
ScoreComponentCollection* accumulator) const = 0;
//! return the state associated with the empty hypothesis for a given sentence
diff --git a/moses/src/FilePtr.h b/moses/src/FilePtr.h
index 1613b2f2f..b9c835e59 100644
--- a/moses/src/FilePtr.h
+++ b/moses/src/FilePtr.h
@@ -13,9 +13,9 @@
namespace Moses
{
-// smart pointer for on-demand loading from file
-// requirement: T has a constructor T(FILE*)
-
+/** smart pointer for on-demand loading from file
+ * requirement: T has a constructor T(FILE*)
+ */
template<typename T> class FilePtr
{
public:
diff --git a/moses/src/GenerationDictionary.h b/moses/src/GenerationDictionary.h
index 1c3f75f75..af7765b35 100644
--- a/moses/src/GenerationDictionary.h
+++ b/moses/src/GenerationDictionary.h
@@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <list>
#include <map>
+#include <stdexcept>
#include <vector>
#include "ScoreComponentCollection.h"
#include "Phrase.h"
@@ -87,6 +88,22 @@ public:
*/
const OutputWordCollection *FindWord(const Word &word) const;
virtual bool ComputeValueInTranslationOption() const;
+
+ //Usual feature function methods are not implemented
+ virtual void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ throw std::logic_error("GenerationDictionary::Evaluate() Not implemented");
+ }
+
+ virtual void EvaluateChart(const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ throw std::logic_error("GenerationDictionary.Evaluate() Not implemented");
+ }
+
+ virtual bool ComputeValueInTranslationTable() const {return true;}
+
};
diff --git a/moses/src/GlobalLexicalModel.cpp b/moses/src/GlobalLexicalModel.cpp
index 04ac0d6fc..474ca8e8f 100644
--- a/moses/src/GlobalLexicalModel.cpp
+++ b/moses/src/GlobalLexicalModel.cpp
@@ -2,6 +2,7 @@
#include "GlobalLexicalModel.h"
#include "StaticData.h"
#include "InputFileStream.h"
+#include "TranslationOption.h"
#include "UserMessage.h"
using namespace std;
@@ -157,8 +158,11 @@ float GlobalLexicalModel::GetFromCacheOrScorePhrase( const TargetPhrase& targetP
return score;
}
-void GlobalLexicalModel::Evaluate(const TargetPhrase& targetPhrase, ScoreComponentCollection* accumulator) const
+ void GlobalLexicalModel::Evaluate
+ (const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- accumulator->PlusEquals( this, GetFromCacheOrScorePhrase( targetPhrase ) );
+ accumulator->PlusEquals( this,
+ GetFromCacheOrScorePhrase(context.GetTargetPhrase()) );
}
}
diff --git a/moses/src/GlobalLexicalModel.h b/moses/src/GlobalLexicalModel.h
index ad75d0d23..7ab9459e5 100644
--- a/moses/src/GlobalLexicalModel.h
+++ b/moses/src/GlobalLexicalModel.h
@@ -31,7 +31,6 @@ class InputType;
* each output word from _all_ the input words. The intuition behind this
* feature is that it uses context words for disambiguation
*/
-
class GlobalLexicalModel : public StatelessFeatureFunction
{
typedef std::map< const Word*, std::map< const Word*, float, WordComparer >, WordComparer > DoubleHash;
@@ -75,11 +74,12 @@ public:
void InitializeForInput( Sentence const& in );
- void Evaluate(const TargetPhrase&, ScoreComponentCollection* ) const;
+ void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
+
void EvaluateChart(
- const ChartHypothesis& cur_hypo,
- int featureID,
+ const ChartBasedFeatureContext& context,
ScoreComponentCollection* accumulator) const
{
std::cerr << "EvaluateChart not implemented." << std::endl;
diff --git a/moses/src/GlobalLexicalModelUnlimited.h b/moses/src/GlobalLexicalModelUnlimited.h
index c05c11be8..307461db0 100644
--- a/moses/src/GlobalLexicalModelUnlimited.h
+++ b/moses/src/GlobalLexicalModelUnlimited.h
@@ -121,6 +121,8 @@ public:
return new DummyState();
}
+ //TODO: This implements the old interface, but cannot be updated because
+ //it appears to be stateful
void Evaluate(const Hypothesis& cur_hypo,
ScoreComponentCollection* accumulator) const;
diff --git a/moses/src/HypoList.h b/moses/src/HypoList.h
index 9364399ff..5c3ebe991 100644
--- a/moses/src/HypoList.h
+++ b/moses/src/HypoList.h
@@ -26,6 +26,7 @@ namespace Moses
class ChartHypothesis;
+//! vector of chart hypotheses. May get turned into a class
typedef std::vector<const ChartHypothesis*> HypoList;
}
diff --git a/moses/src/Hypothesis.cpp b/moses/src/Hypothesis.cpp
index d40dd7e86..9a76abcd1 100644
--- a/moses/src/Hypothesis.cpp
+++ b/moses/src/Hypothesis.cpp
@@ -258,6 +258,32 @@ void Hypothesis::ResetScore()
m_futureScore = m_totalScore = 0.0f;
}
+void Hypothesis::IncorporateTransOptScores() {
+ m_currScoreBreakdown.PlusEquals(m_transOpt->GetScoreBreakdown());
+}
+
+void Hypothesis::EvaluateWith(StatefulFeatureFunction* sfff,
+ int state_idx) {
+ m_ffStates[state_idx] = sfff->Evaluate(
+ *this,
+ m_prevHypo ? m_prevHypo->m_ffStates[state_idx] : NULL,
+ &m_currScoreBreakdown);
+
+}
+
+void Hypothesis::EvaluateWith(const StatelessFeatureFunction* slff) {
+ slff->Evaluate(PhraseBasedFeatureContext(this), &m_currScoreBreakdown);
+}
+
+void Hypothesis::CalculateFutureScore(const SquareMatrix& futureScore) {
+ m_futureScore = futureScore.CalcFutureScore( m_sourceCompleted );
+}
+
+void Hypothesis::CalculateFinalScore() {
+ m_totalScore = GetScoreBreakdown().InnerProduct(
+ StaticData::Instance().GetAllWeights()) + m_futureScore;
+}
+
/***
* calculate the logarithm of our total translation score (sum up components)
*/
@@ -269,15 +295,23 @@ void Hypothesis::CalcScore(const SquareMatrix &futureScore)
// phrase are also included here
m_currScoreBreakdown = m_transOpt->GetScoreBreakdown();
+ // other stateless features have their scores cached in the
+ // TranslationOptionsCollection
+ m_manager.getSntTranslationOptions()->InsertPreCalculatedScores
+ (*m_transOpt, &m_currScoreBreakdown);
+
const StaticData &staticData = StaticData::Instance();
clock_t t=0; // used to track time
// compute values of stateless feature functions that were not
- // cached in the translation option-- there is no principled distinction
+ // cached in the translation option
const vector<const StatelessFeatureFunction*>& sfs =
m_manager.GetTranslationSystem()->GetStatelessFeatureFunctions();
- for (unsigned i = 0; i < sfs.size(); ++i)
- sfs[i]->Evaluate(*this, &m_currScoreBreakdown);
+ for (unsigned i = 0; i < sfs.size(); ++i) {
+ if (!sfs[i]->ComputeValueInTranslationOption()) {
+ EvaluateWith(sfs[i]);
+ }
+ }
const vector<const StatefulFeatureFunction*>& ffs =
m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
diff --git a/moses/src/Hypothesis.h b/moses/src/Hypothesis.h
index 23980326e..fe885ccc8 100644
--- a/moses/src/Hypothesis.h
+++ b/moses/src/Hypothesis.h
@@ -257,9 +257,19 @@ public:
float GetScore() const {
return m_totalScore-m_futureScore;
}
+ const FFState* GetFFState(int idx) const {
+ return m_ffStates[idx];
+ }
+ void SetFFState(int idx, FFState* state) {
+ m_ffStates[idx] = state;
+ }
-
-
+ // Added by oliver.wilson@ed.ac.uk for async lm stuff.
+ void IncorporateTransOptScores();
+ void EvaluateWith(StatefulFeatureFunction* sfff, int state_idx);
+ void EvaluateWith(const StatelessFeatureFunction* slff);
+ void CalculateFutureScore(const SquareMatrix& futureScore);
+ void CalculateFinalScore();
//! target span that trans opt would populate if applied to this hypo. Used for alignment check
size_t GetNextStartPos(const TranslationOption &transOpt) const;
diff --git a/moses/src/HypothesisStack.h b/moses/src/HypothesisStack.h
index 045773725..26e6ed21b 100644
--- a/moses/src/HypothesisStack.h
+++ b/moses/src/HypothesisStack.h
@@ -11,6 +11,9 @@ namespace Moses
class Manager;
+/** abstract unique set of hypotheses that cover a certain number of words,
+ * ie. a stack in phrase-based decoding
+ */
class HypothesisStack
{
diff --git a/moses/src/HypothesisStackCubePruning.h b/moses/src/HypothesisStackCubePruning.h
index 53cad04a1..6dc973ed3 100644
--- a/moses/src/HypothesisStackCubePruning.h
+++ b/moses/src/HypothesisStackCubePruning.h
@@ -38,7 +38,7 @@ class Manager;
typedef std::map<WordsBitmap, BitmapContainer*> _BMType;
-/** Stack for instances of Hypothesis, includes functions for pruning. */
+/** A stack for phrase-based decoding with cube-pruning. */
class HypothesisStackCubePruning : public HypothesisStack
{
public:
diff --git a/moses/src/HypothesisStackNormal.h b/moses/src/HypothesisStackNormal.h
index d991ec373..5be07f20e 100644
--- a/moses/src/HypothesisStackNormal.h
+++ b/moses/src/HypothesisStackNormal.h
@@ -33,7 +33,7 @@ namespace Moses
// class WordsBitmap;
// typedef size_t WordsBitmapID;
-/** Stack for instances of Hypothesis, includes functions for pruning. */
+/** A stack for standard phrase-based decoding. */
class HypothesisStackNormal: public HypothesisStack
{
public:
diff --git a/moses/src/InputType.cpp b/moses/src/InputType.cpp
index d67e343f1..64dc9a7fd 100644
--- a/moses/src/InputType.cpp
+++ b/moses/src/InputType.cpp
@@ -23,7 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cstdlib>
#include "InputType.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
namespace Moses
{
@@ -61,10 +61,10 @@ bool InputType::CanIGetFromAToB(size_t /*start*/, size_t /*end*/) const
return true;
}
-std::vector <ChartTranslationOption*> InputType::GetXmlChartTranslationOptions() const
+std::vector <ChartTranslationOptions*> InputType::GetXmlChartTranslationOptions() const
{
// default. return nothing
- std::vector <ChartTranslationOption*> ret;
+ std::vector <ChartTranslationOptions*> ret;
return ret;
}
diff --git a/moses/src/InputType.h b/moses/src/InputType.h
index 99c987c4f..b72cccfd8 100644
--- a/moses/src/InputType.h
+++ b/moses/src/InputType.h
@@ -38,9 +38,11 @@ class Factor;
class PhraseDictionary;
class TranslationOptionCollection;
class TranslationSystem;
-class ChartTranslationOption;
+class ChartTranslationOptions;
-//! base class for sentences and confusion networks
+/** base class for all types of inputs to the decoder,
+ * eg. sentences, confusion networks, lattices and tree
+ */
class InputType
{
protected:
@@ -173,7 +175,7 @@ public:
virtual const NonTerminalSet &GetLabelSet(size_t startPos, size_t endPos) const = 0;
- virtual std::vector <ChartTranslationOption*> GetXmlChartTranslationOptions() const;
+ virtual std::vector <ChartTranslationOptions*> GetXmlChartTranslationOptions() const;
TO_STRING();
diff --git a/moses/src/Jamfile b/moses/src/Jamfile
index 178a657bb..ce85d6ac5 100644
--- a/moses/src/Jamfile
+++ b/moses/src/Jamfile
@@ -11,12 +11,20 @@ if [ option.get "with-synlm" : no : yes ] = yes
alias synlm ;
}
+local have-clock = [ SHELL "bash -c \"g++ -dM -x c++ -E /dev/null -include time.h 2>/dev/null |grep CLOCK_MONOTONIC\"" : exit-status ] ;
+if $(have-clock[2]) = 0 {
+ #required for clock_gettime. Threads already have rt.
+ lib rt : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
+} else {
+ alias rt ;
+}
+
lib moses_internal :
#All cpp files except those listed
[ glob *.cpp DynSAInclude/*.cpp : PhraseDictionary.cpp ThreadPool.cpp SyntacticLanguageModel.cpp *Test.cpp Mock*.cpp ]
-synlm ThreadPool headers ;
+synlm ThreadPool headers rt ;
-alias moses : PhraseDictionary.cpp moses_internal CYKPlusParser//CYKPlusParser LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser headers ../..//z ../../OnDiskPt//OnDiskPt ;
+lib moses : PhraseDictionary.cpp moses_internal CYKPlusParser//CYKPlusParser CompactPT//CompactPT LM//LM RuleTable//RuleTable Scope3Parser//Scope3Parser fuzzy-match//fuzzy-match headers ../..//z ../../OnDiskPt//OnDiskPt ../..//boost_filesystem ;
alias headers-to-install : [ glob-tree *.h ] ;
diff --git a/moses/src/LM/Base.h b/moses/src/LM/Base.h
index 621ab0dc3..424dcb443 100644
--- a/moses/src/LM/Base.h
+++ b/moses/src/LM/Base.h
@@ -61,7 +61,7 @@ public:
virtual void InitializeBeforeSentenceProcessing() {}
- virtual void CleanUpAfterSentenceProcessing() {}
+ virtual void CleanUpAfterSentenceProcessing(const InputType& source) {}
virtual const FFState* EmptyHypothesisState(const InputType &input) const = 0;
@@ -79,6 +79,17 @@ public:
* \param oovCount number of LM OOVs
*/
virtual void CalcScore(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const = 0;
+ virtual void CalcScoreFromCache(const Phrase &phrase, float &fullScore, float &ngramScore, std::size_t &oovCount) const {
+ }
+
+ virtual void IssueRequestsFor(Hypothesis& hypo,
+ const FFState* input_state) {
+ }
+ virtual void sync() {
+ }
+ virtual void SetFFStateIdx(int state_idx) {
+ }
+
};
}
diff --git a/moses/src/LM/IRST.cpp b/moses/src/LM/IRST.cpp
index fd5a024ea..a0ff8b591 100644
--- a/moses/src/LM/IRST.cpp
+++ b/moses/src/LM/IRST.cpp
@@ -251,7 +251,7 @@ bool LMCacheCleanup(size_t sentences_done, size_t m_lmcache_cleanup_threshold)
}
-void LanguageModelIRST::CleanUpAfterSentenceProcessing()
+void LanguageModelIRST::CleanUpAfterSentenceProcessing(const InputType& source)
{
const StaticData &staticData = StaticData::Instance();
static int sentenceCount = 0;
diff --git a/moses/src/LM/IRST.h b/moses/src/LM/IRST.h
index 2ffe67f98..54f579e35 100644
--- a/moses/src/LM/IRST.h
+++ b/moses/src/LM/IRST.h
@@ -38,8 +38,8 @@ namespace Moses
class Phrase;
/** Implementation of single factor LM using IRST's code.
-* This is the default LM for Moses and is available from the same sourceforge repository
-*/
+ * This is available from the same sourceforge repository
+ */
class LanguageModelIRST : public LanguageModelPointerState
{
protected:
@@ -70,7 +70,7 @@ public:
virtual LMResult GetValue(const std::vector<const Word*> &contextFactor, State* finalState = NULL) const;
- void CleanUpAfterSentenceProcessing();
+ void CleanUpAfterSentenceProcessing(const InputType& source);
void InitializeBeforeSentenceProcessing();
void set_dictionary_upperbound(int dub) {
diff --git a/moses/src/LM/Implementation.cpp b/moses/src/LM/Implementation.cpp
index 589ed375a..2fe4bc122 100644
--- a/moses/src/LM/Implementation.cpp
+++ b/moses/src/LM/Implementation.cpp
@@ -231,8 +231,8 @@ private:
{
const TargetPhrase &target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- target.GetAlignmentInfo().GetNonTermIndexMap();
-
+ target.GetAlignmentInfo().GetNonTermIndexMap();
+
// loop over the rule that is being applied
for (size_t pos = 0; pos < target.GetSize(); ++pos) {
const Word &word = target.GetWord(pos);
@@ -283,10 +283,11 @@ private:
}
// construct suffix analogous to prefix
else {
+ const TargetPhrase& target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
- for (int pos = (int) hypo.GetCurrTargetPhrase().GetSize() - 1; pos >= 0 ; --pos) {
- const Word &word = hypo.GetCurrTargetPhrase().GetWord(pos);
+ target.GetAlignmentInfo().GetNonTermIndexMap();
+ for (int pos = (int) target.GetSize() - 1; pos >= 0 ; --pos) {
+ const Word &word = target.GetWord(pos);
if (word.IsNonTerminal()) {
size_t nonTermInd = nonTermIndexMap[pos];
@@ -388,16 +389,17 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
float finalizedScore = 0.0; // finalized, has sufficient context
// get index map for underlying hypotheses
+ const TargetPhrase &target = hypo.GetCurrTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
- hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+ hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
// loop over rule
for (size_t phrasePos = 0, wordPos = 0;
- phrasePos < hypo.GetCurrTargetPhrase().GetSize();
+ phrasePos < target.GetSize();
phrasePos++)
{
// consult rule for either word or non-terminal
- const Word &word = hypo.GetCurrTargetPhrase().GetWord(phrasePos);
+ const Word &word = target.GetWord(phrasePos);
// regular word
if (!word.IsNonTerminal())
@@ -406,7 +408,7 @@ FFState* LanguageModelImplementation::EvaluateChart(const ChartHypothesis& hypo,
// beginning of sentence symbol <s>? -> just update state
if (word == GetSentenceStartArray())
- {
+ {
CHECK(phrasePos == 0);
delete lmState;
lmState = NewState( GetBeginSentenceState() );
diff --git a/moses/src/LM/Implementation.h b/moses/src/LM/Implementation.h
index b92054362..fe1a406da 100644
--- a/moses/src/LM/Implementation.h
+++ b/moses/src/LM/Implementation.h
@@ -42,6 +42,7 @@ class FactorCollection;
class Factor;
class Phrase;
+//! to be returned from LM functions
struct LMResult {
// log probability
float score;
@@ -120,7 +121,7 @@ public:
//! overrideable funtions for IRST LM to cleanup. Maybe something to do with on demand/cache loading/unloading
virtual void InitializeBeforeSentenceProcessing() {};
- virtual void CleanUpAfterSentenceProcessing() {};
+ virtual void CleanUpAfterSentenceProcessing(const InputType& source) {};
};
class LMRefCount : public LanguageModel {
@@ -135,8 +136,8 @@ class LMRefCount : public LanguageModel {
m_impl->InitializeBeforeSentenceProcessing();
}
- void CleanUpAfterSentenceProcessing() {
- m_impl->CleanUpAfterSentenceProcessing();
+ void CleanUpAfterSentenceProcessing(const InputType& source) {
+ m_impl->CleanUpAfterSentenceProcessing(source);
}
const FFState* EmptyHypothesisState(const InputType &/*input*/) const {
diff --git a/moses/src/LM/Jamfile b/moses/src/LM/Jamfile
index 81c94e9bd..60f73303d 100644
--- a/moses/src/LM/Jamfile
+++ b/moses/src/LM/Jamfile
@@ -5,7 +5,26 @@
#Each optional model has a section below. The top level rule is lib LM, which
#appears after the optional models.
-import option path build-system ;
+import option path ;
+
+#This is a kludge to force rebuilding if different --with options are passed.
+#Could have used features like <srilm>on but getting these to apply only to
+#linking was ugly and it still didn't trigger an install (since the install
+#path doesn't encode features). It stores a file lm.log with the previous
+#options and forces a rebuild if the current options differ.
+local current = ;
+for local i in srilm irstlm randlm {
+ local optval = [ option.get "with-$(i)" ] ;
+ if $(optval) {
+ current += "--with-$(i)=$(optval)" ;
+ }
+}
+current = $(current:J=" ") ;
+current ?= "" ;
+
+path-constant LM-LOG : bin/lm.log ;
+update-if-changed $(LM-LOG) $(current) ;
+
local dependencies = ;
@@ -69,46 +88,9 @@ if $(with-ldhtlm) {
obj ORLM.o : ORLM.cpp ..//headers ../DynSAInclude//dynsa : : : <include>../DynSAInclude ;
#The factory needs the macros LM_IRST etc to know which ones to use.
-obj Factory.o : Factory.cpp ..//headers $(dependencies) : <include>../DynSAInclude ;
+obj Factory.o : Factory.cpp ..//headers $(dependencies) : <include>../DynSAInclude <dependency>$(LM-LOG) ;
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
lib LM : Base.cpp Factory.o Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
../../../lm//kenlm ..//headers $(dependencies) ;
-
-#Everything below is a kludge to force rebuilding if different --with options
-#are passed. Could have used features like <srilm>on but getting these to
-#apply only to linking was ugly and it still didn't trigger an install (since
-#the install path doesn't encode features). It stores a file lm.log with the
-#previous options and forces a rebuild if the current options differ.
-path-constant LM-LOG : bin/lm.log ;
-#Is there no other way to read a file with bjam?
-local previous = none ;
-if [ path.exists $(LM-LOG) ] {
- previous = [ _shell "cat $(LM-LOG)" ] ;
-}
-current = "" ;
-for local i in srilm irstlm randlm {
- local optval = [ option.get "with-$(i)" ] ;
- if $(optval) {
- current = "$(current) --with-$(i)=$(optval)" ;
- }
-}
-
-if $(current) != $(previous) {
- #Write inconsistent while the build is running
- if [ path.exists $(LM-LOG) ] {
- local ignored = @($(LM-LOG):E=inconsistent) ;
- }
- #Write $(current) to $(LM-LOG) after the build completes.
- rule post-build ( ok ? ) {
- if $(ok) {
- local ignored = @($(LM-LOG):E=$(current)) ;
- }
- }
- IMPORT $(__name__) : post-build : : $(__name__).post-build ;
- build-system.set-post-build-hook $(__name__).post-build ;
-
- always Factory.o ;
- always LM ;
-}
diff --git a/moses/src/LM/Joint.h b/moses/src/LM/Joint.h
index e000409be..d37d70265 100644
--- a/moses/src/LM/Joint.h
+++ b/moses/src/LM/Joint.h
@@ -38,8 +38,9 @@ class Phrase;
class FactorCollection;
/** LM of multiple factors. A simple extension of single factor LM - factors backoff together.
- * Rather slow as this uses string concatenation/split
-*/
+ * Rather slow as this uses string concatenation/split.
+ * Not used for a long time
+ */
class LanguageModelJoint : public LanguageModelMultiFactor
{
protected:
diff --git a/moses/src/LM/Ken.cpp b/moses/src/LM/Ken.cpp
index 9020c42d3..2d65ad7df 100644
--- a/moses/src/LM/Ken.cpp
+++ b/moses/src/LM/Ken.cpp
@@ -284,7 +284,9 @@ class LanguageModelChartStateKenLM : public FFState {
template <class Model> FFState *LanguageModelKen<Model>::EvaluateChart(const ChartHypothesis& hypo, int featureID, ScoreComponentCollection *accumulator) const {
LanguageModelChartStateKenLM *newState = new LanguageModelChartStateKenLM();
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState->GetChartState());
- const AlignmentInfo::NonTermIndexMap &nonTermIndexMap = hypo.GetCurrTargetPhrase().GetAlignmentInfo().GetNonTermIndexMap();
+ const TargetPhrase &target = hypo.GetCurrTargetPhrase();
+ const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
+ target.GetAlignmentInfo().GetNonTermIndexMap();
const size_t size = hypo.GetCurrTargetPhrase().GetSize();
size_t phrasePos = 0;
@@ -326,15 +328,17 @@ LanguageModel *ConstructKenLM(const std::string &file, FactorType factorType, bo
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch(model_type) {
- case lm::ngram::HASH_PROBING:
- return new LanguageModelKen<lm::ngram::ProbingModel>(file, factorType, lazy);
- case lm::ngram::TRIE_SORTED:
+ case lm::ngram::PROBING:
+ return new LanguageModelKen<lm::ngram::ProbingModel>(file, factorType, lazy);
+ case lm::ngram::REST_PROBING:
+ return new LanguageModelKen<lm::ngram::RestProbingModel>(file, factorType, lazy);
+ case lm::ngram::TRIE:
return new LanguageModelKen<lm::ngram::TrieModel>(file, factorType, lazy);
- case lm::ngram::QUANT_TRIE_SORTED:
+ case lm::ngram::QUANT_TRIE:
return new LanguageModelKen<lm::ngram::QuantTrieModel>(file, factorType, lazy);
- case lm::ngram::ARRAY_TRIE_SORTED:
+ case lm::ngram::ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::ArrayTrieModel>(file, factorType, lazy);
- case lm::ngram::QUANT_ARRAY_TRIE_SORTED:
+ case lm::ngram::QUANT_ARRAY_TRIE:
return new LanguageModelKen<lm::ngram::QuantArrayTrieModel>(file, factorType, lazy);
default:
std::cerr << "Unrecognized kenlm model type " << model_type << std::endl;
diff --git a/moses/src/LM/Ken.h b/moses/src/LM/Ken.h
index f55679395..e2a8d613c 100644
--- a/moses/src/LM/Ken.h
+++ b/moses/src/LM/Ken.h
@@ -30,7 +30,7 @@ namespace Moses {
class LanguageModel;
-// This will also load.
+//! This will also load. Returns a templated KenLM class
LanguageModel *ConstructKenLM(const std::string &file, FactorType factorType, bool lazy);
} // namespace Moses
diff --git a/moses/src/LM/LDHT.cpp b/moses/src/LM/LDHT.cpp
index 0c61235b8..6da941b5e 100644
--- a/moses/src/LM/LDHT.cpp
+++ b/moses/src/LM/LDHT.cpp
@@ -7,6 +7,7 @@
#include "../FFState.h"
#include "../TypeDef.h"
#include "../Hypothesis.h"
+#include "../StaticData.h"
#include <LDHT/Client.h>
#include <LDHT/ClientLocal.h>
@@ -19,14 +20,43 @@ namespace Moses {
struct LDHTLMState : public FFState {
LDHT::NewNgram gram_fingerprints;
+ bool finalised;
+ std::vector<int> request_tags;
+
+ LDHTLMState(): finalised(false) {
+ }
+
+ void setFinalised() {
+ this->finalised = true;
+ }
+
+ void appendRequestTag(int tag) {
+ this->request_tags.push_back(tag);
+ }
+
+ void clearRequestTags() {
+ this->request_tags.clear();
+ }
+
+ std::vector<int>::iterator requestTagsBegin() {
+ return this->request_tags.begin();
+ }
+
+ std::vector<int>::iterator requestTagsEnd() {
+ return this->request_tags.end();
+ }
int Compare(const FFState& uncast_other) const {
const LDHTLMState &other = static_cast<const LDHTLMState&>(uncast_other);
+ //if (!this->finalised)
+ // return -1;
+
return gram_fingerprints.compareMoses(other.gram_fingerprints);
}
void copyFrom(const LDHTLMState& other) {
gram_fingerprints.copyFrom(other.gram_fingerprints);
+ finalised = false;
}
};
@@ -40,7 +70,7 @@ public:
LanguageModelLDHT& copyFrom);
std::string GetScoreProducerDescription(unsigned) const {
std::ostringstream oss;
- oss << "LM_" << LDHT::NewNgram::k_max_order << "gram";
+ oss << "DLM_" << LDHT::NewNgram::k_max_order << "gram";
return oss.str();
}
LDHT::Client* getClientUnsafe() const;
@@ -57,6 +87,10 @@ public:
float& fullScore,
float& ngramScore,
std::size_t& oovCount) const;
+ virtual void CalcScoreFromCache(const Phrase& phrase,
+ float& fullScore,
+ float& ngramScore,
+ std::size_t& oovCount) const;
FFState* Evaluate(const Hypothesis& hypo,
const FFState* input_state,
ScoreComponentCollection* score_output) const;
@@ -64,10 +98,19 @@ public:
int featureID,
ScoreComponentCollection* accumulator) const;
+ virtual void IssueRequestsFor(Hypothesis& hypo,
+ const FFState* input_state);
+ float calcScoreFromState(LDHTLMState* hypo) const;
+ void sync();
+ void SetFFStateIdx(int state_idx);
+
protected:
boost::thread_specific_ptr<LDHT::Client> m_client;
std::string m_configPath;
FactorType m_factorType;
+ int m_state_idx;
+ int m_calc_score_count;
+ uint64_t m_start_tick;
};
@@ -83,6 +126,7 @@ LanguageModelLDHT::LanguageModelLDHT() : LanguageModel(), m_client(NULL) {
LanguageModelLDHT::LanguageModelLDHT(ScoreIndexManager& manager,
LanguageModelLDHT& copyFrom) {
+ m_calc_score_count = 0;
//m_client = copyFrom.m_client;
m_factorType = copyFrom.m_factorType;
m_configPath = copyFrom.m_configPath;
@@ -99,7 +143,7 @@ LanguageModelLDHT::LanguageModelLDHT(const std::string& path,
LanguageModelLDHT::~LanguageModelLDHT() {
// TODO(wilson): should cleanup for each individual thread.
- delete getClientSafe();
+ //delete getClientSafe();
}
LanguageModel* LanguageModelLDHT::Duplicate(
@@ -131,8 +175,8 @@ LDHT::Client* LanguageModelLDHT::initTSSClient() {
LDHT::FactoryCollection::createDefaultFactoryCollection();
LDHT::Client* client;
- client = new LDHT::ClientLocal();
- //client = new LDHT::Client();
+ //client = new LDHT::ClientLocal();
+ client = new LDHT::Client();
client->fromXmlFiles(*factory_collection,
ldht_config_path,
ldhtlm_config_path);
@@ -141,9 +185,26 @@ LDHT::Client* LanguageModelLDHT::initTSSClient() {
void LanguageModelLDHT::InitializeBeforeSentenceProcessing() {
getClientSafe()->clearCache();
+ m_start_tick = LDHT::Util::rdtsc();
}
void LanguageModelLDHT::CleanUpAfterSentenceProcessing() {
+ LDHT::Client* client = getClientSafe();
+
+ std::cerr << "LDHT sentence stats:" << std::endl;
+ std::cerr << " ngrams submitted: " << client->getNumNgramsSubmitted() << std::endl
+ << " ngrams requested: " << client->getNumNgramsRequested() << std::endl
+ << " ngrams not found: " << client->getKeyNotFoundCount() << std::endl
+ << " cache hits: " << client->getCacheHitCount() << std::endl
+ << " inferences: " << client->getInferenceCount() << std::endl
+ << " pcnt latency: " << (float)client->getLatencyTicks() / (float)(LDHT::Util::rdtsc() - m_start_tick) * 100.0 << std::endl;
+ m_start_tick = 0;
+ client->resetLatencyTicks();
+ client->resetNumNgramsSubmitted();
+ client->resetNumNgramsRequested();
+ client->resetInferenceCount();
+ client->resetCacheHitCount();
+ client->resetKeyNotFoundCount();
}
const FFState* LanguageModelLDHT::EmptyHypothesisState(
@@ -159,6 +220,46 @@ void LanguageModelLDHT::CalcScore(const Phrase& phrase,
float& fullScore,
float& ngramScore,
std::size_t& oovCount) const {
+ const_cast<LanguageModelLDHT*>(this)->m_calc_score_count++;
+ if (m_calc_score_count > 10000) {
+ const_cast<LanguageModelLDHT*>(this)->m_calc_score_count = 0;
+ const_cast<LanguageModelLDHT*>(this)->sync();
+ }
+
+ // TODO(wilson): handle nonterminal words.
+ LDHT::Client* client = getClientUnsafe();
+ // Score the first order - 1 words of the phrase.
+ int order = LDHT::NewNgram::k_max_order;
+ int prefix_start = 0;
+ int prefix_end = std::min(phrase.GetSize(), static_cast<size_t>(order - 1));
+ LDHT::NewNgram ngram;
+ for (int word_idx = prefix_start; word_idx < prefix_end; ++word_idx) {
+ ngram.appendGram(phrase.GetWord(word_idx)
+ .GetFactor(m_factorType)->GetString().c_str());
+ client->requestNgram(ngram);
+ }
+ // Now score all subsequent ngrams to end of phrase.
+ int internal_start = prefix_end;
+ int internal_end = phrase.GetSize();
+ for (int word_idx = internal_start; word_idx < internal_end; ++word_idx) {
+ ngram.appendGram(phrase.GetWord(word_idx)
+ .GetFactor(m_factorType)->GetString().c_str());
+ client->requestNgram(ngram);
+ }
+
+ fullScore = 0;
+ ngramScore = 0;
+ oovCount = 0;
+}
+
+void LanguageModelLDHT::CalcScoreFromCache(const Phrase& phrase,
+ float& fullScore,
+ float& ngramScore,
+ std::size_t& oovCount) const {
+ // Issue requests for phrase internal ngrams.
+ // Sync if necessary. (or autosync).
+ const_cast<LanguageModelLDHT*>(this)->sync();
+
// TODO(wilson): handle nonterminal words.
LDHT::Client* client = getClientUnsafe();
// Score the first order - 1 words of the phrase.
@@ -183,7 +284,7 @@ void LanguageModelLDHT::CalcScore(const Phrase& phrase,
}
// Wait for resposes from the servers.
- client->awaitResponses();
+ //client->awaitResponses();
// Calculate the full phrase score, and the internal score.
fullScore = 0.0;
@@ -203,10 +304,8 @@ void LanguageModelLDHT::CalcScore(const Phrase& phrase,
oovCount = 0;
}
-FFState* LanguageModelLDHT::Evaluate(
- const Hypothesis& hypo,
- const FFState* input_state,
- ScoreComponentCollection* score_output) const {
+void LanguageModelLDHT::IssueRequestsFor(Hypothesis& hypo,
+ const FFState* input_state) {
// TODO(wilson): handle nonterminal words.
LDHT::Client* client = getClientUnsafe();
@@ -236,11 +335,10 @@ FFState* LanguageModelLDHT::Evaluate(
int overlap_end = std::min(phrase_end, phrase_start + order - 1);
int word_idx = overlap_start;
LDHT::NewNgram& ngram = new_state->gram_fingerprints;
- std::deque<int> request_tags;
for (; word_idx < overlap_end; ++word_idx) {
ngram.appendGram(
hypo.GetFactor(word_idx, m_factorType)->GetString().c_str());
- request_tags.push_back(client->requestNgram(ngram));
+ new_state->appendRequestTag(client->requestNgram(ngram));
}
// No need to score phrase internal ngrams, but keep track of them
// in the state (which in this case is the NewNgram containing the
@@ -253,22 +351,37 @@ FFState* LanguageModelLDHT::Evaluate(
// with the end of sentence marker on it.
if (hypo.IsSourceCompleted()) {
ngram.appendGram(EOS_);
- request_tags.push_back(client->requestNgram(ngram));
- }
- // Await responses from the server.
- client->awaitResponses();
-
- // Calculate scores given the request tags.
- float score = 0;
- while (!request_tags.empty()) {
- score += client->getNgramScore(request_tags.front());
- request_tags.pop_front();
+ //request_tags.push_back(client->requestNgram(ngram));
+ new_state->appendRequestTag(client->requestNgram(ngram));
}
+ hypo.SetFFState(m_state_idx, new_state);
+}
+
+void LanguageModelLDHT::sync() {
+ m_calc_score_count = 0;
+ getClientUnsafe()->awaitResponses();
+}
+void LanguageModelLDHT::SetFFStateIdx(int state_idx) {
+ m_state_idx = state_idx;
+}
+
+FFState* LanguageModelLDHT::Evaluate(
+ const Hypothesis& hypo,
+ const FFState* input_state_ignored,
+ ScoreComponentCollection* score_output) const {
+ // Input state is the state from the previous hypothesis, which
+ // we are not interested in. The requests for this hypo should
+ // already have been issued via IssueRequestsFor() and the LM then
+ // synced and all responses processed, and the tags placed in our
+ // FFState of hypo.
+ LDHTLMState* state = const_cast<LDHTLMState*>(static_cast<const LDHTLMState*>(hypo.GetFFState(m_state_idx)));
+
+ float score = calcScoreFromState(state);
score = FloorScore(TransformLMScore(score));
score_output->PlusEquals(this, score);
- return new_state;
+ return state;
}
FFState* LanguageModelLDHT::EvaluateChart(
@@ -278,5 +391,19 @@ FFState* LanguageModelLDHT::EvaluateChart(
return NULL;
}
+float LanguageModelLDHT::calcScoreFromState(LDHTLMState* state) const {
+ float score = 0.0;
+ std::vector<int>::iterator tag_iter;
+ LDHT::Client* client = getClientUnsafe();
+ for (tag_iter = state->requestTagsBegin();
+ tag_iter != state->requestTagsEnd();
+ ++tag_iter) {
+ score += client->getNgramScore(*tag_iter);
+ }
+ state->clearRequestTags();
+ state->setFinalised();
+ return score;
+}
+
} // namespace Moses.
diff --git a/moses/src/LM/MultiFactor.h b/moses/src/LM/MultiFactor.h
index 32cef51d9..f87d15db4 100644
--- a/moses/src/LM/MultiFactor.h
+++ b/moses/src/LM/MultiFactor.h
@@ -33,7 +33,9 @@ namespace Moses
class Phrase;
-//! Abstract class for for multi factor LM
+/* Abstract class for multi factor LM. Only inherited by the JointLM at the moment.
+ * Could use this when factored LM are implemented
+ */
class LanguageModelMultiFactor : public LanguageModelImplementation
{
protected:
diff --git a/moses/src/LM/ORLM.h b/moses/src/LM/ORLM.h
index 55adb9d82..fb1280196 100644
--- a/moses/src/LM/ORLM.h
+++ b/moses/src/LM/ORLM.h
@@ -7,7 +7,7 @@
#include "LM/SingleFactor.h"
#include "DynSAInclude/onlineRLM.h"
//#include "multiOnlineRLM.h"
-#include "DynSAInclude/file.h"
+#include "DynSAInclude/FileHandler.h"
#include "DynSAInclude/vocab.h"
namespace Moses
@@ -15,6 +15,8 @@ namespace Moses
class Factor;
class Phrase;
+/** @todo ask ollie
+ */
class LanguageModelORLM : public LanguageModelPointerState {
public:
typedef count_t T; // type for ORLM filter
diff --git a/moses/src/LM/ParallelBackoff.h b/moses/src/LM/ParallelBackoff.h
index 8e4241395..223a07b5a 100644
--- a/moses/src/LM/ParallelBackoff.h
+++ b/moses/src/LM/ParallelBackoff.h
@@ -26,6 +26,8 @@ namespace Moses
class LanguageModelMultiFactor;
+/** @todo what is this?
+ */
LanguageModelMultiFactor *NewParallelBackoff();
}
diff --git a/moses/src/LM/Remote.h b/moses/src/LM/Remote.h
index 9a70d64c0..a9d6a7872 100644
--- a/moses/src/LM/Remote.h
+++ b/moses/src/LM/Remote.h
@@ -11,6 +11,8 @@
namespace Moses
{
+/** @todo ask miles
+ */
class LanguageModelRemote : public LanguageModelPointerState
{
private:
diff --git a/moses/src/LM/SRI.h b/moses/src/LM/SRI.h
index 69d55f117..f7711b478 100644
--- a/moses/src/LM/SRI.h
+++ b/moses/src/LM/SRI.h
@@ -36,6 +36,8 @@ class Ngram;
namespace Moses
{
+/** Implementation of single factor LM using SRILM's code.
+ */
class LanguageModelSRI : public LanguageModelPointerState
{
protected:
diff --git a/moses/src/LMList.h b/moses/src/LMList.h
index a803e16df..96ff7f7a7 100644
--- a/moses/src/LMList.h
+++ b/moses/src/LMList.h
@@ -11,7 +11,7 @@ class Phrase;
class ScoreColl;
class ScoreComponentCollection;
-//! List of language models
+//! List of language models and function to calc scores from each LM, given a phrase
class LMList
{
protected:
@@ -38,8 +38,23 @@ public:
~LMList();
void CalcScore(const Phrase &phrase, float &retFullScore, float &retNGramScore, float &retOOVScore, ScoreComponentCollection* breakdown) const;
+ void InitializeBeforeSentenceProcessing() {
+ std::list<LanguageModel*>::iterator lm_iter;
+ for (lm_iter = m_coll.begin();
+ lm_iter != m_coll.end();
+ ++lm_iter) {
+ (*lm_iter)->InitializeBeforeSentenceProcessing();
+ }
+ }
+ void CleanUpAfterSentenceProcessing(const InputType& source) {
+ std::list<LanguageModel*>::iterator lm_iter;
+ for (lm_iter = m_coll.begin();
+ lm_iter != m_coll.end();
+ ++lm_iter) {
+ (*lm_iter)->CleanUpAfterSentenceProcessing(source);
+ }
+ }
-
void Add(LanguageModel *lm);
diff --git a/moses/src/LVoc.h b/moses/src/LVoc.h
index ec20fe3cd..485e3f481 100644
--- a/moses/src/LVoc.h
+++ b/moses/src/LVoc.h
@@ -13,8 +13,11 @@ extern const LabelId Epsilon;
typedef std::vector<LabelId> IPhrase;
-// A = type of things to numberize, ie, std::string
-// B = map type to use, might consider using hash_map for better performance
+/** class used in phrase-based binary phrase-table.
+ * @todo vocab?
+ * A = type of things to numberize, ie, std::string
+ * B = map type to use, might consider using hash_map for better performance
+ */
template<typename A,typename B=std::map<A,LabelId> >
class LVoc
{
diff --git a/moses/src/LexicalReordering.h b/moses/src/LexicalReordering.h
index e979b5bf5..f46679e4e 100644
--- a/moses/src/LexicalReordering.h
+++ b/moses/src/LexicalReordering.h
@@ -22,6 +22,8 @@ class Phrase;
class Hypothesis;
class InputType;
+/** implementation of lexical reordering (Tilman ...) for phrase-based decoding
+ */
class LexicalReordering : public StatefulFeatureFunction {
public:
LexicalReordering(std::vector<FactorType>& f_factors,
diff --git a/moses/src/LexicalReorderingState.cpp b/moses/src/LexicalReorderingState.cpp
index 346ac745b..4d9bcce07 100644
--- a/moses/src/LexicalReorderingState.cpp
+++ b/moses/src/LexicalReorderingState.cpp
@@ -173,6 +173,8 @@ int LexicalReorderingState::ComparePrevScores(const Scores *other) const
return 0;
}
+bool PhraseBasedReorderingState::m_useFirstBackwardScore = true;
+
PhraseBasedReorderingState::PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt)
: LexicalReorderingState(prev, topt), m_prevRange(topt.GetSourceWordsRange()), m_first(false) {}
@@ -210,17 +212,18 @@ LexicalReorderingState* PhraseBasedReorderingState::Expand(const TranslationOpti
if (m_direction == LexicalReorderingConfiguration::Forward && m_first) {
ClearScores(scores);
} else {
- if (modelType == LexicalReorderingConfiguration::MSD) {
- reoType = GetOrientationTypeMSD(currWordsRange);
- } else if (modelType == LexicalReorderingConfiguration::MSLR) {
- reoType = GetOrientationTypeMSLR(currWordsRange);
- } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
- reoType = GetOrientationTypeMonotonic(currWordsRange);
- } else {
- reoType = GetOrientationTypeLeftRight(currWordsRange);
+ if (!m_first || m_useFirstBackwardScore){
+ if (modelType == LexicalReorderingConfiguration::MSD) {
+ reoType = GetOrientationTypeMSD(currWordsRange);
+ } else if (modelType == LexicalReorderingConfiguration::MSLR) {
+ reoType = GetOrientationTypeMSLR(currWordsRange);
+ } else if (modelType == LexicalReorderingConfiguration::Monotonic) {
+ reoType = GetOrientationTypeMonotonic(currWordsRange);
+ } else {
+ reoType = GetOrientationTypeLeftRight(currWordsRange);
+ }
+ CopyScores(scores, topt, reoType);
}
-
- CopyScores(scores, topt, reoType);
}
return new PhraseBasedReorderingState(this, topt);
diff --git a/moses/src/LexicalReorderingState.h b/moses/src/LexicalReorderingState.h
index 802f1fcb0..0eb4cc42e 100644
--- a/moses/src/LexicalReorderingState.h
+++ b/moses/src/LexicalReorderingState.h
@@ -18,7 +18,9 @@ namespace Moses
class LexicalReorderingState;
class LexicalReordering;
-//! Factory class for lexical reordering states
+/** Factory class for lexical reordering states
+ * @todo There's a lot of classes for lexicalized reordering. Perhaps put them in a separate dir
+ */
class LexicalReorderingConfiguration
{
public:
@@ -122,6 +124,7 @@ protected:
static const ReorderingType L = 1; // left
};
+//! @todo what is this?
class BidirectionalReorderingState : public LexicalReorderingState
{
private:
@@ -148,6 +151,7 @@ private:
WordsRange m_prevRange;
bool m_first;
public:
+ static bool m_useFirstBackwardScore;
PhraseBasedReorderingState(const LexicalReorderingConfiguration &config, LexicalReorderingConfiguration::Direction dir, size_t offset);
PhraseBasedReorderingState(const PhraseBasedReorderingState *prev, const TranslationOption &topt);
diff --git a/moses/src/LexicalReorderingTable.cpp b/moses/src/LexicalReorderingTable.cpp
index 62667ee61..9fbcba515 100644
--- a/moses/src/LexicalReorderingTable.cpp
+++ b/moses/src/LexicalReorderingTable.cpp
@@ -8,6 +8,10 @@
#include "TargetPhrase.h"
#include "TargetPhraseCollection.h"
+#ifndef WIN32
+#include "CompactPT/LexicalReorderingTableCompact.h"
+#endif
+
namespace Moses
{
/*
@@ -47,7 +51,14 @@ void auxAppend(IPhrase& head, const IPhrase& tail)
LexicalReorderingTable* LexicalReorderingTable::LoadAvailable(const std::string& filePath, const FactorList& f_factors, const FactorList& e_factors, const FactorList& c_factors)
{
- //decide use Tree or Memory table
+ //decide use Compact or Tree or Memory table
+#ifndef WIN32
+ if(FileExists(filePath+".minlexr")) {
+ //there exists a compact binary version use that
+ VERBOSE(2,"Using compact lexical reordering table" << std::endl);
+ return new LexicalReorderingTableCompact(filePath+".minlexr", f_factors, e_factors, c_factors);
+ }
+#endif
if(FileExists(filePath+".binlexr.idx")) {
//there exists a binary version use that
return new LexicalReorderingTableTree(filePath, f_factors, e_factors, c_factors);
@@ -355,7 +366,6 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
size_t numTokens = 0;
size_t numKeyTokens = 0;
while(getline(inFile, line)) {
- //TRACE_ERR(lnc<<":"<<line<<"\n");
++lnc;
if(0 == lnc % 10000) {
TRACE_ERR(".");
@@ -465,6 +475,10 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
}
cands.push_back(GenericCandidate(tgt_phrases, scores));
}
+ if (lnc == 0) {
+ TRACE_ERR("ERROR: empty lexicalised reordering file\n" << std::endl);
+ return false;
+ }
//flush remainders
cands.writeBin(ot);
cands.clear();
diff --git a/moses/src/LexicalReorderingTable.h b/moses/src/LexicalReorderingTable.h
index ae10ae386..abd8e9ac9 100644
--- a/moses/src/LexicalReorderingTable.h
+++ b/moses/src/LexicalReorderingTable.h
@@ -27,8 +27,7 @@ class Phrase;
class InputType;
class ConfusionNet;
-//additional types
-
+//! additional types
class LexicalReorderingTable
{
public:
@@ -69,6 +68,7 @@ protected:
FactorList m_FactorsC;
};
+//! @todo what is this?
class LexicalReorderingTableMemory : public LexicalReorderingTable
{
//implements LexicalReorderingTable saving all scores in one large std::map<> thingy
diff --git a/moses/src/Manager.cpp b/moses/src/Manager.cpp
index c0c2e029a..468db0de3 100644
--- a/moses/src/Manager.cpp
+++ b/moses/src/Manager.cpp
@@ -39,6 +39,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "LMList.h"
#include "TranslationOptionCollection.h"
#include "DummyScoreProducers.h"
+#include "Timer.h"
+
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
#include "rule.pb.h"
@@ -48,11 +50,11 @@ using namespace std;
namespace Moses
{
-Manager::Manager(InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system)
- :m_system(system)
+Manager::Manager(size_t lineNumber, InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system)
+ :m_lineNumber(lineNumber)
+ ,m_system(system)
,m_transOptColl(source.CreateTranslationOptionCollection(system))
,m_search(Search::CreateSearch(*this, source, searchAlgorithm, *m_transOptColl))
- ,m_start(clock())
,interrupted_flag(0)
,m_hypoId(0)
,m_source(source)
@@ -65,13 +67,7 @@ Manager::~Manager()
delete m_transOptColl;
delete m_search;
- m_system->CleanUpAfterSentenceProcessing();
-
- clock_t end = clock();
- float et = (end - m_start);
- et /= (float)CLOCKS_PER_SEC;
- VERBOSE(1, "Translation took " << et << " seconds" << endl);
- VERBOSE(1, "Finished translating" << endl);
+ m_system->CleanUpAfterSentenceProcessing(m_source);
}
/**
@@ -85,20 +81,23 @@ void Manager::ProcessSentence()
// collect translation options for this sentence
m_system->InitializeBeforeSentenceProcessing(m_source);
+
+ Timer getOptionsTime;
+ getOptionsTime.start();
m_transOptColl->CreateTranslationOptions();
+ VERBOSE(1, "Line "<< m_lineNumber << ": Collecting options took " << getOptionsTime << " seconds" << endl);
// some reporting on how long this took
- clock_t gotOptions = clock();
- float et = (gotOptions - m_start);
IFVERBOSE(2) {
- GetSentenceStats().AddTimeCollectOpts( gotOptions - m_start );
+ // TODO: XXX: Hack: SentenceStats.h currently requires all values to be of type clock_t
+ GetSentenceStats().AddTimeCollectOpts((clock_t) (getOptionsTime.get_elapsed_time() * CLOCKS_PER_SEC));
}
- et /= (float)CLOCKS_PER_SEC;
- VERBOSE(1, "Collecting options took " << et << " seconds" << endl);
// search for best translation with the specified algorithm
+ Timer searchTime;
+ searchTime.start();
m_search->ProcessSentence();
- VERBOSE(1, "Search took " << ((clock()-m_start)/(float)CLOCKS_PER_SEC) << " seconds" << endl);
+ VERBOSE(1, "Line " << m_lineNumber << ": Search took " << searchTime << " seconds" << endl);
}
/**
diff --git a/moses/src/Manager.h b/moses/src/Manager.h
index 3946e54d7..dd011bc84 100644
--- a/moses/src/Manager.h
+++ b/moses/src/Manager.h
@@ -24,7 +24,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <list>
-#include <ctime>
#include "InputType.h"
#include "Hypothesis.h"
#include "StaticData.h"
@@ -59,7 +58,7 @@ struct SearchGraphNode {
};
-/** The Manager class implements a stack decoding algorithm.
+/** The Manager class implements a stack decoding algorithm for phrase-based decoding
* Hypotheses are organized in stacks. One stack contains all hypothesis that have
* the same number of foreign words translated. The data structure for hypothesis
* stacks is the class HypothesisStack. The data structure for a hypothesis
@@ -101,7 +100,6 @@ protected:
Search *m_search;
HypothesisStack* actual_hypoStack; /**actual (full expanded) stack of hypotheses*/
- clock_t m_start; /**< starting time, used for logging */
size_t interrupted_flag;
std::auto_ptr<SentenceStats> m_sentenceStats;
int m_hypoId; //used to number the hypos as they are created.
@@ -115,8 +113,9 @@ protected:
public:
+ size_t m_lineNumber;
InputType const& m_source; /**< source sentence to be translated */
- Manager(InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system);
+ Manager(size_t lineNumber, InputType const& source, SearchAlgorithm searchAlgorithm, const TranslationSystem* system);
~Manager();
const TranslationOptionCollection* getSntTranslationOptions();
const TranslationSystem* GetTranslationSystem() {
diff --git a/moses/src/NonTerminal.h b/moses/src/NonTerminal.h
index 3fa2ede92..393e32fa1 100644
--- a/moses/src/NonTerminal.h
+++ b/moses/src/NonTerminal.h
@@ -30,6 +30,9 @@
namespace Moses
{
+/** Functor to create hash for words.
+ * @todo uses all factors, not just factor 0
+ */
class NonTerminalHasher
{
public:
@@ -40,6 +43,9 @@ public:
}
};
+/** Functor to test whether 2 words are the same
+ * @todo uses all factors, not just factor 0
+ */
class NonTerminalEqualityPred
{
public:
diff --git a/moses/src/OnlineCommand.cpp b/moses/src/OnlineCommand.cpp
deleted file mode 100644
index 93024b92c..000000000
--- a/moses/src/OnlineCommand.cpp
+++ /dev/null
@@ -1,143 +0,0 @@
-// $Id: OnlineCommand.cpp 3428 2010-09-13 17:55:23Z nicolabertoldi $
-// vim:tabstop=2
-
-/***********************************************************************
-Moses - factored phrase-based language decoder
-Copyright (C) 2006 University of Edinburgh
-
-This library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Lesser General Public
-License as published by the Free Software Foundation; either
-version 2.1 of the License, or (at your option) any later version.
-
-This library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Lesser General Public License for more details.
-
-You should have received a copy of the GNU Lesser General Public
-License along with this library; if not, write to the Free Software
-Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-***********************************************************************/
-
-#include <stdexcept>
-
-#include "StaticData.h" // needed for debugging purpose only
-
-#include "OnlineCommand.h"
-#include "Util.h"
-
-using namespace std;
-
-#define COMMAND_KEYWORD "@CMD@"
-
-namespace Moses
-{
-
-OnlineCommand::OnlineCommand()
-{
- VERBOSE(3,"OnlineCommand::OnlineCommand()" << std::endl);
- VERBOSE(3,"COMMAND_KEYWORD: " << COMMAND_KEYWORD << std::endl);
- command_type = '\0';
- command_value = '\0';
-
- accepted_commands.push_back("-weight-l"); // weight(s) for language models
- accepted_commands.push_back("-weight-t"); // weight(s) for translation model components
- accepted_commands.push_back("-weight-d"); // weight(s) for distortion (reordering components)
- accepted_commands.push_back("-weight-w"); // weight for word penalty
- accepted_commands.push_back("-weight-u"); // weight for unknown words penalty
- accepted_commands.push_back("-weight-g"); // weight(s) for global lexical model components
- accepted_commands.push_back("-weight-b"); // weight(s) for global lexical model components
- accepted_commands.push_back("-verbose"); // weights for translation model components
-}
-
-bool OnlineCommand::Parse(std::string& line)
-{
- VERBOSE(3,"OnlineCommand::Parse(std::string& line)" << std::endl);
-
- int next_string_pos = 0;
- string firststring = GetFirstString(line, next_string_pos);
- bool flag = false;
-
- if (firststring.compare(COMMAND_KEYWORD) == 0){
-
- command_type = GetFirstString(line, next_string_pos);
-
-
- for (vector<string>::const_iterator iterParam = accepted_commands.begin(); iterParam!=accepted_commands.end(); ++iterParam) {
- if (command_type.compare(*iterParam) == 0){ //requested command is found
- command_value = line.substr(next_string_pos);
- flag = true;
- }
- }
- if (!flag){
- VERBOSE(3,"OnlineCommand::Parse: This command |" << command_type << "| is unknown." << std::endl);
- }
- return true;
- }else{
- return false;
- }
-}
-
-void OnlineCommand::Execute() const
-{
- std::cerr << "void OnlineCommand::Execute() const" << std::endl;
- VERBOSE(3,"OnlineCommand::Execute() const" << std::endl);
-
- StaticData &staticData = StaticData::InstanceNonConst();
-
- VERBOSE(1,"Handling online command: " << COMMAND_KEYWORD << " " << command_type << " " << command_value << std::endl);
- // weights
- vector<float> actual_weights;
- vector<float> weights;
- PARAM_VEC values;
-
- bool flag = false;
- for(vector<std::string>::const_iterator iterParam = accepted_commands.begin(); iterParam != accepted_commands.end(); iterParam++)
- {
- std::string paramName = *iterParam;
-
- if (command_type.compare(paramName) == 0){ //requested command is paramName
-
- Tokenize(values, command_value);
-
- //remove initial "-" character
- paramName.erase(0,1);
-
- staticData.GetParameter()->OverwriteParam(paramName, values);
-
- staticData.ReLoadParameter();
- // check on weights
-
- const ScoreComponentCollection& weights = staticData.GetAllWeights();
- IFVERBOSE(2) {
- TRACE_ERR("The global weight vector looks like this: ");
- TRACE_ERR(weights);
- TRACE_ERR("\n");
- }
-
-
- flag = true;
- }
- }
- if (!flag){
- TRACE_ERR("ERROR: The command |" << command_type << "| is unknown." << std::endl);
- }
-}
-
-void OnlineCommand::Print(std::ostream& out) const
-{
- VERBOSE(3,"OnlineCommand::Print(std::ostream& out) const" << std::endl);
- out << command_type << " -> " << command_value << "\n";
-}
-
-void OnlineCommand::Clean()
-{
- VERBOSE(3,"OnlineCommand::Clean() const" << std::endl);
- command_type = '\0';
- command_value = '\0';
-}
-
-
-}
-
diff --git a/moses/src/OutputCollector.h b/moses/src/OutputCollector.h
index 0b56c6d80..96353934e 100644
--- a/moses/src/OutputCollector.h
+++ b/moses/src/OutputCollector.h
@@ -37,14 +37,37 @@
namespace Moses
{
/**
- * Makes sure output goes in the correct order.
- **/
+* Makes sure output goes in the correct order when multi-threading
+**/
class OutputCollector
{
public:
OutputCollector(std::ostream* outStream= &std::cout, std::ostream* debugStream=&std::cerr) :
- m_nextOutput(0),m_outStream(outStream),m_debugStream(debugStream) {}
-
+ m_nextOutput(0),m_outStream(outStream),m_debugStream(debugStream),
+ m_isHoldingOutputStream(false), m_isHoldingDebugStream(false) {}
+
+ ~OutputCollector()
+ {
+ if (m_isHoldingOutputStream)
+ delete m_outStream;
+ if (m_isHoldingDebugStream)
+ delete m_debugStream;
+ }
+
+ void HoldOutputStream()
+ {
+ m_isHoldingOutputStream = true;
+ }
+
+ void HoldDebugStream()
+ {
+ m_isHoldingDebugStream = true;
+ }
+
+ bool OutputIsCout() const
+ {
+ return (m_outStream == std::cout);
+ }
/**
* Write or cache the output, as appropriate.
@@ -62,9 +85,9 @@ public:
std::map<int,std::string>::iterator iter;
while ((iter = m_outputs.find(m_nextOutput)) != m_outputs.end()) {
*m_outStream << iter->second << std::flush;
- m_outputs.erase(iter);
++m_nextOutput;
std::map<int,std::string>::iterator debugIter = m_debugs.find(iter->first);
+ m_outputs.erase(iter);
if (debugIter != m_debugs.end()) {
*m_debugStream << debugIter->second << std::flush;
m_debugs.erase(debugIter);
@@ -82,6 +105,8 @@ private:
int m_nextOutput;
std::ostream* m_outStream;
std::ostream* m_debugStream;
+ bool m_isHoldingOutputStream;
+ bool m_isHoldingDebugStream;
#ifdef WITH_THREADS
boost::mutex m_mutex;
#endif
diff --git a/moses/src/PCNTools.h b/moses/src/PCNTools.h
index 1147b73bb..8a31e99ad 100644
--- a/moses/src/PCNTools.h
+++ b/moses/src/PCNTools.h
@@ -28,8 +28,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cstdlib>
/** A couple of utilities to read .pcn files. A python-compatible format
- * for encoding confusion networks and word lattices.
- */
+ * for encoding confusion networks and word lattices.
+ */
namespace PCN
{
@@ -38,8 +38,8 @@ typedef std::vector<CNAlt> CNCol;
typedef std::vector<CNCol> CN;
/** Given a string ((('foo',0.1,1),('bar',0.9,2)),...) representation of a
- * word lattice in PCN format, return a CN object representing the lattice
- */
+ * word lattice in PCN format, return a CN object representing the lattice
+ */
CN parsePCN(const std::string& in);
};
diff --git a/moses/src/PDTAimp.h b/moses/src/PDTAimp.h
index 5f958a0f9..2d2b6974c 100644
--- a/moses/src/PDTAimp.h
+++ b/moses/src/PDTAimp.h
@@ -26,6 +26,8 @@ inline double Exp(double x)
return exp(x);
}
+/** implementation of the binary phrase table for the phrase-based decoder. Used by PhraseDictionaryTreeAdaptor
+ */
class PDTAimp
{
// only these classes are allowed to instantiate this class
@@ -137,8 +139,9 @@ protected:
return 0;
}
+ //TODO: Multiple models broken here
const TranslationSystem& system = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT);
- std::vector<float> weights = system.GetTranslationWeights(m_obj->GetDictIndex());
+ std::vector<float> weights = StaticData::Instance().GetWeights(m_obj->GetFeature());
float weightWP = system.GetWeightWordPenalty();
std::vector<TargetPhrase> tCands;
@@ -372,7 +375,7 @@ protected:
stack.push_back(State(i, i, m_dict->GetRoot(), std::vector<float>(m_numInputScores,0.0)));
const TranslationSystem& system = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT);
- std::vector<float> weightT = system.GetTranslationWeights(m_obj->GetDictIndex());
+ std::vector<float> weightT = StaticData::Instance().GetWeights(m_obj->GetFeature());
float weightWP = system.GetWeightWordPenalty();
while(!stack.empty()) {
diff --git a/moses/src/Parameter.cpp b/moses/src/Parameter.cpp
index 482714596..230a9ddb5 100644
--- a/moses/src/Parameter.cpp
+++ b/moses/src/Parameter.cpp
@@ -133,6 +133,7 @@ Parameter::Parameter()
AddParam("output-search-graph", "osg", "Output connected hypotheses of search into specified filename");
AddParam("output-search-graph-extended", "osgx", "Output connected hypotheses of search into specified filename, in extended format");
AddParam("unpruned-search-graph", "usg", "When outputting chart search graph, do not exclude dead ends. Note: stack pruning may have eliminated some hypotheses");
+ AddParam("include-lhs-in-search-graph", "lhssg", "When outputting chart search graph, include the label of the LHS of the rule (useful when using syntax)");
#ifdef HAVE_PROTOBUF
AddParam("output-search-graph-pb", "pb", "Write phrase lattice to protocol buffer objects in the specified path.");
#endif
@@ -162,7 +163,7 @@ Parameter::Parameter()
AddParam("report-sparse-features", "Indicate which sparse feature functions should report detailed scores in n-best, instead of aggregate");
AddParam("cube-pruning-lazy-scoring", "cbls", "Don't fully score a hypothesis until it is popped");
AddParam("parsing-algorithm", "Which parsing algorithm to use. 0=CYK+, 1=scope-3. (default = 0)");
- AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing. (default = 0)");
+ AddParam("search-algorithm", "Which search algorithm to use. 0=normal stack, 1=cube pruning, 2=cube growing, 4=stack with batched lm requests (default = 0)");
AddParam("constraint", "Location of the file with target sentences to produce constraining the search");
AddParam("use-alignment-info", "Use word-to-word alignment: actually it is only used to output the word-to-word alignment. Word-to-word alignments are taken from the phrase table if any. Default is false.");
AddParam("print-alignment-info", "Output word-to-word alignment into the log file. Word-to-word alignments are takne from the phrase table if any. Default is false");
@@ -182,6 +183,11 @@ Parameter::Parameter()
AddParam("sort-word-alignment", "Sort word alignments for more consistent display. 0=no sort (default), 1=target order");
AddParam("start-translation-id", "Id of 1st input. Default = 0");
AddParam("text-type", "should be one of dev/devtest/test, used for domain adaptation features");
+ AddParam("output-unknowns", "Output the unknown (OOV) words to the given file, one line per sentence");
+
+ // Compact phrase table and reordering table.
+ AddParam("minlexr-memory", "Load lexical reordering table in minlexr format into memory");
+ AddParam("minphr-memory", "Load phrase table in minphr format into memory");
}
Parameter::~Parameter()
@@ -250,7 +256,9 @@ bool Parameter::LoadParam(int argc, char* argv[])
PrintCredit();
Explain();
+ cerr << endl;
UserMessage::Add("No configuration file was specified. Use -config or -f");
+ cerr << endl;
return false;
} else {
if (!ReadConfigFile(configPath)) {
@@ -377,6 +385,8 @@ bool Parameter::Validate()
ext.push_back(".gz");
//prefix tree format
ext.push_back(".binlexr.idx");
+ //prefix tree format
+ ext.push_back(".minlexr");
noErrorFlag = FilesExist("distortion-file", 3, ext);
}
return noErrorFlag;
diff --git a/moses/src/Parameter.h b/moses/src/Parameter.h
index de0d3f7e7..0e7b0cd24 100644
--- a/moses/src/Parameter.h
+++ b/moses/src/Parameter.h
@@ -37,7 +37,8 @@ typedef std::map<std::string, std::string > PARAM_STRING;
/** Handles parameter values set in config file or on command line.
* Process raw parameter data (names and values as strings) for StaticData
- * to parse; to get useful values, see StaticData. */
+ * to parse; to get useful values, see StaticData.
+ */
class Parameter
{
protected:
diff --git a/moses/src/PartialTranslOptColl.h b/moses/src/PartialTranslOptColl.h
index b8b16b901..bbe4d05a1 100644
--- a/moses/src/PartialTranslOptColl.h
+++ b/moses/src/PartialTranslOptColl.h
@@ -39,8 +39,8 @@ namespace Moses
* The expansion process itself may be still explode, so efficient handling
* of partial translation options during expansion is required.
* This class assists in this tasks by implementing pruning.
- * This implementation is similar to the one in HypothesisStack. */
-
+ * This implementation is similar to the one in HypothesisStack.
+ */
class PartialTranslOptColl
{
protected:
diff --git a/moses/src/Phrase.h b/moses/src/Phrase.h
index b64a84a25..49586271a 100644
--- a/moses/src/Phrase.h
+++ b/moses/src/Phrase.h
@@ -40,6 +40,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
+/** Representation of a phrase, ie. a contiguous number of words.
+ * Wrapper for vector of words
+ */
class Phrase
{
friend std::ostream& operator<<(std::ostream&, const Phrase&);
diff --git a/moses/src/PhraseDictionary.cpp b/moses/src/PhraseDictionary.cpp
index 77e02dab6..d1992a9f4 100644
--- a/moses/src/PhraseDictionary.cpp
+++ b/moses/src/PhraseDictionary.cpp
@@ -24,8 +24,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "RuleTable/PhraseDictionarySCFG.h"
#include "RuleTable/PhraseDictionaryOnDisk.h"
#include "RuleTable/PhraseDictionaryALSuffixArray.h"
+#include "RuleTable/PhraseDictionaryFuzzyMatch.h"
+
#ifndef WIN32
#include "PhraseDictionaryDynSuffixArray.h"
+#include "CompactPT/PhraseDictionaryCompact.h"
#endif
#include "RuleTable/UTrie.h"
@@ -64,16 +67,17 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
, const std::string &targetFile // default param
, const std::string &alignmentsFile) // default param
:DecodeFeature("PhraseModel",numScoreComponent,input,output),
+ m_dictIndex(dictIndex),
m_numInputScores(numInputScores),
m_filePath(filePath),
- m_dictIndex(dictIndex),
m_tableLimit(tableLimit),
m_implementation(implementation),
m_targetFile(targetFile),
m_alignmentsFile(alignmentsFile),
m_sparsePhraseDictionaryFeature(spdf)
{
- if (implementation == Memory || implementation == SCFG || implementation == SuffixArray) {
+ if (implementation == Memory || implementation == SCFG || implementation == SuffixArray ||
+ implementation==Compact) {
m_useThreadSafePhraseDictionary = true;
if (implementation == SuffixArray) {
cerr << "Warning: implementation holds chached weights!" << endl;
@@ -87,7 +91,7 @@ PhraseDictionaryFeature::PhraseDictionaryFeature
PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSystem* system)
{
const StaticData& staticData = StaticData::Instance();
- std::vector<float> weightT = system->GetTranslationWeights(m_dictIndex);
+ std::vector<float> weightT = staticData.GetWeights(this);
if (m_implementation == Memory) {
// memory phrase table
@@ -126,7 +130,7 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
if (m_implementation == Hiero) {
VERBOSE(2,"using Hiero format phrase tables" << std::endl);
} else {
- VERBOSE(2,"using New Format phrase tables" << std::endl);
+ VERBOSE(2,"using Moses-formatted SCFG phrase tables" << std::endl);
}
if (!FileExists(m_filePath) && FileExists(m_filePath + ".gz")) {
m_filePath += ".gz";
@@ -162,8 +166,8 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
bool ret = pdm->Load(GetInput()
, GetOutput()
, m_filePath
- , weightT
- , m_tableLimit
+ , weightT
+ , m_tableLimit
, system->GetLanguageModels()
, system->GetWordPenaltyProducer());
CHECK(ret);
@@ -174,8 +178,8 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
bool ret = pdta->Load(GetInput()
, GetOutput()
, m_filePath
- , weightT
- , m_tableLimit
+ , weightT
+ , m_tableLimit
, system->GetLanguageModels()
, system->GetWordPenaltyProducer());
CHECK(ret);
@@ -203,7 +207,38 @@ PhraseDictionary* PhraseDictionaryFeature::LoadPhraseTable(const TranslationSyst
#else
CHECK(false);
#endif
- } else {
+ } else if (m_implementation == FuzzyMatch) {
+
+ PhraseDictionaryFuzzyMatch *dict = new PhraseDictionaryFuzzyMatch(GetNumScoreComponents(), this);
+
+ bool ret = dict->Load(GetInput()
+ , GetOutput()
+ , m_filePath
+ , weightT
+ , m_tableLimit
+ , system->GetLanguageModels()
+ , system->GetWordPenaltyProducer());
+ CHECK(ret);
+
+ return dict;
+ } else if (m_implementation == Compact) {
+#ifndef WIN32
+ VERBOSE(2,"Using compact phrase table" << std::endl);
+
+ PhraseDictionaryCompact* pd = new PhraseDictionaryCompact(GetNumScoreComponents(), m_implementation, this);
+ bool ret = pd->Load(GetInput(), GetOutput()
+ , m_filePath
+ , weightT
+ , m_tableLimit
+ , system->GetLanguageModels()
+ , system->GetWeightWordPenalty());
+ CHECK(ret);
+ return pd;
+#else
+ CHECK(false);
+#endif
+ }
+ else {
std::cerr << "Unknown phrase table type " << m_implementation << endl;
CHECK(false);
}
diff --git a/moses/src/PhraseDictionary.h b/moses/src/PhraseDictionary.h
index 18b48436e..3136c47de 100644
--- a/moses/src/PhraseDictionary.h
+++ b/moses/src/PhraseDictionary.h
@@ -26,6 +26,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <map>
#include <memory>
#include <list>
+#include <stdexcept>
#include <vector>
#include <string>
@@ -105,7 +106,7 @@ public:
, const std::vector<FactorType> &output
, const std::string &filePath
, const std::vector<float> &weight
- , size_t dictIndex
+ , size_t dictIndex
, size_t tableLimit
, const std::string &targetFile
, const std::string &alignmentsFile);
@@ -134,6 +135,22 @@ public:
PhraseDictionary* GetDictionary();
size_t GetDictIndex() const;
+ //Usual feature function methods are not implemented
+ virtual void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ throw std::logic_error("PhraseDictionary.Evaluate() Not implemented");
+ }
+
+ virtual void EvaluateChart(const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
+ {
+ throw std::logic_error("PhraseDictionary.EvaluateChart() Not implemented");
+ }
+
+ virtual bool ComputeValueInTranslationTable() const {return true;}
+
+
protected:
size_t m_dictIndex;
diff --git a/moses/src/PhraseDictionaryDynSuffixArray.cpp b/moses/src/PhraseDictionaryDynSuffixArray.cpp
index d4f0161db..8e32758fa 100644
--- a/moses/src/PhraseDictionaryDynSuffixArray.cpp
+++ b/moses/src/PhraseDictionaryDynSuffixArray.cpp
@@ -43,9 +43,9 @@ void PhraseDictionaryDynSuffixArray::InitializeForInput(const InputType& input)
CHECK(&input == &input);
}
-void PhraseDictionaryDynSuffixArray::CleanUp()
+void PhraseDictionaryDynSuffixArray::CleanUp(const InputType &source)
{
- m_biSA->CleanUp();
+ m_biSA->CleanUp(source);
}
const TargetPhraseCollection *PhraseDictionaryDynSuffixArray::GetTargetPhraseCollection(const Phrase& src) const
diff --git a/moses/src/PhraseDictionaryDynSuffixArray.h b/moses/src/PhraseDictionaryDynSuffixArray.h
index 0d6be08dd..8449e3e09 100644
--- a/moses/src/PhraseDictionaryDynSuffixArray.h
+++ b/moses/src/PhraseDictionaryDynSuffixArray.h
@@ -9,6 +9,9 @@
namespace Moses
{
+/** Implementation of a phrase table using the biconcor suffix array.
+ * Wrapper around a BilingualDynSuffixArray object
+ */
class PhraseDictionaryDynSuffixArray: public PhraseDictionary
{
public:
@@ -26,7 +29,7 @@ public:
// functions below required by base class
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase& src) const;
void InitializeForInput(const InputType& i);
- void CleanUp();
+ void CleanUp(const InputType &source);
void insertSnt(string&, string&, string&);
void deleteSnt(unsigned, unsigned);
ChartRuleLookupManager *CreateRuleLookupManager(const InputType&, const ChartCellCollection&);
diff --git a/moses/src/PhraseDictionaryMemory.cpp b/moses/src/PhraseDictionaryMemory.cpp
index 1ee449172..9eaaa6dec 100644
--- a/moses/src/PhraseDictionaryMemory.cpp
+++ b/moses/src/PhraseDictionaryMemory.cpp
@@ -66,6 +66,8 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
, const LMList &languageModels
, float weightWP)
{
+ const_cast<LMList&>(languageModels).InitializeBeforeSentenceProcessing();
+
const StaticData &staticData = StaticData::Instance();
m_tableLimit = tableLimit;
@@ -105,7 +107,6 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
//target
std::auto_ptr<TargetPhrase> targetPhrase(new TargetPhrase(Output));
- targetPhrase->SetSourcePhrase(sourcePhrase); // TODO(bhaddow): This is a dangling pointer
targetPhrase->CreateFromString(output, targetPhraseString, factorDelimiter);
scv.clear();
@@ -163,12 +164,15 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
}
}
- // Reuse source if possible. Otherwise, create node for it.
+ //TODO: Would be better to reuse source phrases, but ownership has to be
+ //consistent across phrase table implementations
+ sourcePhrase.Clear();
+ sourcePhrase.CreateFromString(input, sourcePhraseString, factorDelimiter);
+ //Now that the source phrase is ready, we give the target phrase a copy
+ targetPhrase->SetSourcePhrase(sourcePhrase);
if (preSourceString == sourcePhraseString && preSourceNode) {
preSourceNode->Add(targetPhrase.release());
} else {
- sourcePhrase.Clear();
- sourcePhrase.CreateFromString(input, sourcePhraseString, factorDelimiter);
preSourceNode = CreateTargetPhraseCollection(sourcePhrase);
preSourceNode->Add(targetPhrase.release());
preSourceString.assign(sourcePhraseString.data(), sourcePhraseString.size());
@@ -178,6 +182,10 @@ bool PhraseDictionaryMemory::Load(const std::vector<FactorType> &input
// sort each target phrase collection
m_collection.Sort(m_tableLimit);
+ /* // TODO ASK OLIVER WHY THIS IS NEEDED
+ const_cast<LMList&>(languageModels).CleanUpAfterSentenceProcessing();
+ */
+
return true;
}
diff --git a/moses/src/PhraseDictionaryNode.h b/moses/src/PhraseDictionaryNode.h
index 95ae48b0e..69d013398 100644
--- a/moses/src/PhraseDictionaryNode.h
+++ b/moses/src/PhraseDictionaryNode.h
@@ -36,7 +36,7 @@ class PhraseDictionaryMemory;
class PhraseDictionaryFeature;
/** One node of the PhraseDictionaryMemory structure
-*/
+ */
class PhraseDictionaryNode
{
typedef std::map<Word, PhraseDictionaryNode> NodeMap;
diff --git a/moses/src/PhraseDictionaryTree.h b/moses/src/PhraseDictionaryTree.h
index ebe43a618..b060dd8bb 100644
--- a/moses/src/PhraseDictionaryTree.h
+++ b/moses/src/PhraseDictionaryTree.h
@@ -44,6 +44,9 @@ struct StringTgtCand
};
+/** A phrase table for phrase-based decoding that is held on disk, rather than in memory
+ * Wrapper around a PDTimp class
+ */
class PhraseDictionaryTree : public Dictionary
{
PDTimp *imp; //implementation
diff --git a/moses/src/PhraseDictionaryTreeAdaptor.h b/moses/src/PhraseDictionaryTreeAdaptor.h
index c38e234fb..97c7d1d3b 100644
--- a/moses/src/PhraseDictionaryTreeAdaptor.h
+++ b/moses/src/PhraseDictionaryTreeAdaptor.h
@@ -18,7 +18,7 @@ class WordsRange;
class InputType;
/*** Implementation of a phrase table in a trie that is binarized and
- * stored on disk.
+ * stored on disk. Wrapper around PDTAimp class
*/
class PhraseDictionaryTreeAdaptor : public PhraseDictionary
{
diff --git a/moses/src/PhraseLengthFeature.cpp b/moses/src/PhraseLengthFeature.cpp
index ece3b0923..b7745f27c 100644
--- a/moses/src/PhraseLengthFeature.cpp
+++ b/moses/src/PhraseLengthFeature.cpp
@@ -2,17 +2,19 @@
#include "PhraseLengthFeature.h"
#include "Hypothesis.h"
#include "ScoreComponentCollection.h"
+#include "TranslationOption.h"
namespace Moses {
using namespace std;
-void PhraseLengthFeature::Evaluate(const TargetPhrase& targetPhrase,
- ScoreComponentCollection* accumulator) const
+void PhraseLengthFeature::Evaluate(
+ const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
// get length of source and target phrase
- size_t sourceLength = targetPhrase.GetSourcePhrase().GetSize();
- size_t targetLength = targetPhrase.GetSize();
+ size_t sourceLength = context.GetTargetPhrase().GetSize();
+ size_t targetLength = context.GetTranslationOption().GetSourcePhrase()->GetSize();
// create feature names
stringstream nameSource;
diff --git a/moses/src/PhraseLengthFeature.h b/moses/src/PhraseLengthFeature.h
index f25142230..041db0f0e 100644
--- a/moses/src/PhraseLengthFeature.h
+++ b/moses/src/PhraseLengthFeature.h
@@ -1,6 +1,7 @@
#ifndef moses_PhraseLengthFeature_h
#define moses_PhraseLengthFeature_h
+#include <stdexcept>
#include <string>
#include <map>
@@ -20,14 +21,12 @@ public:
StatelessFeatureFunction("pl", ScoreProducer::unlimited)
{}
- void Evaluate(const TargetPhrase& cur_phrase,
+ void Evaluate(const PhraseBasedFeatureContext& context,
ScoreComponentCollection* accumulator) const;
- void EvaluateChart(
- const ChartHypothesis&,
- int /* featureID */,
- ScoreComponentCollection*) const {
- CHECK(0); // feature function not valid in chart decoder
+ void EvaluateChart(const ChartBasedFeatureContext& context,
+ ScoreComponentCollection*) const {
+ throw std::logic_error("PhraseLengthFeature not valid in chart decoder");
}
// basic properties
diff --git a/moses/src/PhrasePairFeature.cpp b/moses/src/PhrasePairFeature.cpp
index 357f2fd87..020292748 100644
--- a/moses/src/PhrasePairFeature.cpp
+++ b/moses/src/PhrasePairFeature.cpp
@@ -1,7 +1,10 @@
+#include <boost/algorithm/string.hpp>
+
#include "AlignmentInfo.h"
#include "PhrasePairFeature.h"
#include "TargetPhrase.h"
#include "Hypothesis.h"
+#include "TranslationOption.h"
#include <boost/algorithm/string.hpp>
using namespace std;
@@ -18,7 +21,7 @@ size_t PhrasePairFeature::GetNumInputScores() const
return 0;
}
- bool PhrasePairFeature::Load(const std::string &filePathSource/*, const std::string &filePathTarget*/)
+bool PhrasePairFeature::Load(const std::string &filePathSource/*, const std::string &filePathTarget*/)
{
if (m_domainTrigger) {
// domain trigger terms for each input document
@@ -78,163 +81,146 @@ size_t PhrasePairFeature::GetNumInputScores() const
return true;
}
- void PhrasePairFeature::InitializeForInput( Sentence const& in )
+void PhrasePairFeature::Evaluate(
+ const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- m_local.reset(new ThreadLocalStorage);
- m_local->input = &in;
- m_local->docid = in.GetDocumentId();
- m_local->topicid = in.GetTopicId();
- m_local->use_topicid = in.GetUseTopicId();
- m_local->topicid_prob = in.GetTopicIdAndProb();
- m_local->use_topicid_prob = in.GetUseTopicIdAndProb();
-}
-
-void PhrasePairFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const {
- const TargetPhrase& target = cur_hypo.GetCurrTargetPhrase();
- const Phrase& source = target.GetSourcePhrase();
- const long docid = m_local->docid;
- const long topicid = m_local->topicid;
- const bool use_topicid = m_local->use_topicid;
- const bool use_topicid_prob = m_local->use_topicid_prob;
-/* const AlignmentInfo& align = cur_hypo.GetAlignmentInfo();
- for (AlignmentInfo::const_iterator i = align.begin(); i != align.end(); ++i) {
- const Factor* sourceFactor = source.GetWord(i->first).GetFactor(m_sourceFactorId);
- const Factor* targetFactor = cur_hypo.GetWord(i->second).GetFactor(m_targetFactorId);
+ const TargetPhrase& target = context.GetTargetPhrase();
+ const Phrase& source = *(context.GetTranslationOption().GetSourcePhrase());
+ if (m_simple) {
ostringstream namestr;
- namestr << sourceFactor->GetString();
- namestr << ":";
- namestr << targetFactor->GetString();
- accumulator->PlusEquals(this,namestr.str(),1);
- }*/
-
- if (m_simple) {
- ostringstream namestr;
- namestr << "pp_";
- namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
- for (size_t i = 1; i < source.GetSize(); ++i) {
- const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
- namestr << ",";
- namestr << sourceFactor->GetString();
- }
- namestr << "~";
- namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
- for (size_t i = 1; i < target.GetSize(); ++i) {
- const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
- namestr << ",";
- namestr << targetFactor->GetString();
- }
-
- accumulator->SparsePlusEquals(namestr.str(),1);
- }
- if (m_domainTrigger) {
- // compute pair
- ostringstream pair;
- pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
- for (size_t i = 1; i < source.GetSize(); ++i) {
- const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
- pair << ",";
- pair << sourceFactor->GetString();
- }
- pair << "~";
- pair << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
- for (size_t i = 1; i < target.GetSize(); ++i) {
- const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
- pair << ",";
- pair << targetFactor->GetString();
- }
-
- if (use_topicid || use_topicid_prob) {
- // use topicid as trigger
- if(use_topicid) {
- stringstream feature;
- feature << "pp_";
- if (topicid == -1)
- feature << "unk";
- else
- feature << topicid;
-
- feature << "_";
- feature << pair.str();
- accumulator->SparsePlusEquals(feature.str(), 1);
- }
- else {
- // use topic probabilities
- const vector<string> &topicid_prob = *(m_local->topicid_prob);
- if (atol(topicid_prob[0].c_str()) == -1) {
- stringstream feature;
- feature << "pp_unk_";
- feature << pair.str();
- accumulator->SparsePlusEquals(feature.str(), 1);
- }
- else {
- for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
- stringstream feature;
- feature << "pp_";
- feature << topicid_prob[i];
- feature << "_";
- feature << pair.str();
- accumulator->SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
- }
- }
- }
- }
- else {
- // range over domain trigger words
- for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
- string sourceTrigger = *p;
- ostringstream namestr;
- namestr << "pp_";
- namestr << sourceTrigger;
- namestr << "_";
- namestr << pair.str();
- accumulator->SparsePlusEquals(namestr.str(),1);
- }
- }
- }
- if (m_sourceContext) {
- const Sentence& input = *(m_local->input);
-
- // range over source words to get context
- for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
- string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
- if (m_ignorePunctuation) {
- // check if trigger is punctuation
- char firstChar = sourceTrigger.at(0);
- CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
- if(charIterator != m_punctuationHash.end())
- continue;
- }
-
- bool sourceTriggerExists = false;
- if (!m_unrestricted)
- sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
-
- if (m_unrestricted || sourceTriggerExists) {
- ostringstream namestr;
- namestr << "pp_";
- namestr << sourceTrigger;
- namestr << "~";
- namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
- for (size_t i = 1; i < source.GetSize(); ++i) {
- const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
- namestr << ",";
- namestr << sourceFactor->GetString();
- }
- namestr << "~";
- namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
- for (size_t i = 1; i < target.GetSize(); ++i) {
- const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
- namestr << ",";
- namestr << targetFactor->GetString();
- }
-
- accumulator->SparsePlusEquals(namestr.str(),1);
- }
- }
+ namestr << "pp_";
+ namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
+ for (size_t i = 1; i < source.GetSize(); ++i) {
+ const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
+ namestr << ",";
+ namestr << sourceFactor->GetString();
+ }
+ namestr << "~";
+ namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
+ for (size_t i = 1; i < target.GetSize(); ++i) {
+ const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
+ namestr << ",";
+ namestr << targetFactor->GetString();
+ }
+
+ accumulator->SparsePlusEquals(namestr.str(),1);
+ }
+ if (m_domainTrigger) {
+ const Sentence& input = static_cast<const Sentence&>(context.GetSource());
+ const bool use_topicid = input.GetUseTopicId();
+ const bool use_topicid_prob = input.GetUseTopicIdAndProb();
+
+ // compute pair
+ ostringstream pair;
+ pair << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
+ for (size_t i = 1; i < source.GetSize(); ++i) {
+ const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
+ pair << ",";
+ pair << sourceFactor->GetString();
+ }
+ pair << "~";
+ pair << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
+ for (size_t i = 1; i < target.GetSize(); ++i) {
+ const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
+ pair << ",";
+ pair << targetFactor->GetString();
+ }
+
+ if (use_topicid || use_topicid_prob) {
+ if(use_topicid) {
+ // use topicid as trigger
+ const long topicid = input.GetTopicId();
+ stringstream feature;
+ feature << "pp_";
+ if (topicid == -1)
+ feature << "unk";
+ else
+ feature << topicid;
+
+ feature << "_";
+ feature << pair.str();
+ accumulator->SparsePlusEquals(feature.str(), 1);
+ }
+ else {
+ // use topic probabilities
+ const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
+ if (atol(topicid_prob[0].c_str()) == -1) {
+ stringstream feature;
+ feature << "pp_unk_";
+ feature << pair.str();
+ accumulator->SparsePlusEquals(feature.str(), 1);
+ }
+ else {
+ for (size_t i=0; i+1 < topicid_prob.size(); i+=2) {
+ stringstream feature;
+ feature << "pp_";
+ feature << topicid_prob[i];
+ feature << "_";
+ feature << pair.str();
+ accumulator->SparsePlusEquals(feature.str(), atof((topicid_prob[i+1]).c_str()));
+ }
}
+ }
+ }
+ else {
+ // range over domain trigger words
+ const long docid = input.GetDocumentId();
+ for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
+ string sourceTrigger = *p;
+ ostringstream namestr;
+ namestr << "pp_";
+ namestr << sourceTrigger;
+ namestr << "_";
+ namestr << pair.str();
+ accumulator->SparsePlusEquals(namestr.str(),1);
+ }
+ }
+ }
+ if (m_sourceContext) {
+ const Sentence& input = static_cast<const Sentence&>(context.GetSource());
+
+ // range over source words to get context
+ for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
+ string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_sourceFactorId)->GetString();
+ if (m_ignorePunctuation) {
+ // check if trigger is punctuation
+ char firstChar = sourceTrigger.at(0);
+ CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
+ if(charIterator != m_punctuationHash.end())
+ continue;
+ }
+
+ bool sourceTriggerExists = false;
+ if (!m_unrestricted)
+ sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
+
+ if (m_unrestricted || sourceTriggerExists) {
+ ostringstream namestr;
+ namestr << "pp_";
+ namestr << sourceTrigger;
+ namestr << "~";
+ namestr << source.GetWord(0).GetFactor(m_sourceFactorId)->GetString();
+ for (size_t i = 1; i < source.GetSize(); ++i) {
+ const Factor* sourceFactor = source.GetWord(i).GetFactor(m_sourceFactorId);
+ namestr << ",";
+ namestr << sourceFactor->GetString();
+ }
+ namestr << "~";
+ namestr << target.GetWord(0).GetFactor(m_targetFactorId)->GetString();
+ for (size_t i = 1; i < target.GetSize(); ++i) {
+ const Factor* targetFactor = target.GetWord(i).GetFactor(m_targetFactorId);
+ namestr << ",";
+ namestr << targetFactor->GetString();
+ }
+
+ accumulator->SparsePlusEquals(namestr.str(),1);
+ }
+ }
+ }
}
bool PhrasePairFeature::ComputeValueInTranslationOption() const {
- return false;
+ return true;
}
}
diff --git a/moses/src/PhrasePairFeature.h b/moses/src/PhrasePairFeature.h
index caf6a0717..d7cc3ea48 100644
--- a/moses/src/PhrasePairFeature.h
+++ b/moses/src/PhrasePairFeature.h
@@ -1,92 +1,69 @@
#ifndef moses_PhrasePairFeature_h
#define moses_PhrasePairFeature_h
+#include <stdexcept>
+
#include "Factor.h"
#include "FeatureFunction.h"
#include "Sentence.h"
-#ifdef WITH_THREADS
-#include <boost/thread/tss.hpp>
-#endif
-
namespace Moses {
/**
* Phrase pair feature: complete source/target phrase pair
**/
class PhrasePairFeature: public StatelessFeatureFunction {
-
- typedef std::map< char, short > CharHash;
- typedef std::vector< std::set<std::string> > DocumentVector;
-
- struct ThreadLocalStorage
- {
- const Sentence *input;
- long docid;
- long topicid;
- bool use_topicid;
- const std::vector<std::string> *topicid_prob;
- bool use_topicid_prob;
- };
-
- private:
-#ifdef WITH_THREADS
- boost::thread_specific_ptr<ThreadLocalStorage> m_local;
-#else
- std::auto_ptr<ThreadLocalStorage> m_local;
-#endif
-
- std::set<std::string> m_vocabSource;
- //std::set<std::string> m_vocabTarget;
- DocumentVector m_vocabDomain;
- FactorType m_sourceFactorId;
- FactorType m_targetFactorId;
- bool m_unrestricted;
- bool m_simple;
- bool m_sourceContext;
- bool m_domainTrigger;
- float m_sparseProducerWeight;
- bool m_ignorePunctuation;
- CharHash m_punctuationHash;
-
+
+ typedef std::map< char, short > CharHash;
+ typedef std::vector< std::set<std::string> > DocumentVector;
+
+ std::set<std::string> m_vocabSource;
+ //std::set<std::string> m_vocabTarget;
+ DocumentVector m_vocabDomain;
+ FactorType m_sourceFactorId;
+ FactorType m_targetFactorId;
+ bool m_unrestricted;
+ bool m_simple;
+ bool m_sourceContext;
+ bool m_domainTrigger;
+ float m_sparseProducerWeight;
+ bool m_ignorePunctuation;
+ CharHash m_punctuationHash;
+
public:
- PhrasePairFeature (FactorType sourceFactorId, FactorType targetFactorId,
- bool simple, bool sourceContext, bool ignorePunctuation, bool domainTrigger) :
- StatelessFeatureFunction("pp", ScoreProducer::unlimited),
- m_sourceFactorId(sourceFactorId),
- m_targetFactorId(targetFactorId),
- m_unrestricted(true),
- m_simple(simple),
- m_sourceContext(sourceContext),
- m_domainTrigger(domainTrigger),
- m_sparseProducerWeight(1),
- m_ignorePunctuation(ignorePunctuation) {
- std::cerr << "Creating phrase pair feature.. " << std::endl;
- if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
- if (m_sourceContext == 1) std::cerr << "using source context.. ";
- if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
-
- // compile a list of punctuation characters
- if (m_ignorePunctuation) {
- std::cerr << "ignoring punctuation for triggers.. ";
- char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
- for (size_t i=0; i < sizeof(punctuation)-1; ++i)
- m_punctuationHash[punctuation[i]] = 1;
- }
- }
-
- void Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
-
- void EvaluateChart(
- const ChartHypothesis&,
- int /* featureID */,
- ScoreComponentCollection*) const {
- CHECK(0); // feature function not valid in chart decoder
- }
-
- //NB: Should really precompute this feature, but don't have
- //good hooks to do this.
+ PhrasePairFeature (FactorType sourceFactorId, FactorType targetFactorId,
+ bool simple, bool sourceContext, bool ignorePunctuation, bool domainTrigger) :
+ StatelessFeatureFunction("pp", ScoreProducer::unlimited),
+ m_sourceFactorId(sourceFactorId),
+ m_targetFactorId(targetFactorId),
+ m_unrestricted(true),
+ m_simple(simple),
+ m_sourceContext(sourceContext),
+ m_domainTrigger(domainTrigger),
+ m_sparseProducerWeight(1),
+ m_ignorePunctuation(ignorePunctuation) {
+ std::cerr << "Creating phrase pair feature.. " << std::endl;
+ if (m_simple == 1) std::cerr << "using simple phrase pairs.. ";
+ if (m_sourceContext == 1) std::cerr << "using source context.. ";
+ if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
+
+ // compile a list of punctuation characters
+ if (m_ignorePunctuation) {
+ std::cerr << "ignoring punctuation for triggers.. ";
+ char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
+ for (size_t i=0; i < sizeof(punctuation)-1; ++i)
+ m_punctuationHash[punctuation[i]] = 1;
+ }
+ }
+
+ void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
+
+ void EvaluateChart(const ChartBasedFeatureContext& context,
+ ScoreComponentCollection*) const {
+ throw std::logic_error("PhrasePairFeature not valid in chart decoder");
+ }
+
bool ComputeValueInTranslationOption() const;
std::string GetScoreProducerWeightShortName(unsigned) const;
@@ -94,8 +71,6 @@ class PhrasePairFeature: public StatelessFeatureFunction {
bool Load(const std::string &filePathSource/*, const std::string &filePathTarget*/);
- void InitializeForInput( Sentence const& in );
-
void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
};
diff --git a/moses/src/PrefixTree.h b/moses/src/PrefixTree.h
index 3215fb834..9cf1360e6 100644
--- a/moses/src/PrefixTree.h
+++ b/moses/src/PrefixTree.h
@@ -18,6 +18,8 @@
namespace Moses
{
+/** @todo How is this used in the pb binary phrase table?
+ */
template<typename T,typename D>
class PrefixTreeSA
{
@@ -129,6 +131,8 @@ template<typename T,typename D> D PrefixTreeSA<T,D>::def;
/////////////////////////////////////////////////////////////////////////////
+/** @todo How is this used in the pb binary phrase table?
+ */
template<typename T,typename D>
class PrefixTreeF
{
diff --git a/moses/src/PrefixTreeMap.h b/moses/src/PrefixTreeMap.h
index 6bb6b769f..fae875bd4 100644
--- a/moses/src/PrefixTreeMap.h
+++ b/moses/src/PrefixTreeMap.h
@@ -21,6 +21,8 @@ typedef FilePtr<PTF> CPT;
typedef std::vector<CPT> Data;
typedef LVoc<std::string> WordVoc;
+/** @todo How is this used in the pb binary phrase table?
+ */
class GenericCandidate
{
public:
@@ -57,19 +59,9 @@ private:
ScoreList m_ScoreList;
};
-/*
-class PPtr {
- public:
- typedef unsigned IndexType;
- public:
- PPtr(PTF const* p, IndexType i, bool isRoot)
- : m_Ptr(p), m_Index(i), m_IsRoot(isRoot){
- };
- ~PPtr(){
- };
-};
-*/
-
+
+/** @todo How is this used in the pb binary phrase table?
+ */
struct PPimp {
PTF const*p;
unsigned idx;
@@ -89,6 +81,8 @@ struct PPimp {
};
+/** @todo How is this used in the pb binary phrase table?
+ */
class Candidates : public std::vector<GenericCandidate>
{
typedef std::vector<GenericCandidate> MyBase;
diff --git a/moses/src/ReorderingConstraint.h b/moses/src/ReorderingConstraint.h
index 742b2228b..13acd4081 100644
--- a/moses/src/ReorderingConstraint.h
+++ b/moses/src/ReorderingConstraint.h
@@ -39,7 +39,8 @@ namespace Moses
class InputType;
#define NOT_A_ZONE 999999999
-/** vector of boolean used to represent whether a word has been translated or not */
+/** A list of zones and walls to limit which reordering can occur
+ */
class ReorderingConstraint
{
friend std::ostream& operator<<(std::ostream& out, const ReorderingConstraint& reorderingConstraint);
diff --git a/moses/src/ReorderingStack.h b/moses/src/ReorderingStack.h
index 9a710b348..d819e5f47 100644
--- a/moses/src/ReorderingStack.h
+++ b/moses/src/ReorderingStack.h
@@ -19,6 +19,8 @@
namespace Moses
{
+/** @todo what is this?
+ */
class ReorderingStack
{
private:
diff --git a/moses/src/RuleCube.cpp b/moses/src/RuleCube.cpp
index b028abe3e..83e7d0278 100644
--- a/moses/src/RuleCube.cpp
+++ b/moses/src/RuleCube.cpp
@@ -21,8 +21,7 @@
#include "ChartCell.h"
#include "ChartCellCollection.h"
-#include "ChartTranslationOption.h"
-#include "ChartTranslationOptionCollection.h"
+#include "ChartTranslationOptions.h"
#include "RuleCube.h"
#include "RuleCubeQueue.h"
#include "StaticData.h"
@@ -35,7 +34,7 @@ namespace Moses
{
// initialise the RuleCube by creating the top-left corner item
-RuleCube::RuleCube(const ChartTranslationOption &transOpt,
+RuleCube::RuleCube(const ChartTranslationOptions &transOpt,
const ChartCellCollection &allChartCells,
ChartManager &manager)
: m_transOpt(transOpt)
diff --git a/moses/src/RuleCube.h b/moses/src/RuleCube.h
index 4ea17e80a..05f9f1a24 100644
--- a/moses/src/RuleCube.h
+++ b/moses/src/RuleCube.h
@@ -37,10 +37,11 @@ namespace Moses
class ChartCellCollection;
class ChartManager;
-class ChartTranslationOption;
+class ChartTranslationOptions;
-// Define an ordering between RuleCubeItems based on their scores. This
-// is used to order items in the cube's priority queue.
+/** Define an ordering between RuleCubeItems based on their scores.
+ * This is used to order items in the cube's priority queue.
+ */
class RuleCubeItemScoreOrderer
{
public:
@@ -49,9 +50,10 @@ class RuleCubeItemScoreOrderer
}
};
-// Define an ordering between RuleCubeItems based on their positions in the
-// cube. This is used to record which positions in the cube have been covered
-// during search.
+/** Define an ordering between RuleCubeItems based on their positions in the
+ * cube. This is used to record which positions in the cube have been covered
+ * during search.
+ */
class RuleCubeItemPositionOrderer
{
public:
@@ -60,6 +62,8 @@ class RuleCubeItemPositionOrderer
}
};
+/** @todo what is this?
+ */
class RuleCubeItemHasher
{
public:
@@ -71,6 +75,8 @@ class RuleCubeItemHasher
}
};
+/** @todo what is this?
+ */
class RuleCubeItemEqualityPred
{
public:
@@ -80,10 +86,12 @@ class RuleCubeItemEqualityPred
}
};
+/** @todo what is this?
+ */
class RuleCube
{
public:
- RuleCube(const ChartTranslationOption &, const ChartCellCollection &,
+ RuleCube(const ChartTranslationOptions &, const ChartCellCollection &,
ChartManager &);
~RuleCube();
@@ -98,7 +106,7 @@ class RuleCube
bool IsEmpty() const { return m_queue.empty(); }
- const ChartTranslationOption &GetTranslationOption() const {
+ const ChartTranslationOptions &GetTranslationOption() const {
return m_transOpt;
}
@@ -123,7 +131,7 @@ class RuleCube
void CreateNeighbors(const RuleCubeItem &, ChartManager &);
void CreateNeighbor(const RuleCubeItem &, int, ChartManager &);
- const ChartTranslationOption &m_transOpt;
+ const ChartTranslationOptions &m_transOpt;
ItemSet m_covered;
Queue m_queue;
};
diff --git a/moses/src/RuleCubeItem.cpp b/moses/src/RuleCubeItem.cpp
index 86970055e..ec914e5ee 100644
--- a/moses/src/RuleCubeItem.cpp
+++ b/moses/src/RuleCubeItem.cpp
@@ -19,8 +19,7 @@
#include "ChartCell.h"
#include "ChartCellCollection.h"
-#include "ChartTranslationOption.h"
-#include "ChartTranslationOptionCollection.h"
+#include "ChartTranslationOptions.h"
#include "RuleCubeItem.h"
#include "RuleCubeQueue.h"
#include "WordsRange.h"
@@ -37,7 +36,7 @@ std::size_t hash_value(const HypothesisDimension &dimension)
return hasher(dimension.GetHypothesis());
}
-RuleCubeItem::RuleCubeItem(const ChartTranslationOption &transOpt,
+RuleCubeItem::RuleCubeItem(const ChartTranslationOptions &transOpt,
const ChartCellCollection &/*allChartCells*/)
: m_translationDimension(0,
transOpt.GetTargetPhraseCollection().GetCollection())
@@ -75,7 +74,7 @@ void RuleCubeItem::EstimateScore()
}
}
-void RuleCubeItem::CreateHypothesis(const ChartTranslationOption &transOpt,
+void RuleCubeItem::CreateHypothesis(const ChartTranslationOptions &transOpt,
ChartManager &manager)
{
m_hypothesis = new ChartHypothesis(transOpt, *this, manager);
diff --git a/moses/src/RuleCubeItem.h b/moses/src/RuleCubeItem.h
index db6a45b44..612079172 100644
--- a/moses/src/RuleCubeItem.h
+++ b/moses/src/RuleCubeItem.h
@@ -29,12 +29,14 @@ namespace Moses
class ChartCellCollection;
class ChartHypothesis;
class ChartManager;
-class ChartTranslationOption;
+class ChartTranslationOptions;
class TargetPhrase;
typedef std::vector<const ChartHypothesis*> HypoList;
-// wrapper around list of target phrase translation options
+/** wrapper around list of target phrase translation options
+ * @todo How is this used. Split out into separate source file
+ */
class TranslationDimension
{
public:
@@ -68,7 +70,9 @@ class TranslationDimension
};
-// wrapper around list of hypotheses for a particular non-term of a trans opt
+/** wrapper around list of hypotheses for a particular non-term of a trans opt
+ * @todo How is this used. Split out into separate source file
+ */
class HypothesisDimension
{
public:
@@ -102,10 +106,11 @@ private:
std::size_t hash_value(const HypothesisDimension &);
+/** @todo How is this used. Split out into separate source file */
class RuleCubeItem
{
public:
- RuleCubeItem(const ChartTranslationOption &, const ChartCellCollection &);
+ RuleCubeItem(const ChartTranslationOptions &, const ChartCellCollection &);
RuleCubeItem(const RuleCubeItem &, int);
~RuleCubeItem();
@@ -121,7 +126,7 @@ class RuleCubeItem
void EstimateScore();
- void CreateHypothesis(const ChartTranslationOption &, ChartManager &);
+ void CreateHypothesis(const ChartTranslationOptions &, ChartManager &);
ChartHypothesis *ReleaseHypothesis();
diff --git a/moses/src/RuleCubeQueue.h b/moses/src/RuleCubeQueue.h
index b09035016..9763b3877 100644
--- a/moses/src/RuleCubeQueue.h
+++ b/moses/src/RuleCubeQueue.h
@@ -31,8 +31,9 @@ namespace Moses
class ChartManager;
-// Define an ordering between RuleCube based on their best item scores. This
-// is used to order items in the priority queue.
+/** Define an ordering between RuleCube based on their best item scores. This
+ * is used to order items in the priority queue.
+ */
class RuleCubeOrderer
{
public:
@@ -41,6 +42,7 @@ class RuleCubeOrderer
}
};
+/** @todo how is this used */
class RuleCubeQueue
{
public:
diff --git a/moses/src/RuleTable/Jamfile b/moses/src/RuleTable/Jamfile
index fabc802ff..5512863f7 100644
--- a/moses/src/RuleTable/Jamfile
+++ b/moses/src/RuleTable/Jamfile
@@ -1 +1 @@
-lib RuleTable : [ glob *.cpp ] ..//moses_internal ..//Scope3Parser ..//CYKPlusParser ;
+lib RuleTable : [ glob *.cpp ] ..//moses_internal ..//Scope3Parser ..//CYKPlusParser ..//fuzzy-match ;
diff --git a/moses/src/RuleTable/Loader.h b/moses/src/RuleTable/Loader.h
index 0bf116920..fac8900bd 100644
--- a/moses/src/RuleTable/Loader.h
+++ b/moses/src/RuleTable/Loader.h
@@ -31,8 +31,8 @@ namespace Moses
class LMList;
class WordPenaltyProducer;
-// Abstract base class defining RuleTableLoader interface. Friend of
-// RuleTableTrie.
+/** Abstract base class defining RuleTableLoader interface. Friend of RuleTableTrie.
+ */
class RuleTableLoader
{
public:
diff --git a/moses/src/RuleTable/LoaderCompact.h b/moses/src/RuleTable/LoaderCompact.h
index ecf3843ed..42453ac1c 100644
--- a/moses/src/RuleTable/LoaderCompact.h
+++ b/moses/src/RuleTable/LoaderCompact.h
@@ -35,6 +35,7 @@ class LMList;
class RuleTableTrie;
class WordPenaltyProducer;
+//! @todo ask phil williams
class RuleTableLoaderCompact : public RuleTableLoader
{
public:
diff --git a/moses/src/RuleTable/LoaderFactory.cpp b/moses/src/RuleTable/LoaderFactory.cpp
index 676af1a2a..eead22b21 100644
--- a/moses/src/RuleTable/LoaderFactory.cpp
+++ b/moses/src/RuleTable/LoaderFactory.cpp
@@ -27,6 +27,9 @@
#include "Util.h"
#include <sstream>
+#include <iostream>
+
+using namespace std;
namespace Moses
{
@@ -38,25 +41,32 @@ std::auto_ptr<RuleTableLoader> RuleTableLoaderFactory::Create(
{
InputFileStream input(path);
std::string line;
- std::getline(input, line);
- std::vector<std::string> tokens;
- Tokenize(tokens, line);
- if (tokens.size() == 1) {
- if (tokens[0] == "1") {
- return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderCompact());
+ bool cont = std::getline(input, line);
+
+ if (cont) {
+ std::vector<std::string> tokens;
+ Tokenize(tokens, line);
+ if (tokens.size() == 1) {
+ if (tokens[0] == "1") {
+ return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderCompact());
+ }
+ std::stringstream msg;
+ msg << "Unsupported compact rule table format: " << tokens[0];
+ UserMessage::Add(msg.str());
+ return std::auto_ptr<RuleTableLoader>();
+ }
+ else if (tokens[0] == "[X]" && tokens[1] == "|||") {
+ return std::auto_ptr<RuleTableLoader>(new
+ RuleTableLoaderHiero());
+
}
- std::stringstream msg;
- msg << "Unsupported compact rule table format: " << tokens[0];
- UserMessage::Add(msg.str());
- return std::auto_ptr<RuleTableLoader>();
- }
- else if (tokens[0] == "[X]" && tokens[1] == "|||") {
- return std::auto_ptr<RuleTableLoader>(new
- RuleTableLoaderHiero());
+ return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderStandard());
+ }
+ else
+ { // empty phrase table
+ return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderStandard());
}
-
- return std::auto_ptr<RuleTableLoader>(new RuleTableLoaderStandard());
}
} // namespace Moses
diff --git a/moses/src/RuleTable/LoaderFactory.h b/moses/src/RuleTable/LoaderFactory.h
index c9c4daf1d..01c168680 100644
--- a/moses/src/RuleTable/LoaderFactory.h
+++ b/moses/src/RuleTable/LoaderFactory.h
@@ -27,7 +27,7 @@ namespace Moses
class RuleTableLoader;
-// Creates a RuleTableLoader object suitable for loading the specified file.
+//! Creates a RuleTableLoader object suitable for loading the specified file.
class RuleTableLoaderFactory
{
public:
diff --git a/moses/src/RuleTable/LoaderHiero.h b/moses/src/RuleTable/LoaderHiero.h
index c2d1cdecc..4a74f90b8 100644
--- a/moses/src/RuleTable/LoaderHiero.h
+++ b/moses/src/RuleTable/LoaderHiero.h
@@ -13,6 +13,7 @@
namespace Moses {
+//! specific implementation of SCFG loader to load rule tables formatted in Hiero-style format
class RuleTableLoaderHiero : public RuleTableLoaderStandard
{
public:
diff --git a/moses/src/RuleTable/LoaderStandard.cpp b/moses/src/RuleTable/LoaderStandard.cpp
index 065741bae..5aa5df255 100644
--- a/moses/src/RuleTable/LoaderStandard.cpp
+++ b/moses/src/RuleTable/LoaderStandard.cpp
@@ -150,7 +150,7 @@ bool RuleTableLoaderStandard::Load(FormatType format
, const WordPenaltyProducer* wpProducer
, RuleTableTrie &ruleTable)
{
- PrintUserTime("Start loading new format pt model");
+ PrintUserTime(string("Start loading text SCFG phrase table. ") + (format==MosesFormat?"Moses ":"Hiero ") + " format");
const StaticData &staticData = StaticData::Instance();
const std::string& factorDelimiter = staticData.GetFactorDelimiter();
@@ -181,6 +181,10 @@ bool RuleTableLoaderStandard::Load(FormatType format
abort();
}
+ if (tokens.size() == 4) {
+ tokens.push_back("1 1"); //dummy rule count for glue rules
+ }
+
const string &sourcePhraseString = tokens[0]
, &targetPhraseString = tokens[1]
, &scoreString = tokens[2]
diff --git a/moses/src/RuleTable/LoaderStandard.h b/moses/src/RuleTable/LoaderStandard.h
index aea1b447e..6fea42794 100644
--- a/moses/src/RuleTable/LoaderStandard.h
+++ b/moses/src/RuleTable/LoaderStandard.h
@@ -24,12 +24,7 @@
namespace Moses
{
-enum FormatType
-{
- MosesFormat
- ,HieroFormat
-};
-
+//! Loader to load Moses-formatted SCFG rules from a text file
class RuleTableLoaderStandard : public RuleTableLoader
{
protected:
diff --git a/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp b/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp
index b0ab23e09..21acbcab0 100644
--- a/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp
+++ b/moses/src/RuleTable/PhraseDictionaryALSuffixArray.cpp
@@ -57,14 +57,14 @@ void PhraseDictionaryALSuffixArray::InitializeForInput(InputType const& source)
// populate with rules for this sentence
long translationId = source.GetTranslationId();
- string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId);
+ string grammarFile = GetFilePath() + "/grammar.out." + SPrint(translationId) + ".gz";
// data from file
InputFileStream inFile(grammarFile);
std::auto_ptr<RuleTableLoader> loader =
RuleTableLoaderFactory::Create(grammarFile);
- std::vector<float> weightT = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT).GetTranslationWeights(GetDictIndex());
+ std::vector<float> weightT = StaticData::Instance().GetWeights(GetFeature());
bool ret = loader->Load(*m_input, *m_output, inFile, weightT, m_tableLimit,
*m_languageModels, m_wpProducer, *this);
diff --git a/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h b/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h
index 79c928c69..4bd6e09ca 100644
--- a/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h
+++ b/moses/src/RuleTable/PhraseDictionaryALSuffixArray.h
@@ -13,6 +13,11 @@
namespace Moses {
+/** Implementation of in-memory phrase table for use with Adam Lopez's suffix array.
+ * Does 2 things that the normal in-memory pt doesn't do:
+ * 1. Loads grammar for a sentence to be decoded only when the sentence is being decoded. Unload afterwards
+ 2. Format of the pt file follows Hiero, rather than Moses
+ */
class PhraseDictionaryALSuffixArray : public PhraseDictionarySCFG
{
public:
diff --git a/moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp
new file mode 100644
index 000000000..628da9862
--- /dev/null
+++ b/moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@@ -0,0 +1,311 @@
+// vim:tabstop=2
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include <fstream>
+#include <string>
+#include <iterator>
+#include <algorithm>
+#include "RuleTable/Loader.h"
+#include "RuleTable/LoaderFactory.h"
+#include "PhraseDictionaryFuzzyMatch.h"
+#include "FactorCollection.h"
+#include "Word.h"
+#include "Util.h"
+#include "InputFileStream.h"
+#include "StaticData.h"
+#include "WordsRange.h"
+#include "UserMessage.h"
+#include "util/file.hh"
+#include "CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h"
+
+using namespace std;
+
+namespace Moses
+{
+
+ PhraseDictionaryFuzzyMatch::PhraseDictionaryFuzzyMatch(size_t numScoreComponents,
+ PhraseDictionaryFeature* feature)
+ : PhraseDictionary(numScoreComponents, feature)
+ {
+ const StaticData &staticData = StaticData::Instance();
+ CHECK(staticData.ThreadCount() == 1);
+ }
+
+ bool PhraseDictionaryFuzzyMatch::Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &initStr
+ , const std::vector<float> &weight
+ , size_t tableLimit,
+ const LMList& languageModels,
+ const WordPenaltyProducer* wpProducer)
+ {
+ m_languageModels = &(languageModels);
+ m_wpProducer = wpProducer;
+ m_tableLimit = tableLimit;
+ m_input = &input;
+ m_output = &output;
+
+ m_weight = &weight;
+
+ cerr << "initStr=" << initStr << endl;
+ m_config = Tokenize(initStr, ";");
+ assert(m_config.size() == 3);
+
+ m_FuzzyMatchWrapper = new tmmt::FuzzyMatchWrapper(m_config[0], m_config[1], m_config[2]);
+
+ return true;
+ }
+
+ ChartRuleLookupManager *PhraseDictionaryFuzzyMatch::CreateRuleLookupManager(
+ const InputType &sentence,
+ const ChartCellCollection &cellCollection)
+ {
+ return new ChartRuleLookupManagerMemoryPerSentence(sentence, cellCollection, *this);
+ }
+
+ void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
+ {
+ util::TempMaker tempFile("moses");
+ util::scoped_fd alive;
+ string inFileName(tempFile.Name(alive));
+
+ ofstream inFile(inFileName.c_str());
+
+ for (size_t i = 1; i < inputSentence.GetSize() - 1; ++i)
+ {
+ inFile << inputSentence.GetWord(i);
+ }
+ inFile << endl;
+ inFile.close();
+
+ string ptFileName = m_FuzzyMatchWrapper->Extract(inFileName);
+
+ // populate with rules for this sentence
+ long translationId = inputSentence.GetTranslationId();
+
+ PhraseDictionaryNodeSCFG &rootNode = m_collection[translationId];
+ FormatType format = MosesFormat;
+
+ // data from file
+ InputFileStream inStream(ptFileName);
+
+ // copied from class LoaderStandard
+ PrintUserTime("Start loading fuzzy-match phrase model");
+
+ const StaticData &staticData = StaticData::Instance();
+ const std::string& factorDelimiter = staticData.GetFactorDelimiter();
+
+
+ string lineOrig;
+ size_t count = 0;
+
+ while(getline(inStream, lineOrig)) {
+ const string *line;
+ if (format == HieroFormat) { // reformat line
+ assert(false);
+ //line = ReformatHieroRule(lineOrig);
+ }
+ else
+ { // do nothing to format of line
+ line = &lineOrig;
+ }
+
+ vector<string> tokens;
+ vector<float> scoreVector;
+
+ TokenizeMultiCharSeparator(tokens, *line , "|||" );
+
+ if (tokens.size() != 4 && tokens.size() != 5) {
+ stringstream strme;
+ strme << "Syntax error at " << ptFileName << ":" << count;
+ UserMessage::Add(strme.str());
+ abort();
+ }
+
+ const string &sourcePhraseString = tokens[0]
+ , &targetPhraseString = tokens[1]
+ , &scoreString = tokens[2]
+ , &alignString = tokens[3];
+
+ bool isLHSEmpty = (sourcePhraseString.find_first_not_of(" \t", 0) == string::npos);
+ if (isLHSEmpty && !staticData.IsWordDeletionEnabled()) {
+ TRACE_ERR( ptFileName << ":" << count << ": pt entry contains empty target, skipping\n");
+ continue;
+ }
+
+ Tokenize<float>(scoreVector, scoreString);
+ const size_t numScoreComponents = GetFeature()->GetNumScoreComponents();
+ if (scoreVector.size() != numScoreComponents) {
+ stringstream strme;
+ strme << "Size of scoreVector != number (" << scoreVector.size() << "!="
+ << numScoreComponents << ") of score components on line " << count;
+ UserMessage::Add(strme.str());
+ abort();
+ }
+ CHECK(scoreVector.size() == numScoreComponents);
+
+ // parse source & find pt node
+
+ // constituent labels
+ Word sourceLHS, targetLHS;
+
+ // source
+ Phrase sourcePhrase( 0);
+ sourcePhrase.CreateFromStringNewFormat(Input, *m_input, sourcePhraseString, factorDelimiter, sourceLHS);
+
+ // create target phrase obj
+ TargetPhrase *targetPhrase = new TargetPhrase(Output);
+ targetPhrase->CreateFromStringNewFormat(Output, *m_output, targetPhraseString, factorDelimiter, targetLHS);
+
+ // rest of target phrase
+ targetPhrase->SetAlignmentInfo(alignString);
+ targetPhrase->SetTargetLHS(targetLHS);
+ //targetPhrase->SetDebugOutput(string("New Format pt ") + line);
+
+ // component score, for n-best output
+ std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),TransformScore);
+ std::transform(scoreVector.begin(),scoreVector.end(),scoreVector.begin(),FloorScore);
+
+ targetPhrase->SetScoreChart(GetFeature(), scoreVector, *m_weight, *m_languageModels, m_wpProducer);
+
+ TargetPhraseCollection &phraseColl = GetOrCreateTargetPhraseCollection(rootNode, sourcePhrase, *targetPhrase, sourceLHS);
+ phraseColl.Add(targetPhrase);
+
+ count++;
+
+ if (format == HieroFormat) { // reformat line
+ delete line;
+ }
+ else
+ { // do nothing
+ }
+
+ }
+
+ // sort and prune each target phrase collection
+ SortAndPrune(rootNode);
+
+ remove(ptFileName.c_str());
+ remove(inFileName.c_str());
+ }
+
+ TargetPhraseCollection &PhraseDictionaryFuzzyMatch::GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeSCFG &rootNode
+ , const Phrase &source
+ , const TargetPhrase &target
+ , const Word &sourceLHS)
+ {
+ PhraseDictionaryNodeSCFG &currNode = GetOrCreateNode(rootNode, source, target, sourceLHS);
+ return currNode.GetOrCreateTargetPhraseCollection();
+ }
+
+ PhraseDictionaryNodeSCFG &PhraseDictionaryFuzzyMatch::GetOrCreateNode(PhraseDictionaryNodeSCFG &rootNode
+ , const Phrase &source
+ , const TargetPhrase &target
+ , const Word &sourceLHS)
+ {
+ cerr << source << endl << target << endl;
+ const size_t size = source.GetSize();
+
+ const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
+ AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
+
+ PhraseDictionaryNodeSCFG *currNode = &rootNode;
+ for (size_t pos = 0 ; pos < size ; ++pos) {
+ const Word& word = source.GetWord(pos);
+
+ if (word.IsNonTerminal()) {
+ // indexed by source label 1st
+ const Word &sourceNonTerm = word;
+
+ CHECK(iterAlign != target.GetAlignmentInfo().end());
+ CHECK(iterAlign->first == pos);
+ size_t targetNonTermInd = iterAlign->second;
+ ++iterAlign;
+ const Word &targetNonTerm = target.GetWord(targetNonTermInd);
+
+ currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
+ } else {
+ currNode = currNode->GetOrCreateChild(word);
+ }
+
+ CHECK(currNode != NULL);
+ }
+
+ // finally, the source LHS
+ //currNode = currNode->GetOrCreateChild(sourceLHS);
+ //CHECK(currNode != NULL);
+
+
+ return *currNode;
+ }
+
+ void PhraseDictionaryFuzzyMatch::SortAndPrune(PhraseDictionaryNodeSCFG &rootNode)
+ {
+ if (GetTableLimit())
+ {
+ rootNode.Sort(GetTableLimit());
+ }
+ }
+
+ void PhraseDictionaryFuzzyMatch::CleanUp(const InputType &source)
+ {
+ m_collection.erase(source.GetTranslationId());
+ }
+
+ const PhraseDictionaryNodeSCFG &PhraseDictionaryFuzzyMatch::GetRootNode(const InputType &source) const
+ {
+ long transId = source.GetTranslationId();
+ std::map<long, PhraseDictionaryNodeSCFG>::const_iterator iter = m_collection.find(transId);
+ CHECK(iter != m_collection.end());
+ return iter->second;
+ }
+ PhraseDictionaryNodeSCFG &PhraseDictionaryFuzzyMatch::GetRootNode(const InputType &source)
+ {
+ long transId = source.GetTranslationId();
+ std::map<long, PhraseDictionaryNodeSCFG>::iterator iter = m_collection.find(transId);
+ CHECK(iter != m_collection.end());
+ return iter->second;
+ }
+
+ TO_STRING_BODY(PhraseDictionaryFuzzyMatch);
+
+ // friend
+ ostream& operator<<(ostream& out, const PhraseDictionaryFuzzyMatch& phraseDict)
+ {
+ typedef PhraseDictionaryNodeSCFG::TerminalMap TermMap;
+ typedef PhraseDictionaryNodeSCFG::NonTerminalMap NonTermMap;
+
+ /*
+ const PhraseDictionaryNodeSCFG &coll = phraseDict.m_collection;
+ for (NonTermMap::const_iterator p = coll.m_nonTermMap.begin(); p != coll.m_nonTermMap.end(); ++p) {
+ const Word &sourceNonTerm = p->first.first;
+ out << sourceNonTerm;
+ }
+ for (TermMap::const_iterator p = coll.m_sourceTermMap.begin(); p != coll.m_sourceTermMap.end(); ++p) {
+ const Word &sourceTerm = p->first;
+ out << sourceTerm;
+ }
+ */
+
+ return out;
+ }
+
+}
diff --git a/moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h b/moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h
new file mode 100644
index 000000000..8679bdd1c
--- /dev/null
+++ b/moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h
@@ -0,0 +1,98 @@
+/***********************************************************************
+ Moses - statistical machine translation system
+ Copyright (C) 2006-2011 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#include "PhraseDictionary.h"
+#include "PhraseDictionaryNodeSCFG.h"
+#include "PhraseDictionarySCFG.h"
+#include "InputType.h"
+#include "NonTerminal.h"
+#include "RuleTable/Trie.h"
+#include "fuzzy-match/FuzzyMatchWrapper.h"
+
+namespace Moses
+{
+ class PhraseDictionaryNodeSCFG;
+
+ /** Implementation of a SCFG rule table in a trie. Looking up a rule of
+ * length n symbols requires n look-ups to find the TargetPhraseCollection.
+ */
+ class PhraseDictionaryFuzzyMatch : public PhraseDictionary
+ {
+ friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryFuzzyMatch&);
+ friend class RuleTableLoader;
+
+ public:
+ PhraseDictionaryFuzzyMatch(size_t numScoreComponents,
+ PhraseDictionaryFeature* feature);
+ bool Load(const std::vector<FactorType> &input
+ , const std::vector<FactorType> &output
+ , const std::string &initStr
+ , const std::vector<float> &weight
+ , size_t tableLimit,
+ const LMList& languageModels,
+ const WordPenaltyProducer* wpProducer);
+
+ const PhraseDictionaryNodeSCFG &GetRootNode(const InputType &source) const;
+
+ ChartRuleLookupManager *CreateRuleLookupManager(
+ const InputType &,
+ const ChartCellCollection &);
+ void InitializeForInput(InputType const& inputSentence);
+ void CleanUp(const InputType& source);
+
+ virtual const TargetPhraseCollection *GetTargetPhraseCollection(const Phrase& src) const
+ {
+ assert(false);
+ }
+ virtual DecodeType GetDecodeType() const
+ {
+ assert(false);
+ }
+
+ TO_STRING();
+
+ protected:
+ TargetPhraseCollection &GetOrCreateTargetPhraseCollection(PhraseDictionaryNodeSCFG &rootNode
+ , const Phrase &source
+ , const TargetPhrase &target
+ , const Word &sourceLHS);
+
+ PhraseDictionaryNodeSCFG &GetOrCreateNode(PhraseDictionaryNodeSCFG &rootNode
+ , const Phrase &source
+ , const TargetPhrase &target
+ , const Word &sourceLHS);
+
+ void SortAndPrune(PhraseDictionaryNodeSCFG &rootNode);
+ PhraseDictionaryNodeSCFG &GetRootNode(const InputType &source);
+
+ std::map<long, PhraseDictionaryNodeSCFG> m_collection;
+ std::vector<std::string> m_config;
+
+ const std::vector<FactorType> *m_input, *m_output;
+ const LMList *m_languageModels;
+ const WordPenaltyProducer *m_wpProducer;
+ const std::vector<float> *m_weight;
+
+ tmmt::FuzzyMatchWrapper *m_FuzzyMatchWrapper;
+
+ };
+
+} // namespace Moses
diff --git a/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h b/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h
index 3bd2338dc..95b77f3e4 100644
--- a/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h
+++ b/moses/src/RuleTable/PhraseDictionaryNodeSCFG.h
@@ -38,7 +38,9 @@ namespace Moses
{
class PhraseDictionarySCFG;
-
+class PhraseDictionaryFuzzyMatch;
+
+ //! @todo why?
class NonTerminalMapKeyHasher
{
public:
@@ -55,6 +57,7 @@ public:
}
};
+//! @todo why?
class NonTerminalMapKeyEqualityPred
{
public:
@@ -109,10 +112,13 @@ public:
private:
friend std::ostream& operator<<(std::ostream&, const PhraseDictionarySCFG&);
+ friend std::ostream& operator<<(std::ostream&, const PhraseDictionaryFuzzyMatch&);
// only these classes are allowed to instantiate this class
friend class PhraseDictionarySCFG;
+ friend class PhraseDictionaryFuzzyMatch;
friend class std::map<Word, PhraseDictionaryNodeSCFG>;
+ friend class std::map<long, PhraseDictionaryNodeSCFG>;
protected:
TerminalMap m_sourceTermMap;
diff --git a/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp b/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp
index ad66b7b0f..9b0124605 100644
--- a/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp
+++ b/moses/src/RuleTable/PhraseDictionaryOnDisk.cpp
@@ -1,4 +1,4 @@
-// vim:tabstop=2
+ // vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2010 Hieu Hoang
@@ -43,6 +43,8 @@ bool PhraseDictionaryOnDisk::Load(const std::vector<FactorType> &input
, const LMList& languageModels
, const WordPenaltyProducer* wpProducer)
{
+ PrintUserTime("Start loading binary SCFG phrase table. ");
+
m_languageModels = &(languageModels);
m_wpProducer = wpProducer;
m_filePath = filePath;
@@ -55,7 +57,7 @@ bool PhraseDictionaryOnDisk::Load(const std::vector<FactorType> &input
if (!m_dbWrapper.BeginLoad(filePath))
return false;
- CHECK(m_dbWrapper.GetMisc("Version") == 3);
+ CHECK(m_dbWrapper.GetMisc("Version") == 4);
CHECK(m_dbWrapper.GetMisc("NumSourceFactors") == input.size());
CHECK(m_dbWrapper.GetMisc("NumTargetFactors") == output.size());
CHECK(m_dbWrapper.GetMisc("NumScores") == weight.size());
@@ -91,7 +93,6 @@ ChartRuleLookupManager *PhraseDictionaryOnDisk::CreateRuleLookupManager(
const InputType &sentence,
const ChartCellCollection &cellCollection)
{
- std::vector<float> weightT = StaticData::Instance().GetTranslationSystem(TranslationSystem::DEFAULT).GetTranslationWeights(GetDictIndex());
return new ChartRuleLookupManagerOnDisk(sentence, cellCollection, *this,
m_dbWrapper, m_languageModels,
m_wpProducer, m_inputFactorsVec,
diff --git a/moses/src/RuleTable/PhraseDictionaryOnDisk.h b/moses/src/RuleTable/PhraseDictionaryOnDisk.h
index 64c77cbd7..f9dcf3a8f 100644
--- a/moses/src/RuleTable/PhraseDictionaryOnDisk.h
+++ b/moses/src/RuleTable/PhraseDictionaryOnDisk.h
@@ -35,6 +35,8 @@ class TargetPhraseCollection;
class DottedRuleStackOnDisk;
class WordPenaltyProducer;
+/** Implementation of on-disk phrase table for hierarchical/syntax model.
+ */
class PhraseDictionaryOnDisk : public PhraseDictionary
{
typedef PhraseDictionary MyBase;
diff --git a/moses/src/RuleTable/PhraseDictionarySCFG.cpp b/moses/src/RuleTable/PhraseDictionarySCFG.cpp
index e000deee4..3fb7f0cc7 100644
--- a/moses/src/RuleTable/PhraseDictionarySCFG.cpp
+++ b/moses/src/RuleTable/PhraseDictionarySCFG.cpp
@@ -55,10 +55,8 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
{
const size_t size = source.GetSize();
- const AlignmentInfo &alignmentInfo = target.GetAlignmentInfo();
- AlignmentInfo::const_iterator iterAlign = alignmentInfo.begin();
-
PhraseDictionaryNodeSCFG *currNode = &m_collection;
+ map<size_t, size_t> sourceToTargetMap(target.GetAlignmentInfo().begin(), target.GetAlignmentInfo().end());
for (size_t pos = 0 ; pos < size ; ++pos) {
const Word& word = source.GetWord(pos);
@@ -66,10 +64,9 @@ PhraseDictionaryNodeSCFG &PhraseDictionarySCFG::GetOrCreateNode(const Phrase &so
// indexed by source label 1st
const Word &sourceNonTerm = word;
- CHECK(iterAlign != target.GetAlignmentInfo().end());
- CHECK(iterAlign->first == pos);
+ map<size_t, size_t>::const_iterator iterAlign = sourceToTargetMap.find(pos);
+ CHECK(iterAlign != sourceToTargetMap.end());
size_t targetNonTermInd = iterAlign->second;
- ++iterAlign;
const Word &targetNonTerm = target.GetWord(targetNonTermInd);
currNode = currNode->GetOrCreateChild(sourceNonTerm, targetNonTerm);
diff --git a/moses/src/RuleTable/PhraseDictionarySCFG.h b/moses/src/RuleTable/PhraseDictionarySCFG.h
index 3a2d6f13c..9e3fcf446 100644
--- a/moses/src/RuleTable/PhraseDictionarySCFG.h
+++ b/moses/src/RuleTable/PhraseDictionarySCFG.h
@@ -28,7 +28,7 @@
namespace Moses
{
-/*** Implementation of a SCFG rule table in a trie. Looking up a rule of
+/** Implementation of a SCFG rule table in a trie. Looking up a rule of
* length n symbols requires n look-ups to find the TargetPhraseCollection.
*/
class PhraseDictionarySCFG : public RuleTableTrie
diff --git a/moses/src/RuleTable/Trie.h b/moses/src/RuleTable/Trie.h
index edb1685c1..374432c6a 100644
--- a/moses/src/RuleTable/Trie.h
+++ b/moses/src/RuleTable/Trie.h
@@ -37,6 +37,7 @@ class WordPenaltyProducer;
/*** Implementation of a SCFG rule table in a trie. Looking up a rule of
* length n symbols requires n look-ups to find the TargetPhraseCollection.
+ * @todo why need this and PhraseDictionarySCFG?
*/
class RuleTableTrie : public PhraseDictionary
{
diff --git a/moses/src/RuleTable/UTrie.h b/moses/src/RuleTable/UTrie.h
index d016ea269..da8d6d90d 100644
--- a/moses/src/RuleTable/UTrie.h
+++ b/moses/src/RuleTable/UTrie.h
@@ -30,7 +30,7 @@ class TargetPhrase;
class TargetPhraseCollection;
class Word;
-/*** Implementation of RuleTableTrie. A RuleTableUTrie is designed to store
+/** Implementation of RuleTableTrie. A RuleTableUTrie is designed to store
* string-to-tree SCFG grammars only (i.e. rules can have distinct labels on
* the target side, but only a generic non-terminal on the source side).
* A key is the source RHS (one symbol per edge) of a rule and a mapped value
diff --git a/moses/src/RuleTable/UTrieNode.h b/moses/src/RuleTable/UTrieNode.h
index 8241e5a92..8ad988806 100644
--- a/moses/src/RuleTable/UTrieNode.h
+++ b/moses/src/RuleTable/UTrieNode.h
@@ -39,6 +39,7 @@ namespace Moses
class RuleTableUTrie;
+//! @todo ask phil williams - whats the diff between this and phrasedictionaryNode
class UTrieNode
{
public:
diff --git a/moses/src/Scope3Parser/ApplicableRuleTrie.h b/moses/src/Scope3Parser/ApplicableRuleTrie.h
index 549e4988e..ea21248ab 100644
--- a/moses/src/Scope3Parser/ApplicableRuleTrie.h
+++ b/moses/src/Scope3Parser/ApplicableRuleTrie.h
@@ -31,6 +31,8 @@ namespace Moses
struct VarSpanNode;
+/** @todo what is this?
+ */
struct ApplicableRuleTrie
{
public:
diff --git a/moses/src/Scope3Parser/IntermediateVarSpanNode.h b/moses/src/Scope3Parser/IntermediateVarSpanNode.h
index 9189dabdd..353fabf22 100644
--- a/moses/src/Scope3Parser/IntermediateVarSpanNode.h
+++ b/moses/src/Scope3Parser/IntermediateVarSpanNode.h
@@ -24,6 +24,8 @@
namespace Moses
{
+/** @todo what is this?
+ */
struct IntermediateVarSpanNode
{
public:
diff --git a/moses/src/Scope3Parser/Parser.h b/moses/src/Scope3Parser/Parser.h
index 0b2386469..af466ec37 100644
--- a/moses/src/Scope3Parser/Parser.h
+++ b/moses/src/Scope3Parser/Parser.h
@@ -42,6 +42,8 @@ class ChartCellCollection;
class ChartHypothesisCollection;
class WordsRange;
+/** @todo what is this?
+ */
class Scope3Parser : public ChartRuleLookupManager
{
public:
diff --git a/moses/src/Scope3Parser/StackLattice.h b/moses/src/Scope3Parser/StackLattice.h
index e7f8f8c67..5ad9d7e55 100644
--- a/moses/src/Scope3Parser/StackLattice.h
+++ b/moses/src/Scope3Parser/StackLattice.h
@@ -27,11 +27,12 @@
namespace Moses
{
-// For an entry, lattice[i][j][k][l]:
-// i = offset from span start
-// j = NT index (zero-based, from left of rule)
-// k = span
-// l = label index (as in UTrieNode)
+/** For an entry, lattice[i][j][k][l]:
+ * i = offset from span start
+ * j = NT index (zero-based, from left of rule)
+ * k = span
+ * l = label index (as in UTrieNode)
+ */
typedef std::vector<std::vector<std::vector<StackVec> > > StackLattice;
}
diff --git a/moses/src/Scope3Parser/StackLatticeBuilder.h b/moses/src/Scope3Parser/StackLatticeBuilder.h
index ef018ccc0..f17051dc2 100644
--- a/moses/src/Scope3Parser/StackLatticeBuilder.h
+++ b/moses/src/Scope3Parser/StackLatticeBuilder.h
@@ -28,6 +28,8 @@ namespace Moses
class ChartCellCollection;
+/** @todo what is this?
+ */
class StackLatticeBuilder
{
public:
diff --git a/moses/src/Scope3Parser/VarSpanNode.h b/moses/src/Scope3Parser/VarSpanNode.h
index 1c53113dc..7ae8437ef 100644
--- a/moses/src/Scope3Parser/VarSpanNode.h
+++ b/moses/src/Scope3Parser/VarSpanNode.h
@@ -31,6 +31,8 @@
namespace Moses
{
+/** @todo what is this?
+ */
struct VarSpanNode
{
public:
diff --git a/moses/src/Scope3Parser/VarSpanTrieBuilder.h b/moses/src/Scope3Parser/VarSpanTrieBuilder.h
index cad0e1b2c..e987ed5db 100644
--- a/moses/src/Scope3Parser/VarSpanTrieBuilder.h
+++ b/moses/src/Scope3Parser/VarSpanTrieBuilder.h
@@ -30,6 +30,8 @@ namespace Moses
struct ApplicableRuleTrie;
struct VarSpanNode;
+/** @todo what is this?
+ */
class VarSpanTrieBuilder
{
public:
diff --git a/moses/src/ScoreComponentCollection.h b/moses/src/ScoreComponentCollection.h
index 53f680d7a..3b9ff1d08 100644
--- a/moses/src/ScoreComponentCollection.h
+++ b/moses/src/ScoreComponentCollection.h
@@ -73,7 +73,13 @@ private:
static IndexPair GetIndexes(const ScoreProducer* sp)
{
ScoreIndexMap::const_iterator indexIter = s_scoreIndexes.find(sp);
- CHECK(indexIter != s_scoreIndexes.end());
+ if (indexIter == s_scoreIndexes.end()) {
+ std::cerr << "ERROR: ScoreProducer: " << sp->GetScoreProducerDescription() <<
+ " not registered with ScoreIndexMap" << std::endl;
+ std::cerr << "You must call ScoreComponentCollection.RegisterScoreProducer() " <<
+ " for every ScoreProducer" << std::endl;
+ abort();
+ }
return indexIter->second;
}
diff --git a/moses/src/Search.cpp b/moses/src/Search.cpp
index aa3bfe59f..9fc2f9d3f 100644
--- a/moses/src/Search.cpp
+++ b/moses/src/Search.cpp
@@ -2,6 +2,7 @@
#include "Manager.h"
#include "SearchCubePruning.h"
#include "SearchNormal.h"
+#include "SearchNormalBatch.h"
#include "UserMessage.h"
namespace Moses
@@ -18,6 +19,8 @@ Search *Search::CreateSearch(Manager& manager, const InputType &source,
return new SearchCubePruning(manager, source, transOptColl);
case CubeGrowing:
return NULL;
+ case NormalBatch:
+ return new SearchNormalBatch(manager, source, transOptColl);
default:
UserMessage::Add("ERROR: search. Aborting\n");
abort();
diff --git a/moses/src/Search.h b/moses/src/Search.h
index 67a6c69ea..d961d17cd 100644
--- a/moses/src/Search.h
+++ b/moses/src/Search.h
@@ -14,6 +14,9 @@ class InputType;
class TranslationOptionCollection;
class Manager;
+/** Abstract class used in the phrase-based decoder.
+ * Cube pruning and normal searches are the classes that inherits from this class
+ */
class Search
{
public:
diff --git a/moses/src/SearchCubePruning.h b/moses/src/SearchCubePruning.h
index 9f8f73ef5..56565a81f 100644
--- a/moses/src/SearchCubePruning.h
+++ b/moses/src/SearchCubePruning.h
@@ -11,6 +11,9 @@ namespace Moses
class InputType;
class TranslationOptionCollection;
+/** Functions and variables you need to decoder an input using the phrase-based decoder with cube-pruning
+ * Instantiated by the Manager class
+ */
class SearchCubePruning: public Search
{
protected:
diff --git a/moses/src/SearchNormal.h b/moses/src/SearchNormal.h
index 8b4f904d9..c1f3fb0ba 100644
--- a/moses/src/SearchNormal.h
+++ b/moses/src/SearchNormal.h
@@ -14,6 +14,9 @@ class Manager;
class InputType;
class TranslationOptionCollection;
+/** Functions and variables you need to decoder an input using the phrase-based decoder (NO cube-pruning)
+ * Instantiated by the Manager class
+ */
class SearchNormal: public Search
{
protected:
@@ -29,7 +32,7 @@ protected:
// functions for creating hypotheses
void ProcessOneHypothesis(const Hypothesis &hypothesis);
void ExpandAllHypotheses(const Hypothesis &hypothesis, size_t startPos, size_t endPos);
- void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
+ virtual void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
public:
SearchNormal(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
diff --git a/moses/src/SearchNormalBatch.cpp b/moses/src/SearchNormalBatch.cpp
new file mode 100644
index 000000000..b32cab0d2
--- /dev/null
+++ b/moses/src/SearchNormalBatch.cpp
@@ -0,0 +1,221 @@
+#include "SearchNormalBatch.h"
+#include "LM/Base.h"
+#include "Manager.h"
+
+//#include <google/profiler.h>
+
+using namespace std;
+
+namespace Moses
+{
+SearchNormalBatch::SearchNormalBatch(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl)
+ :SearchNormal(manager, source, transOptColl)
+ ,m_batch_size(10000)
+{
+ m_max_stack_size = StaticData::Instance().GetMaxHypoStackSize();
+
+ // Split the feature functions into sets of stateless, stateful
+ // distributed lm, and stateful non-distributed.
+ const vector<const StatefulFeatureFunction*>& ffs =
+ m_manager.GetTranslationSystem()->GetStatefulFeatureFunctions();
+ for (unsigned i = 0; i < ffs.size(); ++i) {
+ if (ffs[i]->GetScoreProducerDescription() == "DLM_5gram") {
+ m_dlm_ffs[i] = const_cast<LanguageModel*>(static_cast<const LanguageModel* const>(ffs[i]));
+ m_dlm_ffs[i]->SetFFStateIdx(i);
+ }
+ else {
+ m_stateful_ffs[i] = const_cast<StatefulFeatureFunction*>(ffs[i]);
+ }
+ }
+ m_stateless_ffs = const_cast< vector<const StatelessFeatureFunction*>& >(m_manager.GetTranslationSystem()->GetStatelessFeatureFunctions());
+
+}
+
+SearchNormalBatch::~SearchNormalBatch() {
+}
+
+/**
+ * Main decoder loop that translates a sentence by expanding
+ * hypotheses stack by stack, until the end of the sentence.
+ */
+void SearchNormalBatch::ProcessSentence()
+{
+ const StaticData &staticData = StaticData::Instance();
+ SentenceStats &stats = m_manager.GetSentenceStats();
+ clock_t t=0; // used to track time for steps
+
+ // initial seed hypothesis: nothing translated, no words produced
+ Hypothesis *hypo = Hypothesis::Create(m_manager,m_source, m_initialTargetPhrase);
+ m_hypoStackColl[0]->AddPrune(hypo);
+
+ // go through each stack
+ std::vector < HypothesisStack* >::iterator iterStack;
+ for (iterStack = m_hypoStackColl.begin() ; iterStack != m_hypoStackColl.end() ; ++iterStack) {
+ // check if decoding ran out of time
+ double _elapsed_time = GetUserTime();
+ if (_elapsed_time > staticData.GetTimeoutThreshold()) {
+ VERBOSE(1,"Decoding is out of time (" << _elapsed_time << "," << staticData.GetTimeoutThreshold() << ")" << std::endl);
+ interrupted_flag = 1;
+ return;
+ }
+ HypothesisStackNormal &sourceHypoColl = *static_cast<HypothesisStackNormal*>(*iterStack);
+
+ // the stack is pruned before processing (lazy pruning):
+ VERBOSE(3,"processing hypothesis from next stack");
+ IFVERBOSE(2) {
+ t = clock();
+ }
+ sourceHypoColl.PruneToSize(staticData.GetMaxHypoStackSize());
+ VERBOSE(3,std::endl);
+ sourceHypoColl.CleanupArcList();
+ IFVERBOSE(2) {
+ stats.AddTimeStack( clock()-t );
+ }
+
+ // go through each hypothesis on the stack and try to expand it
+ HypothesisStackNormal::const_iterator iterHypo;
+ for (iterHypo = sourceHypoColl.begin() ; iterHypo != sourceHypoColl.end() ; ++iterHypo) {
+ Hypothesis &hypothesis = **iterHypo;
+ ProcessOneHypothesis(hypothesis); // expand the hypothesis
+ }
+ EvalAndMergePartialHypos();
+
+ // some logging
+ IFVERBOSE(2) {
+ OutputHypoStackSize();
+ }
+
+ // this stack is fully expanded;
+ actual_hypoStack = &sourceHypoColl;
+ }
+
+ EvalAndMergePartialHypos();
+
+ // some more logging
+ IFVERBOSE(2) {
+ m_manager.GetSentenceStats().SetTimeTotal( clock()-m_start );
+ }
+ VERBOSE(2, m_manager.GetSentenceStats());
+}
+
+/**
+ * Expand one hypothesis with a translation option.
+ * this involves initial creation, scoring and adding it to the proper stack
+ * \param hypothesis hypothesis to be expanded upon
+ * \param transOpt translation option (phrase translation)
+ * that is applied to create the new hypothesis
+ * \param expectedScore base score for early discarding
+ * (base hypothesis score plus future score estimation)
+ */
+void SearchNormalBatch::ExpandHypothesis(const Hypothesis &hypothesis, const TranslationOption &transOpt, float expectedScore)
+{
+ // Check if the number of partial hypotheses exceeds the batch size.
+ if (m_partial_hypos.size() >= m_batch_size) {
+ EvalAndMergePartialHypos();
+ }
+
+ const StaticData &staticData = StaticData::Instance();
+ SentenceStats &stats = m_manager.GetSentenceStats();
+ clock_t t=0; // used to track time for steps
+
+ Hypothesis *newHypo;
+ if (! staticData.UseEarlyDiscarding()) {
+ // simple build, no questions asked
+ IFVERBOSE(2) {
+ t = clock();
+ }
+ newHypo = hypothesis.CreateNext(transOpt, m_constraint);
+ IFVERBOSE(2) {
+ stats.AddTimeBuildHyp( clock()-t );
+ }
+ if (newHypo==NULL) return;
+ //newHypo->CalcScore(m_transOptColl.GetFutureScore());
+
+ // Issue DLM requests for new hypothesis and put into the list of
+ // partial hypotheses.
+ std::map<int, LanguageModel*>::iterator dlm_iter;
+ for (dlm_iter = m_dlm_ffs.begin();
+ dlm_iter != m_dlm_ffs.end();
+ ++dlm_iter) {
+ const FFState* input_state = newHypo->GetPrevHypo() ? newHypo->GetPrevHypo()->GetFFState((*dlm_iter).first) : NULL;
+ (*dlm_iter).second->IssueRequestsFor(*newHypo, input_state);
+ }
+ m_partial_hypos.push_back(newHypo);
+ }
+ else {
+ std::cerr << "can't use early discarding with batch decoding!" << std::endl;
+ abort();
+ }
+}
+
+void SearchNormalBatch::EvalAndMergePartialHypos() {
+ std::vector<Hypothesis*>::iterator partial_hypo_iter;
+ for (partial_hypo_iter = m_partial_hypos.begin();
+ partial_hypo_iter != m_partial_hypos.end();
+ ++partial_hypo_iter) {
+ Hypothesis* hypo = *partial_hypo_iter;
+
+ // Incorporate the translation option scores.
+ hypo->IncorporateTransOptScores();
+
+ // Evaluate with other ffs.
+ std::map<int, StatefulFeatureFunction*>::iterator sfff_iter;
+ for (sfff_iter = m_stateful_ffs.begin();
+ sfff_iter != m_stateful_ffs.end();
+ ++sfff_iter) {
+ hypo->EvaluateWith((*sfff_iter).second, (*sfff_iter).first);
+ }
+ std::vector<const StatelessFeatureFunction*>::iterator slff_iter;
+ for (slff_iter = m_stateless_ffs.begin();
+ slff_iter != m_stateless_ffs.end();
+ ++slff_iter) {
+ hypo->EvaluateWith(*slff_iter);
+ }
+
+ // Calculate future score.
+ hypo->CalculateFutureScore(m_transOptColl.GetFutureScore());
+ }
+
+ // Wait for all requests from the distributed LM to come back.
+ std::map<int, LanguageModel*>::iterator dlm_iter;
+ for (dlm_iter = m_dlm_ffs.begin();
+ dlm_iter != m_dlm_ffs.end();
+ ++dlm_iter) {
+ (*dlm_iter).second->sync();
+ }
+
+ // Incorporate the DLM scores into all hypotheses and put into their
+ // stacks.
+ for (partial_hypo_iter = m_partial_hypos.begin();
+ partial_hypo_iter != m_partial_hypos.end();
+ ++partial_hypo_iter) {
+ Hypothesis* hypo = *partial_hypo_iter;
+
+ // Calculate DLM scores.
+ std::map<int, LanguageModel*>::iterator dlm_iter;
+ for (dlm_iter = m_dlm_ffs.begin();
+ dlm_iter != m_dlm_ffs.end();
+ ++dlm_iter) {
+ hypo->EvaluateWith((*dlm_iter).second, (*dlm_iter).first);
+ }
+
+ // Calculate the final score.
+ hypo->CalculateFinalScore();
+
+ // Put completed hypothesis onto its stack.
+ size_t wordsTranslated = hypo->GetWordsBitmap().GetNumWordsCovered();
+ m_hypoStackColl[wordsTranslated]->AddPrune(hypo);
+ }
+ m_partial_hypos.clear();
+
+ std::vector < HypothesisStack* >::iterator stack_iter;
+ HypothesisStackNormal* stack;
+ for (stack_iter = m_hypoStackColl.begin();
+ stack_iter != m_hypoStackColl.end();
+ ++stack_iter) {
+ stack = static_cast<HypothesisStackNormal*>(*stack_iter);
+ stack->PruneToSize(m_max_stack_size);
+ }
+}
+
+}
diff --git a/moses/src/SearchNormalBatch.h b/moses/src/SearchNormalBatch.h
new file mode 100644
index 000000000..fcfda7054
--- /dev/null
+++ b/moses/src/SearchNormalBatch.h
@@ -0,0 +1,43 @@
+#ifndef moses_SearchNormalBatch_h
+#define moses_SearchNormalBatch_h
+
+#include "SearchNormal.h"
+
+namespace Moses
+{
+
+class Manager;
+class InputType;
+class TranslationOptionCollection;
+
+/** Implements the phrase-based stack decoding algorithm (no cube pruning) with a twist...
+ * Language model requests are batched together, duplicate requests are removed, and requests are sent together.
+ * Useful for distributed LM where network latency is an issue.
+ */
+class SearchNormalBatch: public SearchNormal
+{
+protected:
+
+ // Added for asynclm decoding.
+ std::vector<const StatelessFeatureFunction*> m_stateless_ffs;
+ std::map<int, LanguageModel*> m_dlm_ffs;
+ std::map<int, StatefulFeatureFunction*> m_stateful_ffs;
+ std::vector<Hypothesis*> m_partial_hypos;
+ int m_batch_size;
+ int m_max_stack_size;
+
+ // functions for creating hypotheses
+ void ExpandHypothesis(const Hypothesis &hypothesis,const TranslationOption &transOpt, float expectedScore);
+ void EvalAndMergePartialHypos();
+
+public:
+ SearchNormalBatch(Manager& manager, const InputType &source, const TranslationOptionCollection &transOptColl);
+ ~SearchNormalBatch();
+
+ void ProcessSentence();
+
+};
+
+}
+
+#endif
diff --git a/moses/src/Sentence.h b/moses/src/Sentence.h
index b1c25f9dd..eb5b586b1 100644
--- a/moses/src/Sentence.h
+++ b/moses/src/Sentence.h
@@ -38,7 +38,7 @@ class TranslationOption;
class TranslationOptionCollection;
-/***
+/**
* A Phrase class with an ID. Used specifically as source input so contains functionality to read
* from IODevice and create trans opt
*/
diff --git a/moses/src/SentenceStats.h b/moses/src/SentenceStats.h
index 6a72008ba..1aff01a43 100644
--- a/moses/src/SentenceStats.h
+++ b/moses/src/SentenceStats.h
@@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
+//! Hold info about recombination. Used by SentenceStats class
struct RecombinationInfo {
RecombinationInfo() {} //for std::vector
RecombinationInfo(size_t srcWords, float gProb, float bProb)
@@ -44,7 +45,7 @@ struct RecombinationInfo {
float betterProb, worseProb;
};
-/***
+/**
* stats relating to decoder operation on a given sentence
*/
class SentenceStats
@@ -185,6 +186,9 @@ protected:
void AddDeletedWords(const Hypothesis& hypo);
//hypotheses
+ // TODO: Move away from clock_t in favor of just storing doubles of the number of seconds
+ // since clock seconds aren't reliable in a multi-threaded environment -Jon
+ // (see Manager.cpp for some initial work moving in this direction)
std::vector<RecombinationInfo> m_recombinationInfos;
unsigned int m_numHyposCreated;
unsigned int m_numHyposPruned;
diff --git a/moses/src/SourceWordDeletionFeature.cpp b/moses/src/SourceWordDeletionFeature.cpp
index 4f247c7e7..52da2fdf7 100644
--- a/moses/src/SourceWordDeletionFeature.cpp
+++ b/moses/src/SourceWordDeletionFeature.cpp
@@ -5,6 +5,7 @@
#include "Hypothesis.h"
#include "ChartHypothesis.h"
#include "ScoreComponentCollection.h"
+#include "TranslationOption.h"
namespace Moses {
@@ -30,22 +31,23 @@ bool SourceWordDeletionFeature::Load(const std::string &filePath)
return true;
}
-void SourceWordDeletionFeature::Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const
+void SourceWordDeletionFeature::Evaluate(
+ const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
+ const TargetPhrase& targetPhrase = context.GetTargetPhrase();
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
const AlignmentInfo::CollType &alignment = alignmentInfo.GetAlignments();
ComputeFeatures(targetPhrase, accumulator, alignment);
}
-void SourceWordDeletionFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureId,
- ScoreComponentCollection* accumulator) const
+void SourceWordDeletionFeature::EvaluateChart(
+ const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- TargetPhrase targetPhrase = cur_hypo.GetCurrTargetPhrase();
- const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
+ const AlignmentInfo &alignmentInfo = context.GetTargetPhrase().GetAlignmentInfo();
const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
- ComputeFeatures(targetPhrase, accumulator, alignment);
+ ComputeFeatures(context.GetTargetPhrase(), accumulator, alignment);
}
void SourceWordDeletionFeature::ComputeFeatures(const TargetPhrase& targetPhrase,
diff --git a/moses/src/SourceWordDeletionFeature.h b/moses/src/SourceWordDeletionFeature.h
index 6d8d21a42..c3a7b3f6b 100644
--- a/moses/src/SourceWordDeletionFeature.h
+++ b/moses/src/SourceWordDeletionFeature.h
@@ -29,11 +29,10 @@ public:
}
bool Load(const std::string &filePath);
- void Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
+ void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
- void EvaluateChart(const ChartHypothesis& cur_hypo,
- int featureId,
+ void EvaluateChart(const ChartBasedFeatureContext& context,
ScoreComponentCollection* accumulator) const;
void ComputeFeatures(const TargetPhrase& targetPhrase,
diff --git a/moses/src/SparsePhraseDictionaryFeature.cpp b/moses/src/SparsePhraseDictionaryFeature.cpp
index 8b1a35ee3..7177159df 100644
--- a/moses/src/SparsePhraseDictionaryFeature.cpp
+++ b/moses/src/SparsePhraseDictionaryFeature.cpp
@@ -6,9 +6,10 @@ namespace Moses
void SparsePhraseDictionaryFeature::Evaluate(
- const TargetPhrase& ,
- ScoreComponentCollection* ) const
+ const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
+ //not used
}
diff --git a/moses/src/SparsePhraseDictionaryFeature.h b/moses/src/SparsePhraseDictionaryFeature.h
index ad38e7078..4185cac0d 100644
--- a/moses/src/SparsePhraseDictionaryFeature.h
+++ b/moses/src/SparsePhraseDictionaryFeature.h
@@ -1,6 +1,8 @@
#ifndef moses_SparsePhraseFeature_h
#define moses_SparsePhraseFeature_h
+#include <stdexcept>
+
#include "FactorCollection.h"
#include "FeatureFunction.h"
@@ -16,14 +18,13 @@ public:
SparsePhraseDictionaryFeature():
StatelessFeatureFunction("stm", ScoreProducer::unlimited) {}
- void Evaluate(const TargetPhrase& cur_phrase,
- ScoreComponentCollection* accumulator) const;
+ void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
void EvaluateChart(
- const ChartHypothesis&,
- int /* featureID */,
+ const ChartBasedFeatureContext& context,
ScoreComponentCollection*) const {
- CHECK(0); // feature function not valid in chart decoder
+ throw std::logic_error("SparsePhraseDictionaryFeature not valid in chart decoder");
}
// basic properties
diff --git a/moses/src/SquareMatrix.h b/moses/src/SquareMatrix.h
index 3d36962d4..ed9a5e8fa 100644
--- a/moses/src/SquareMatrix.h
+++ b/moses/src/SquareMatrix.h
@@ -30,7 +30,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
-//! A square array of floats to store future costs
+//! A square array of floats to store future costs in the phrase-based decoder
class SquareMatrix
{
friend std::ostream& operator<<(std::ostream &out, const SquareMatrix &matrix);
diff --git a/moses/src/StaticData.cpp b/moses/src/StaticData.cpp
index 60f6bc9c9..39c47178a 100644
--- a/moses/src/StaticData.cpp
+++ b/moses/src/StaticData.cpp
@@ -20,6 +20,9 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
+#include <boost/filesystem/operations.hpp>
+#include <boost/filesystem/path.hpp>
+
#include <string>
#include "util/check.hh"
#include "PhraseDictionaryMemory.h"
@@ -114,47 +117,9 @@ StaticData::StaticData()
Phrase::InitializeMemPool();
}
-void StaticData::ClearData() {
- for (size_t i=0; i < m_decodeGraphs.size(); ++i)
- delete m_decodeGraphs[i];
- m_decodeGraphs.clear();
- m_decodeGraphBackoff.clear();
-
- m_translationSystems.clear();
- for (size_t i=0; i < m_wordPenaltyProducers.size(); ++i) {
- ScoreComponentCollection::UnregisterScoreProducer(m_wordPenaltyProducers[i]);
- delete m_wordPenaltyProducers[i];
- }
- m_wordPenaltyProducers.clear();
- for (size_t i=0; i < m_distortionScoreProducers.size(); ++i) {
- ScoreComponentCollection::UnregisterScoreProducer(m_distortionScoreProducers[i]);
- delete m_distortionScoreProducers[i];
- }
- m_distortionScoreProducers.clear();
- for (size_t i=0; i < m_phraseDictionary.size(); ++i) {
- ScoreComponentCollection::UnregisterScoreProducer(m_phraseDictionary[i]);
- delete m_phraseDictionary[i];
- }
- m_phraseDictionary.clear();
- for (size_t i=0; i < m_reorderModels.size(); ++i) {
- ScoreComponentCollection::UnregisterScoreProducer(m_reorderModels[i]);
- delete m_reorderModels[i];
- }
- m_reorderModels.clear();
- for (LMList::const_iterator k = m_languageModel.begin(); k != m_languageModel.end(); ++k) {
- ScoreComponentCollection::UnregisterScoreProducer(*k);
- // delete *k;
- }
- m_languageModel.CleanUp();
-
- ScoreComponentCollection::UnregisterScoreProducer(m_bleuScoreFeature);
- ScoreComponentCollection::UnregisterScoreProducer(m_unknownWordPenaltyProducer);
-
- m_inputFactorOrder.clear();
- m_outputFactorOrder.clear();
-
- ScoreComponentCollection::ResetCounter();
- ScoreProducer::ResetDescriptionCounts();
+bool StaticData::LoadDataStatic(Parameter *parameter, const std::string &execPath) {
+ s_instance.SetExecPath(execPath);
+ return s_instance.LoadData(parameter);
}
bool StaticData::LoadData(Parameter *parameter)
@@ -296,7 +261,18 @@ bool StaticData::LoadData(Parameter *parameter)
} else
m_outputSearchGraphPB = false;
#endif
- SetBooleanParameter( &m_unprunedSearchGraph, "unpruned-search-graph", true );
+ SetBooleanParameter( &m_unprunedSearchGraph, "unpruned-search-graph", false );
+ SetBooleanParameter( &m_includeLHSInSearchGraph, "include-lhs-in-search-graph", false );
+
+ if (m_parameter->isParamSpecified("output-unknowns")) {
+
+ if (m_parameter->GetParam("output-unknowns").size() == 1) {
+ m_outputUnknownsFile =Scan<string>(m_parameter->GetParam("output-unknowns")[0]);
+ } else {
+ UserMessage::Add(string("need to specify exactly one file name for unknowns"));
+ return false;
+ }
+ }
// include feature names in the n-best list
SetBooleanParameter( &m_labeledNBestList, "labeled-n-best-list", true );
@@ -322,13 +298,6 @@ bool StaticData::LoadData(Parameter *parameter)
m_useTransOptCache = false;
}
- SetBooleanParameter( &m_enableOnlineCommand, "enable-online-command", false );
- if (m_enableOnlineCommand == true) {
- VERBOSE(1,"Online commands are enabled.\n");
- VERBOSE(1,"Cache for translation options is disabled.\n");
- m_useTransOptCache = false;
- }
-
std::cerr << "transOptCache: " << m_useTransOptCache << std::endl;
std::cerr << "transOptCache max size: " << m_transOptCacheMaxSize << std::endl;
@@ -416,10 +385,6 @@ bool StaticData::LoadData(Parameter *parameter)
TransformScore(Scan<float>(m_parameter->GetParam("translation-option-threshold")[0]))
: TransformScore(DEFAULT_TRANSLATION_OPTION_THRESHOLD);
- std::cerr << "beamwidth: " << m_beamWidth << std::endl;
- std::cerr << "early discarding threshold: " << m_earlyDiscardingThreshold << std::endl;
- std::cerr << "translOptThreshold: " << m_translationOptionThreshold << std::endl;
-
m_maxNoTransOptPerCoverage = (m_parameter->GetParam("max-trans-opt-per-coverage").size() > 0)
? Scan<size_t>(m_parameter->GetParam("max-trans-opt-per-coverage")[0]) : DEFAULT_MAX_TRANS_OPT_SIZE;
@@ -437,6 +402,9 @@ bool StaticData::LoadData(Parameter *parameter)
SetBooleanParameter(&m_cubePruningLazyScoring, "cube-pruning-lazy-scoring", false);
+ // early distortion cost
+ SetBooleanParameter( &m_useEarlyDistortionCost, "early-distortion-cost", false );
+
// unknown word processing
SetBooleanParameter( &m_dropUnknown, "drop-unknown", false );
@@ -479,7 +447,10 @@ bool StaticData::LoadData(Parameter *parameter)
exit(1);
}
if (m_useConsensusDecoding) m_mbr=true;
-
+
+ // Compact phrase table and reordering model
+ SetBooleanParameter( &m_minphrMemory, "minphr-memory", false );
+ SetBooleanParameter( &m_minlexrMemory, "minlexr-memory", false );
m_timeout_threshold = (m_parameter->GetParam("time-out").size() > 0) ?
Scan<size_t>(m_parameter->GetParam("time-out")[0]) : -1;
@@ -1108,7 +1079,8 @@ bool StaticData::LoadGlobalLexicalModelUnlimited()
const vector<FactorType> inputFactors = Tokenize<FactorType>(factors[0],",");
const vector<FactorType> outputFactors = Tokenize<FactorType>(factors[1],",");
- GlobalLexicalModelUnlimited* glmu = new GlobalLexicalModelUnlimited(inputFactors, outputFactors, biasFeature, ignorePunctuation, context);
+ throw runtime_error("GlobalLexicalModelUnlimited should be reimplemented as a stateful feature");
+ GlobalLexicalModelUnlimited* glmu = NULL; // new GlobalLexicalModelUnlimited(inputFactors, outputFactors, biasFeature, ignorePunctuation, context);
m_globalLexicalModelsUnlimited.push_back(glmu);
if (restricted) {
cerr << "loading word translation word lists from " << filenameSource << " and " << filenameTarget << endl;
@@ -1917,7 +1889,7 @@ bool StaticData::LoadWordTranslationFeature()
UserMessage::Add("Word translation feature needs word alignments in phrase table.");
return false;
}
-
+
// set factor
vector <string> factors = Tokenize(tokens[0],"-");
FactorType factorIdSource = Scan<size_t>(factors[0]);
@@ -2121,6 +2093,25 @@ void StaticData::ReLoadBleuScoreFeatureParameter(float weight)
// ScoreComponentCollection StaticData::GetAllWeightsScoreComponentCollection() const {}
// in ScoreComponentCollection.h
+void StaticData::SetExecPath(const std::string &path)
+{
+ namespace fs = boost::filesystem;
+
+ fs::path full_path( fs::initial_path<fs::path>() );
+
+ full_path = fs::system_complete( fs::path( path ) );
+
+ //Without file name
+ m_binPath = full_path.parent_path().string();
+ cerr << m_binPath << endl;
+
+}
+
+const string &StaticData::GetBinDirectory() const
+{
+ return m_binPath;
+}
+
}
diff --git a/moses/src/StaticData.h b/moses/src/StaticData.h
index d8bc03b06..4d69bf95d 100644
--- a/moses/src/StaticData.h
+++ b/moses/src/StaticData.h
@@ -78,7 +78,10 @@ class TranslationSystem;
typedef std::pair<std::string, float> UnknownLHSEntry;
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
-/** Contains global variables and contants */
+/** Contains global variables and contants.
+ * Only 1 object of this class should be instantiated.
+ * A const object of this class is accessible by any function during decoding by calling StaticData::Instance();
+ */
class StaticData
{
private:
@@ -118,12 +121,14 @@ protected:
m_translationOptionThreshold,
m_wordDeletionWeight;
+
// PhraseTrans, Generation & LanguageModelScore has multiple weights.
int m_maxDistortion;
// do it differently from old pharaoh
// -ve = no limit on distortion
// 0 = no disortion (monotone in old pharaoh)
bool m_reorderingConstraint; //! use additional reordering constraints
+ bool m_useEarlyDistortionCost;
size_t
m_maxHypoStackSize //! hypothesis-stack size that triggers pruning
, m_minHypoStackDiversity //! minimum number of hypothesis in stack for each source word coverage
@@ -149,7 +154,6 @@ protected:
bool m_sourceStartPosMattersForRecombination;
bool m_recoverPath;
bool m_outputHypoScore;
- bool m_enableOnlineCommand; //! flag indicating whether online commands to change some decoder parameters are enable; if yes, the persistent translation option cache is disabled
ParsingAlgorithm m_parsingAlgorithm;
SearchAlgorithm m_searchAlgorithm;
@@ -217,12 +221,17 @@ protected:
bool m_outputSearchGraphPB; //! whether to output search graph as a protobuf
#endif
bool m_unprunedSearchGraph; //! do not exclude dead ends (chart decoder only)
+ bool m_includeLHSInSearchGraph; //! include LHS of rules in search graph
+ std::string m_outputUnknownsFile; //! output unknowns in this file
size_t m_cubePruningPopLimit;
size_t m_cubePruningDiversity;
bool m_cubePruningLazyScoring;
size_t m_ruleLimit;
+ // Whether to load compact phrase table and reordering table into memory
+ bool m_minphrMemory;
+ bool m_minlexrMemory;
// Initial = 0 = can be used when creating poss trans
// Other = 1 = used to calculate LM score once all steps have been processed
@@ -272,6 +281,8 @@ protected:
void ReduceTransOptCache() const;
bool m_continuePartialTranslation;
+ std::string m_binPath;
+
public:
bool IsAlwaysCreateDirectTranslationOption() const {
@@ -300,16 +311,7 @@ public:
#endif
//! Load data into static instance. This function is required as LoadData() is not const
- static bool LoadDataStatic(Parameter *parameter) {
- std::cerr << "Load static data.." << std::endl;
- return s_instance.LoadData(parameter);
- std::cerr << "done.." << std::endl;
- }
- static void ClearDataStatic() {
- std::cerr << "Clear static data.." << std::endl;
- s_instance.ClearData();
- std::cerr << "done.." << std::endl;
- }
+ static bool LoadDataStatic(Parameter *parameter, const std::string &execPath);
//! Main function to load everything. Also initialize the Parameter object
bool LoadData(Parameter *parameter);
@@ -392,6 +394,9 @@ public:
bool UseEarlyDiscarding() const {
return m_earlyDiscardingThreshold != -std::numeric_limits<float>::infinity();
}
+ bool UseEarlyDistortionCost() const {
+ return m_useEarlyDistortionCost;
+ }
float GetTranslationOptionThreshold() const {
return m_translationOptionThreshold;
}
@@ -438,6 +443,15 @@ public:
bool NBestIncludesAlignment() const {
return m_nBestIncludesAlignment;
}
+
+ bool UseMinphrInMemory() const {
+ return m_minphrMemory;
+ }
+
+ bool UseMinlexrInMemory() const {
+ return m_minlexrMemory;
+ }
+
size_t GetNumLinkParams() const {
return m_numLinkParams;
}
@@ -641,10 +655,18 @@ public:
return m_outputSearchGraphPB;
}
#endif
+ const std::string& GetOutputUnknownsFile() const {
+ return m_outputUnknownsFile;
+ }
+
bool GetUnprunedSearchGraph() const {
return m_unprunedSearchGraph;
}
+ bool GetIncludeLHSInSearchGraph() const {
+ return m_includeLHSInSearchGraph;
+ }
+
XmlInputType GetXmlInputType() const {
return m_xmlInputType;
}
@@ -714,6 +736,9 @@ public:
long GetStartTranslationId() const
{ return m_startTranslationId; }
+
+ void SetExecPath(const std::string &path);
+ const std::string &GetBinDirectory() const;
};
}
diff --git a/moses/src/TargetPhrase.cpp b/moses/src/TargetPhrase.cpp
index 2783a7604..f108a0c5a 100644
--- a/moses/src/TargetPhrase.cpp
+++ b/moses/src/TargetPhrase.cpp
@@ -70,10 +70,6 @@ TargetPhrase::TargetPhrase(const Phrase &phrase)
{
}
-TargetPhrase::~TargetPhrase()
-{
-}
-
void TargetPhrase::SetScore(const TranslationSystem* system)
{
// used when creating translations of unknown words:
@@ -289,7 +285,6 @@ TargetPhrase *TargetPhrase::MergeNext(const TargetPhrase &inputPhrase) const
return clone;
}
-
namespace {
void MosesShouldUseExceptions(bool value) {
if (!value) {
@@ -299,6 +294,7 @@ void MosesShouldUseExceptions(bool value) {
}
} // namespace
+
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
{
set<pair<size_t,size_t> > alignmentInfo;
@@ -316,6 +312,8 @@ void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString)
SetAlignmentInfo(alignmentInfo);
}
+
+
void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString, Phrase &sourcePhrase)
{
std::vector<std::string> alignPoints;
@@ -341,9 +339,10 @@ void TargetPhrase::SetAlignmentInfo(const StringPiece &alignString, Phrase &sour
void TargetPhrase::SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo)
{
- m_alignmentInfo = AlignmentInfoCollection::Instance().Add(alignmentInfo);
+ m_alignmentInfo = AlignmentInfoCollection::Instance().Add(alignmentInfo);
}
+
void TargetPhrase::SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo, int* indicator)
{
m_alignmentInfo = AlignmentInfoCollection::Instance().Add(alignmentInfo, indicator);
diff --git a/moses/src/TargetPhrase.h b/moses/src/TargetPhrase.h
index 0660fefa9..24c69c529 100644
--- a/moses/src/TargetPhrase.h
+++ b/moses/src/TargetPhrase.h
@@ -62,7 +62,6 @@ public:
TargetPhrase();
TargetPhrase(std::string out_string);
TargetPhrase(const Phrase &targetPhrase);
- ~TargetPhrase();
//! used by the unknown word handler- these targets
//! don't have a translation score, so wp is the only thing used
@@ -127,13 +126,15 @@ public:
inline float GetFutureScore() const {
return m_fullScore;
}
+ inline void SetFutureScore(float fullScore) {
+ m_fullScore = fullScore;
+ }
inline const ScoreComponentCollection &GetScoreBreakdown() const
{
return m_scoreBreakdown;
}
- //! TODO - why is this needed and is it set correctly by every phrase dictionary class ? should be set in constructor
- // NOTE: this is not set correctly for unbinarized phrase tables
+ //TODO: Probably shouldn't copy this, but otherwise ownership is unclear
void SetSourcePhrase(const Phrase& p)
{
m_sourcePhrase=p;
@@ -148,6 +149,10 @@ public:
const Word &GetTargetLHS() const
{ return m_lhsTarget; }
+ Word &MutableTargetLHS() {
+ return m_lhsTarget;
+ }
+
void SetAlignmentInfo(const StringPiece &alignString);
void SetAlignmentInfo(const StringPiece &alignString, Phrase &sourcePhrase);
void SetAlignmentInfo(const std::set<std::pair<size_t,size_t> > &alignmentInfo);
@@ -169,6 +174,30 @@ public:
std::ostream& operator<<(std::ostream&, const TargetPhrase&);
+/**
+ * Hasher that looks at source and target phrase.
+ **/
+struct RuleHash
+{
+ inline size_t operator()(const TargetPhrase& targetPhrase) const
+ {
+ size_t seed = 0;
+ boost::hash_combine(seed, targetPhrase);
+ boost::hash_combine(seed, targetPhrase.GetSourcePhrase());
+ return seed;
+ }
+};
+
+struct RuleComparator
+{
+ inline bool operator()(const TargetPhrase& lhs, const TargetPhrase& rhs) const
+ {
+ return lhs.Compare(rhs) == 0 &&
+ lhs.GetSourcePhrase().Compare(rhs.GetSourcePhrase()) == 0;
+ }
+
+};
+
}
#endif
diff --git a/moses/src/TargetWordInsertionFeature.cpp b/moses/src/TargetWordInsertionFeature.cpp
index f7d5a5c7e..8b95ccd96 100644
--- a/moses/src/TargetWordInsertionFeature.cpp
+++ b/moses/src/TargetWordInsertionFeature.cpp
@@ -5,6 +5,7 @@
#include "Hypothesis.h"
#include "ChartHypothesis.h"
#include "ScoreComponentCollection.h"
+#include "TranslationOption.h"
namespace Moses {
@@ -30,21 +31,22 @@ bool TargetWordInsertionFeature::Load(const std::string &filePath)
return true;
}
-void TargetWordInsertionFeature::Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const
+void TargetWordInsertionFeature::Evaluate(
+ const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
+ const TargetPhrase& targetPhrase = context.GetTargetPhrase();
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
const AlignmentInfo::CollType &alignment = alignmentInfo.GetAlignments();
ComputeFeatures(targetPhrase, accumulator, alignment);
}
-void TargetWordInsertionFeature::EvaluateChart(const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const
+void TargetWordInsertionFeature::EvaluateChart(
+ const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
- const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
+ const TargetPhrase& targetPhrase = context.GetTargetPhrase();
+ const AlignmentInfo &alignmentInfo = context.GetTargetPhrase().GetAlignmentInfo();
const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
ComputeFeatures(targetPhrase, accumulator, alignment);
}
diff --git a/moses/src/TargetWordInsertionFeature.h b/moses/src/TargetWordInsertionFeature.h
index 0d53582d5..3fd22ec37 100644
--- a/moses/src/TargetWordInsertionFeature.h
+++ b/moses/src/TargetWordInsertionFeature.h
@@ -29,13 +29,11 @@ public:
}
bool Load(const std::string &filePath);
- void Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
+ void Evaluate( const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
- void EvaluateChart(
- const ChartHypothesis& cur_hypo,
- int featureID,
- ScoreComponentCollection* accumulator) const;
+ void EvaluateChart( const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
void ComputeFeatures(const TargetPhrase& targetPhrase,
ScoreComponentCollection* accumulator,
diff --git a/moses/src/ThreadPool.h b/moses/src/ThreadPool.h
index 4f0f64488..fad236a98 100644
--- a/moses/src/ThreadPool.h
+++ b/moses/src/ThreadPool.h
@@ -41,13 +41,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/**
* Classes to implement a ThreadPool.
- **/
-
+**/
namespace Moses {
-/**
-* A task to be executed by the ThreadPool
-**/
+/** A task to be executed by the ThreadPool
+ */
class Task
{
public:
@@ -111,6 +109,7 @@ public:
#ifdef BOOST_HAS_PTHREADS
pthread_t tid = pthread_self();
#else
+ typedef void * pthread_t;
pthread_t tid = 0;
#endif
std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl;
diff --git a/moses/src/Timer.cpp b/moses/src/Timer.cpp
index bbe1bcabd..318c6a410 100644
--- a/moses/src/Timer.cpp
+++ b/moses/src/Timer.cpp
@@ -15,9 +15,17 @@ namespace Moses
*/
double Timer::elapsed_time()
{
+#ifdef CLOCK_MONOTONIC
+ struct timespec now;
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ double elapsed = (now.tv_sec - start_time.tv_sec);
+ elapsed += (now.tv_nsec - start_time.tv_nsec) / 1000000000.0;
+ return elapsed;
+#else
time_t now;
time(&now);
return difftime(now, start_time);
+#endif
}
/***
@@ -48,7 +56,11 @@ void Timer::start(const char* msg)
running = true;
// Set the start time;
+#ifdef CLOCK_MONOTONIC
+ clock_gettime(CLOCK_MONOTONIC, &start_time);
+#else
time(&start_time);
+#endif
}
/***
diff --git a/moses/src/Timer.h b/moses/src/Timer.h
index 1d7c4aacc..be2fbc6ab 100644
--- a/moses/src/Timer.h
+++ b/moses/src/Timer.h
@@ -9,13 +9,24 @@
namespace Moses
{
+/** Wrapper around time_t to time how long things have been running
+ * according to walltime. We avoid CPU time since it is less reliable
+ * in a multi-threaded environment and can spuriously include clock cycles
+ * used by other threads in the same process.
+ */
class Timer
{
friend std::ostream& operator<<(std::ostream& os, Timer& t);
private:
bool running;
+ // note: this only has the resolution of seconds, we'd often like better resolution
+ // we make our best effort to do this on a system-by-system basis
+#ifdef CLOCK_MONOTONIC
+ struct timespec start_time;
+#else
time_t start_time;
+#endif
// in seconds
double elapsed_time();
@@ -25,7 +36,14 @@ public:
* 'running' is initially false. A timer needs to be explicitly started
* using 'start' or 'restart'
*/
- Timer() : running(false), start_time(0) { }
+ Timer() : running(false) {
+#ifdef CLOCK_MONOTONIC
+ start_time.tv_sec = 0;
+ start_time.tv_nsec = 0;
+#else
+ start_time = 0;
+#endif
+ }
void start(const char* msg = 0);
// void restart(const char* msg = 0);
diff --git a/moses/src/TranslationOption.cpp b/moses/src/TranslationOption.cpp
index c823dadb8..e15cf03c9 100644
--- a/moses/src/TranslationOption.cpp
+++ b/moses/src/TranslationOption.cpp
@@ -40,20 +40,8 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
, const InputType &inputType)
: m_targetPhrase(targetPhrase)
, m_sourceWordsRange(wordsRange)
-{
- // set score
- m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
-
- if (inputType.GetType() == SentenceInput)
- {
- Phrase phrase = inputType.GetSubString(wordsRange);
- m_sourcePhrase = new Phrase(phrase);
- }
- else
- { // TODO lex reordering with confusion network
- m_sourcePhrase = new Phrase(targetPhrase.GetSourcePhrase());
- }
-}
+ , m_scoreBreakdown(targetPhrase.GetScoreBreakdown())
+{}
//TODO this should be a factory function!
TranslationOption::TranslationOption(const WordsRange &wordsRange
@@ -70,35 +58,11 @@ TranslationOption::TranslationOption(const WordsRange &wordsRange
score[0] = FloorScore(-numeric_limits<float>::infinity());
m_scoreBreakdown.Assign(scoreProducer, score);
}
-
- if (inputType.GetType() == SentenceInput)
- {
- Phrase phrase = inputType.GetSubString(wordsRange);
- m_sourcePhrase = new Phrase(phrase);
- }
- else
- { // TODO lex reordering with confusion network
- m_sourcePhrase = new Phrase(targetPhrase.GetSourcePhrase());
- //the target phrase from a confusion network/lattice has input scores that we want to keep
- m_scoreBreakdown.PlusEquals(targetPhrase.GetScoreBreakdown());
-
- }
}
-TranslationOption::TranslationOption(const TranslationOption &copy)
- : m_targetPhrase(copy.m_targetPhrase)
-//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
- , m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
- , m_sourceWordsRange(copy.m_sourceWordsRange)
- , m_futureScore(copy.m_futureScore)
- , m_scoreBreakdown(copy.m_scoreBreakdown)
- , m_cachedScores(copy.m_cachedScores)
-{}
-
TranslationOption::TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange)
: m_targetPhrase(copy.m_targetPhrase)
//, m_sourcePhrase(new Phrase(*copy.m_sourcePhrase)) // TODO use when confusion network trans opt for confusion net properly implemented
- , m_sourcePhrase( (copy.m_sourcePhrase == NULL) ? new Phrase(ARRAY_SIZE_INCR) : new Phrase(*copy.m_sourcePhrase))
, m_sourceWordsRange(sourceWordsRange)
, m_futureScore(copy.m_futureScore)
, m_scoreBreakdown(copy.m_scoreBreakdown)
@@ -148,6 +112,7 @@ void TranslationOption::CalcScore(const TranslationSystem* system)
allLM.CalcScore(GetTargetPhrase(), retFullScore, ngramScore, oovScore, &m_scoreBreakdown);
size_t phraseSize = GetTargetPhrase().GetSize();
+
// future score
m_futureScore = retFullScore - ngramScore + oovScore
+ m_scoreBreakdown.InnerProduct(StaticData::Instance().GetAllWeights()) - phraseSize *
@@ -168,7 +133,7 @@ ostream& operator<<(ostream& out, const TranslationOption& possibleTranslation)
void TranslationOption::CacheScores(const ScoreProducer &producer, const Scores &score)
{
- m_cachedScores[&producer] = new Scores(score);
+ m_cachedScores[&producer] = score;
}
}
diff --git a/moses/src/TranslationOption.h b/moses/src/TranslationOption.h
index fa4065b80..89ebdc05b 100644
--- a/moses/src/TranslationOption.h
+++ b/moses/src/TranslationOption.h
@@ -24,6 +24,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <map>
#include <vector>
+#include <boost/functional/hash.hpp>
#include "WordsBitmap.h"
#include "WordsRange.h"
#include "Phrase.h"
@@ -63,7 +64,6 @@ class TranslationOption
protected:
TargetPhrase m_targetPhrase; /*< output phrase when using this translation option */
- Phrase *m_sourcePhrase; /*< input phrase translated by this */
const WordsRange m_sourceWordsRange; /*< word position in the input that are covered by this translation option */
float m_futureScore; /*< estimate of total cost when using this translation option, includes language model probabilities */
@@ -73,7 +73,7 @@ protected:
//! possible to estimate, it is included here.
ScoreComponentCollection m_scoreBreakdown;
- typedef std::map<const ScoreProducer *, const Scores *> _ScoreCacheMap;
+ typedef std::map<const ScoreProducer *, Scores> _ScoreCacheMap;
_ScoreCacheMap m_cachedScores;
public:
@@ -86,17 +86,10 @@ public:
, const TargetPhrase &targetPhrase
, const InputType &inputType
, const UnknownWordPenaltyProducer* uwpProducer);
- /** copy constructor */
- TranslationOption(const TranslationOption &copy);
/** copy constructor, but change words range. used by caching */
TranslationOption(const TranslationOption &copy, const WordsRange &sourceWordsRange);
- ~TranslationOption() {
- delete m_sourcePhrase;
- for(_ScoreCacheMap::const_iterator it = m_cachedScores.begin(); it != m_cachedScores.end(); ++it)
- delete it->second;
- }
/** returns true if all feature types in featuresToCheck are compatible between the two phrases */
bool IsCompatible(const Phrase& phrase, const std::vector<FactorType>& featuresToCheck) const;
@@ -116,7 +109,7 @@ public:
/** returns source phrase */
const Phrase *GetSourcePhrase() const {
- return m_sourcePhrase;
+ return &(m_targetPhrase.GetSourcePhrase());
}
/** whether source span overlaps with those of a hypothesis */
@@ -158,7 +151,7 @@ public:
if(it == m_cachedScores.end())
return NULL;
else
- return it->second;
+ return &(it->second);
}
/** Calculate future score and n-gram score of this trans option, plus the score breakdowns */
@@ -167,8 +160,26 @@ public:
void CacheScores(const ScoreProducer &scoreProducer, const Scores &score);
TO_STRING();
+
+ bool operator== (const TranslationOption &rhs) const
+ {
+ return m_sourceWordsRange == rhs.m_sourceWordsRange &&
+ m_targetPhrase == rhs.m_targetPhrase;
+ }
+
};
+
+//XXX: This doesn't look at the alignment. Is this correct?
+inline size_t hash_value(const TranslationOption& translationOption) {
+ size_t seed = 0;
+ boost::hash_combine(seed, translationOption.GetTargetPhrase());
+ boost::hash_combine(seed, translationOption.GetStartPos());
+ boost::hash_combine(seed, translationOption.GetEndPos());
+ return seed;
+}
+
+
}
#endif
diff --git a/moses/src/TranslationOptionCollection.cpp b/moses/src/TranslationOptionCollection.cpp
index c556d6c47..5b3c4d94f 100644
--- a/moses/src/TranslationOptionCollection.cpp
+++ b/moses/src/TranslationOptionCollection.cpp
@@ -405,6 +405,9 @@ void TranslationOptionCollection::CreateTranslationOptions()
VERBOSE(2,"Translation Option Collection\n " << *this << endl);
+ // Incorporate distributed lm scores.
+ IncorporateDLMScores();
+
ProcessUnknownWord();
// Prune
@@ -417,6 +420,75 @@ void TranslationOptionCollection::CreateTranslationOptions()
// Cached lex reodering costs
CacheLexReordering();
+
+ // stateless feature scores
+ PreCalculateScores();
+}
+
+void TranslationOptionCollection::IncorporateDLMScores() {
+ // Build list of dlms.
+ const vector<const StatefulFeatureFunction*>& ffs =
+ m_system->GetStatefulFeatureFunctions();
+ std::map<int, LanguageModel*> dlm_ffs;
+ for (unsigned i = 0; i < ffs.size(); ++i) {
+ if (ffs[i]->GetScoreProducerDescription() == "DLM_5gram") {
+ dlm_ffs[i] = const_cast<LanguageModel*>(static_cast<const LanguageModel* const>(ffs[i]));
+ dlm_ffs[i]->SetFFStateIdx(i);
+ }
+ }
+
+ // Don't need to do anything if we don't have any distributed
+ // language models.
+ if (dlm_ffs.size() == 0) {
+ return;
+ }
+
+ // Iterate over all translation options in the collection.
+ std::vector< std::vector< TranslationOptionList > >::iterator start_iter;
+ for (start_iter = m_collection.begin();
+ start_iter != m_collection.end();
+ ++start_iter) {
+ std::vector< TranslationOptionList >::iterator end_iter;
+ for (end_iter = (*start_iter).begin();
+ end_iter != (*start_iter).end();
+ ++end_iter) {
+ std::vector< TranslationOption* >::iterator option_iter;
+ for (option_iter = (*end_iter).begin();
+ option_iter != (*end_iter).end();
+ ++option_iter) {
+
+ // Get a handle on the current translation option.
+ TranslationOption* option = *option_iter;
+
+ std::map<int, LanguageModel*>::iterator dlm_iter;
+ for (dlm_iter = dlm_ffs.begin();
+ dlm_iter != dlm_ffs.end();
+ ++dlm_iter) {
+ LanguageModel* dlm = (*dlm_iter).second;
+
+
+ float full_score;
+ float ngram_score;
+ size_t oov_count;
+ TargetPhrase& phrase =
+ const_cast<TargetPhrase&>(option->GetTargetPhrase());
+ dlm->CalcScoreFromCache(phrase,
+ full_score,
+ ngram_score,
+ oov_count);
+ ScoreComponentCollection& option_scores =
+ const_cast<ScoreComponentCollection&>(option->GetScoreBreakdown());
+ option_scores.Assign(dlm, ngram_score);
+ ScoreComponentCollection& phrase_scores =
+ const_cast<ScoreComponentCollection&>(phrase.GetScoreBreakdown());
+ phrase_scores.Assign(dlm, ngram_score);
+
+ float weighted_score = full_score * dlm->GetWeight();
+ phrase.SetFutureScore(phrase.GetFutureScore() + weighted_score);
+ }
+ }
+ }
+ }
}
void TranslationOptionCollection::Sort()
@@ -605,6 +677,11 @@ std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& c
return out;
}
+const std::vector<Phrase*>& TranslationOptionCollection::GetUnknownSources() const
+{
+ return m_unksrcs;
+}
+
void TranslationOptionCollection::CacheLexReordering()
{
const vector<LexicalReordering*> &lexReorderingModels = m_system->GetReorderModels();
@@ -637,6 +714,51 @@ void TranslationOptionCollection::CacheLexReordering()
}
}
}
+
+void TranslationOptionCollection::PreCalculateScores()
+{
+ //Figure out which features need to be precalculated
+ const vector<const StatelessFeatureFunction*>& sfs =
+ m_system->GetStatelessFeatureFunctions();
+ vector<const StatelessFeatureFunction*> precomputedFeatures;
+ for (unsigned i = 0; i < sfs.size(); ++i) {
+ if (sfs[i]->ComputeValueInTranslationOption() &&
+ !sfs[i]->ComputeValueInTranslationTable()) {
+ precomputedFeatures.push_back(sfs[i]);
+ }
+ }
+ //empty coverage vector
+ WordsBitmap coverage(m_source.GetSize());
+
+ //Go through translation options and precompute features
+ for (size_t i = 0; i < m_collection.size(); ++i) {
+ for (size_t j = 0; j < m_collection[i].size(); ++j) {
+ for (size_t k = 0; k < m_collection[i][j].size(); ++k) {
+ const TranslationOption* toption = m_collection[i][j].Get(k);
+ ScoreComponentCollection& breakdown = m_precalculatedScores[*toption];
+ PhraseBasedFeatureContext context(*toption, m_source);
+ for (size_t si = 0; si < precomputedFeatures.size(); ++si) {
+ precomputedFeatures[si]->Evaluate(context, &breakdown);
+ }
+ }
+ }
+ }
+}
+
+void TranslationOptionCollection::InsertPreCalculatedScores
+ (const TranslationOption& translationOption, ScoreComponentCollection* scoreBreakdown)
+ const
+{
+ boost::unordered_map<TranslationOption,ScoreComponentCollection>::const_iterator scoreIter =
+ m_precalculatedScores.find(translationOption);
+ if (scoreIter != m_precalculatedScores.end()) {
+ scoreBreakdown->PlusEquals(scoreIter->second);
+ } else {
+ TRACE_ERR("ERROR: " << translationOption << " missing from precalculation cache" << endl);
+ assert(0);
+ }
+}
+
//! list of trans opt for a particular span
TranslationOptionList &TranslationOptionCollection::GetTranslationOptionList(size_t startPos, size_t endPos)
{
diff --git a/moses/src/TranslationOptionCollection.h b/moses/src/TranslationOptionCollection.h
index c6452559c..5b123421d 100644
--- a/moses/src/TranslationOptionCollection.h
+++ b/moses/src/TranslationOptionCollection.h
@@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#define moses_TranslationOptionCollection_h
#include <list>
+#include <boost/unordered_map.hpp>
#include "TypeDef.h"
#include "TranslationOption.h"
#include "TranslationOptionList.h"
@@ -42,7 +43,8 @@ class InputType;
class LMList;
class FactorMask;
class Word;
-
+class DecodeGraph;
+
/** Contains all phrase translations applicable to current input type (a sentence or confusion network).
* A key insight into efficient decoding is that various input
* conditions (trelliss, factored input, normal text, xml markup)
@@ -53,12 +55,9 @@ class Word;
* depends on the input condition, but they all are presented to
* decoding algorithm in the same form, using this class.
*
- * This class cannot, and should not be instantiated directly. Instantiate 1 of the inherited
+ * This is a abstract class, and cannot be instantiated directly. Instantiate 1 of the inherited
* classes instead, for a particular input type
**/
-
-class DecodeGraph;
-
class TranslationOptionCollection
{
friend std::ostream& operator<<(std::ostream& out, const TranslationOptionCollection& coll);
@@ -71,6 +70,7 @@ protected:
const size_t m_maxNoTransOptPerCoverage; /*< maximum number of translation options per input span */
const float m_translationOptionThreshold; /*< threshold for translation options with regard to best option for input span */
std::vector<Phrase*> m_unksrcs;
+ boost::unordered_map<TranslationOption,ScoreComponentCollection> m_precalculatedScores;
TranslationOptionCollection(const TranslationSystem* system, InputType const& src, size_t maxNoTransOptPerCoverage,
@@ -82,6 +82,9 @@ protected:
void ProcessUnknownWord();
//! special handling of ONE unknown words.
virtual void ProcessOneUnknownWord(const Word &sourceWord, size_t sourcePos, size_t length = 1, const Scores *inputScores = NULL);
+
+ void IncorporateDLMScores();
+
//! pruning: only keep the top n (m_maxNoTransOptPerCoverage) elements */
void Prune();
@@ -95,8 +98,14 @@ protected:
//! implemented by inherited class, called by this class
virtual void ProcessUnknownWord(size_t sourcePos)=0;
+
+
+
void CacheLexReordering();
+ //! Pre-calculate most stateless feature values
+ void PreCalculateScores();
+
public:
virtual ~TranslationOptionCollection();
@@ -105,6 +114,9 @@ public:
return m_source;
}
+ //!List of unknowns (OOVs)
+ const std::vector<Phrase*>& GetUnknownSources() const;
+
//! get length/size of source input
size_t GetSize() const {
return m_source.GetSize();
@@ -136,6 +148,10 @@ public:
return GetTranslationOptionList(coverage.GetStartPos(), coverage.GetEndPos());
}
+ //! Access these pre-calculated values
+ void InsertPreCalculatedScores(const TranslationOption& translationOption,
+ ScoreComponentCollection* scoreBreakdown) const;
+
TO_STRING();
};
diff --git a/moses/src/TranslationOptionCollectionConfusionNet.h b/moses/src/TranslationOptionCollectionConfusionNet.h
index 66ef953a8..be6e3ab4e 100644
--- a/moses/src/TranslationOptionCollectionConfusionNet.h
+++ b/moses/src/TranslationOptionCollectionConfusionNet.h
@@ -10,6 +10,9 @@ namespace Moses
class ConfusionNet;
class TranslationSystem;
+/** Holds all translation options, for all spans, of a particular confusion network input
+ * Inherited from TranslationOptionCollection.
+ */
class TranslationOptionCollectionConfusionNet : public TranslationOptionCollection
{
public:
diff --git a/moses/src/TranslationOptionCollectionText.h b/moses/src/TranslationOptionCollectionText.h
index bb819441e..b4ce3e7ea 100644
--- a/moses/src/TranslationOptionCollectionText.h
+++ b/moses/src/TranslationOptionCollectionText.h
@@ -30,6 +30,9 @@ namespace Moses
class Sentence;
class LMList;
+/** Holds all translation options, for all spans, of a particular sentence input
+ * Inherited from TranslationOptionCollection.
+ */
class TranslationOptionCollectionText : public TranslationOptionCollection
{
public:
diff --git a/moses/src/TranslationOptionList.h b/moses/src/TranslationOptionList.h
index d56578029..bde2212d7 100644
--- a/moses/src/TranslationOptionList.h
+++ b/moses/src/TranslationOptionList.h
@@ -11,6 +11,8 @@ namespace Moses
class TranslationOption;
+/** wrapper around vector of translation options
+ */
class TranslationOptionList
{
friend std::ostream& operator<<(std::ostream& out, const TranslationOptionList& coll);
diff --git a/moses/src/TranslationSystem.cpp b/moses/src/TranslationSystem.cpp
index 7760a0c81..e9a97bbbc 100644
--- a/moses/src/TranslationSystem.cpp
+++ b/moses/src/TranslationSystem.cpp
@@ -78,10 +78,7 @@ namespace Moses {
m_producers.push_back(ff);
if (ff->IsStateless()) {
- const StatelessFeatureFunction* statelessFF = static_cast<const StatelessFeatureFunction*>(ff);
- if (!statelessFF->ComputeValueInTranslationOption()) {
- m_statelessFFs.push_back(statelessFF);
- }
+ m_statelessFFs.push_back(static_cast<const StatelessFeatureFunction*>(ff));
} else {
m_statefulFFs.push_back(static_cast<const StatefulFeatureFunction*>(ff));
}
@@ -126,12 +123,10 @@ namespace Moses {
//for(size_t i=0;i<m_statefulFFs.size();++i) {
//}
for(size_t i=0;i<m_statelessFFs.size();++i) {
- if (m_statelessFFs[i]->GetScoreProducerWeightShortName() == "pp")
- ((PhrasePairFeature*)m_statelessFFs[i])->InitializeForInput((Sentence const&)source);
- else if (m_statelessFFs[i]->GetScoreProducerWeightShortName() == "glm")
- ((GlobalLexicalModelUnlimited*)m_statelessFFs[i])->InitializeForInput((Sentence const&)source);
- else if (m_statelessFFs[i]->GetScoreProducerWeightShortName() == "wt")
- ((WordTranslationFeature*)m_statelessFFs[i])->InitializeForInput((Sentence const&)source);
+ if (m_statelessFFs[i]->GetScoreProducerWeightShortName() == "glm")
+ {
+ ((GlobalLexicalModelUnlimited*)m_statelessFFs[i])->InitializeForInput((Sentence const&)source);
+ }
}
LMList::const_iterator iterLM;
@@ -142,25 +137,25 @@ namespace Moses {
}
}
- void TranslationSystem::CleanUpAfterSentenceProcessing() const {
+ void TranslationSystem::CleanUpAfterSentenceProcessing(const InputType& source) const {
for(size_t i=0;i<m_phraseDictionaries.size();++i)
{
PhraseDictionaryFeature &phraseDictionaryFeature = *m_phraseDictionaries[i];
PhraseDictionary* phraseDictionary = const_cast<PhraseDictionary*>(phraseDictionaryFeature.GetDictionary());
- phraseDictionary->CleanUp();
+ phraseDictionary->CleanUp(source);
}
for(size_t i=0;i<m_generationDictionaries.size();++i)
- m_generationDictionaries[i]->CleanUp();
+ m_generationDictionaries[i]->CleanUp(source);
//something LMs could do after each sentence
LMList::const_iterator iterLM;
for (iterLM = m_languageModels.begin() ; iterLM != m_languageModels.end() ; ++iterLM)
{
LanguageModel &languageModel = **iterLM;
- languageModel.CleanUpAfterSentenceProcessing();
+ languageModel.CleanUpAfterSentenceProcessing(source);
}
}
@@ -180,11 +175,7 @@ namespace Moses {
}
std::vector<float> TranslationSystem::GetTranslationWeights(size_t index) const {
- std::vector<float> weights = StaticData::Instance().GetWeights(GetTranslationScoreProducer(index));
- //VERBOSE(1, "Read weightT from translation sytem.. ");
- //for (size_t i = 0; i < weights.size(); ++i)
- //VERBOSE(1, weights[i] << " ");
- //VERBOSE(1, std::endl);
- return weights;
+ std::vector<float> weights = StaticData::Instance().GetWeights(GetTranslationScoreProducer(index));
+ return weights;
}
};
diff --git a/moses/src/TranslationSystem.h b/moses/src/TranslationSystem.h
index 55c8d927f..f9ac3802a 100644
--- a/moses/src/TranslationSystem.h
+++ b/moses/src/TranslationSystem.h
@@ -88,7 +88,7 @@ class TranslationSystem {
const UnknownWordPenaltyProducer *GetUnknownWordPenaltyProducer() const { return m_unknownWpProducer; }
const DistortionScoreProducer* GetDistortionProducer() const {return m_distortionScoreProducer;}
- const PhraseDictionaryFeature *GetTranslationScoreProducer(size_t index) const { return GetPhraseDictionaries()[index]; }
+ const PhraseDictionaryFeature *GetTranslationScoreProducer(size_t index) const { return GetPhraseDictionaries().at(index); }
float GetWeightWordPenalty() const;
float GetWeightUnknownWordPenalty() const;
@@ -97,10 +97,9 @@ class TranslationSystem {
//sentence (and thread) specific initialisationn and cleanup
void InitializeBeforeSentenceProcessing(const InputType& source) const;
- void CleanUpAfterSentenceProcessing() const;
+ void CleanUpAfterSentenceProcessing(const InputType& source) const;
- const std::vector<const ScoreProducer*>& GetFeatureFunctions() const { return m_producers; }
-
+ const std::vector<const ScoreProducer*>& GetFeatureFunctions() const { return m_producers; }
static const std::string DEFAULT;
@@ -127,7 +126,7 @@ class TranslationSystem {
const UnknownWordPenaltyProducer* m_unknownWpProducer;
const DistortionScoreProducer* m_distortionScoreProducer;
- std::vector<const ScoreProducer*> m_producers; /**< all the score producers in this run */
+ std::vector<const ScoreProducer*> m_producers; /**< all the score producers in this run */
};
diff --git a/moses/src/TreeInput.cpp b/moses/src/TreeInput.cpp
index a0b04b800..d36ea7e55 100644
--- a/moses/src/TreeInput.cpp
+++ b/moses/src/TreeInput.cpp
@@ -288,7 +288,7 @@ int TreeInput::Read(std::istream& in,const std::vector<FactorType>& factorOrder)
TargetPhraseCollection *tpc = new TargetPhraseCollection;
tpc->Add(targetPhrase);
- ChartTranslationOption *transOpt = new ChartTranslationOption(*tpc, emptyStackVec, *range, 0.0f);
+ ChartTranslationOptions *transOpt = new ChartTranslationOptions(*tpc, emptyStackVec, *range, 0.0f);
m_xmlChartOptionsList.push_back(transOpt);
//TODO: needed to handle exclusive
diff --git a/moses/src/TreeInput.h b/moses/src/TreeInput.h
index e460ad6b3..d9fbcc397 100644
--- a/moses/src/TreeInput.h
+++ b/moses/src/TreeInput.h
@@ -4,10 +4,11 @@
#include <vector>
#include "Sentence.h"
-#include "ChartTranslationOption.h"
+#include "ChartTranslationOptions.h"
namespace Moses
{
+//! @todo what is this?
class XMLParseOutput
{
public:
@@ -20,13 +21,18 @@ public:
{}
};
+/** An input to the decoder that represent a parse tree.
+ * Implemented as a sentence with non-terminal labels over certain ranges.
+ * This representation doesn't necessarily have to form a tree, it's up to the user to make sure it does if they really want a tree.
+ * @todo Need to rewrite if you want packed forest, or packed forest over lattice - not sure if can inherit from this
+ */
class TreeInput : public Sentence
{
friend std::ostream& operator<<(std::ostream&, const TreeInput&);
protected:
std::vector<std::vector<NonTerminalSet> > m_sourceChart;
- std::vector <ChartTranslationOption*> m_xmlChartOptionsList;
+ std::vector <ChartTranslationOptions*> m_xmlChartOptionsList;
void AddChartLabel(size_t startPos, size_t endPos, const std::string &label
,const std::vector<FactorType>& factorOrder);
@@ -59,7 +65,7 @@ public:
return m_sourceChart[startPos][endPos - startPos];
}
- std::vector <ChartTranslationOption*> GetXmlChartTranslationOptions() const {
+ std::vector <ChartTranslationOptions*> GetXmlChartTranslationOptions() const {
return m_xmlChartOptionsList;
};
};
diff --git a/moses/src/TrellisPath.h b/moses/src/TrellisPath.h
index c1b347de9..d8005435c 100644
--- a/moses/src/TrellisPath.h
+++ b/moses/src/TrellisPath.h
@@ -36,7 +36,8 @@ class TrellisPathList;
/** Encapsulate the set of hypotheses/arcs that goes from decoding 1 phrase to all the source phrases
* to reach a final translation. For the best translation, this consist of all hypotheses, for the other
- * n-best paths, the node on the path can consist of hypotheses or arcs
+ * n-best paths, the node on the path can consist of hypotheses or arcs.
+ * Used by phrase-based decoding
*/
class TrellisPath
{
diff --git a/moses/src/TrellisPathCollection.h b/moses/src/TrellisPathCollection.h
index 82e771d78..43532cc9a 100644
--- a/moses/src/TrellisPathCollection.h
+++ b/moses/src/TrellisPathCollection.h
@@ -36,8 +36,9 @@ struct CompareTrellisPathCollection {
};
/** priority queue used in Manager to store list of contenders for N-Best list.
- * Stored in order of total score so that the best path can just be popped from the top
- */
+ * Stored in order of total score so that the best path can just be popped from the top
+ * Used by phrase-based decoding
+ */
class TrellisPathCollection
{
friend std::ostream& operator<<(std::ostream&, const TrellisPathCollection&);
diff --git a/moses/src/TrellisPathList.h b/moses/src/TrellisPathList.h
index 073efb9b8..cdbe70ca0 100644
--- a/moses/src/TrellisPathList.h
+++ b/moses/src/TrellisPathList.h
@@ -29,7 +29,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
namespace Moses
{
-/** used to return n-best list of Trellis Paths from the Manager to the caller */
+/** used to return n-best list of Trellis Paths from the Manager to the caller.
+ * Used by phrase-based decoding
+ */
class TrellisPathList
{
protected:
diff --git a/moses/src/TypeDef.h b/moses/src/TypeDef.h
index 0b525ad43..a16f49fc2 100644
--- a/moses/src/TypeDef.h
+++ b/moses/src/TypeDef.h
@@ -25,6 +25,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <string>
+//! all the typedefs and enums goes here
+
#ifdef WIN32
#include <BaseTsd.h>
#else
@@ -135,6 +137,8 @@ enum PhraseTableImplementation {
,SuffixArray = 8
,Hiero = 9
,ALSuffixArray = 10
+ ,FuzzyMatch = 11
+ ,Compact = 12
};
enum InputTypeEnum {
@@ -168,6 +172,7 @@ enum SearchAlgorithm {
,CubePruning = 1
,CubeGrowing = 2
,ChartDecoding= 3
+ ,NormalBatch = 4
};
enum SourceLabelOverlap {
@@ -181,6 +186,12 @@ enum WordAlignmentSort {
,TargetOrder = 1
};
+enum FormatType
+{
+ MosesFormat
+ ,HieroFormat
+};
+
// typedef
typedef size_t FactorType;
diff --git a/moses/src/UniqueObject.h b/moses/src/UniqueObject.h
index 789c5e1b3..6e35cd5a7 100644
--- a/moses/src/UniqueObject.h
+++ b/moses/src/UniqueObject.h
@@ -28,6 +28,7 @@ template<class T> T const* uniqueObject(const T& x,int mode=0)
}
}
+//! @todo what is this?
template<class T> class UniqueObjectManager
{
public:
diff --git a/moses/src/UserMessage.h b/moses/src/UserMessage.h
index aac38cae4..ddb21e830 100644
--- a/moses/src/UserMessage.h
+++ b/moses/src/UserMessage.h
@@ -29,8 +29,8 @@ namespace Moses
{
/** User warnings/error messages.
- * Not the same as tracing messages, this should be usable even if Moses front-end if GUI
-*/
+ * Not the same as tracing messages, this should be usable even if Moses front-end if GUI
+ */
class UserMessage
{
protected:
diff --git a/moses/src/Util.cpp b/moses/src/Util.cpp
index be02fb27b..98de1241e 100644
--- a/moses/src/Util.cpp
+++ b/moses/src/Util.cpp
@@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Util.h"
#include "Timer.h"
+#include "util/file.hh"
using namespace std;
@@ -57,24 +58,6 @@ string GetTempFolder()
#endif
}
-void CreateTempFile(ofstream &fileStream, string &filePath)
-{
-#ifdef _WIN32
- char buffer[BUFSIZ];
- ::GetTempFileNameA(GetTempFolder().c_str(), "", 0, buffer);
- filePath = buffer;
-#else
- char buffer[L_tmpnam];
- strcpy(buffer, GetTempFolder().c_str());
- strcat(buffer, PROJECT_NAME);
- strcat(buffer, "--XXXXXX");
- mkstemp(buffer);
- filePath = buffer;
-#endif
- fileStream.open(filePath.c_str(), ofstream::out | ofstream::app);
-}
-
-
const std::string ToLower(const std::string& str)
{
std::string lc(str);
diff --git a/moses/src/Util.h b/moses/src/Util.h
index 5d4aa12bb..6f485a623 100644
--- a/moses/src/Util.h
+++ b/moses/src/Util.h
@@ -41,7 +41,7 @@ namespace Moses
/** Outputting debugging/verbose information to stderr.
* Use TRACE_ENABLE flag to redirect tracing output into oblivion
* so that you can output your own ad-hoc debugging info.
- * However, if you use stderr diretly, please delete calls to it once
+ * However, if you use stderr directly, please delete calls to it once
* you finished debugging so that it won't clutter up.
* Also use TRACE_ENABLE to turn off output of any debugging info
* when compiling for a gui front-end so that running gui won't generate
@@ -322,8 +322,6 @@ void RemoveAllInColl(COLL &coll)
//! x-platform reference to temp folder
std::string GetTempFolder();
-//! Create temp file and return output stream and full file path as arguments
-void CreateTempFile(std::ofstream &fileStream, std::string &filePath);
//! MD5 hash of a file
std::string GetMD5Hash(const std::string &filePath);
diff --git a/moses/src/Word.cpp b/moses/src/Word.cpp
index 1ff5df496..3659fa0e3 100644
--- a/moses/src/Word.cpp
+++ b/moses/src/Word.cpp
@@ -102,9 +102,11 @@ void Word::CreateFromString(FactorDirection direction
FactorCollection &factorCollection = FactorCollection::Instance();
vector<string> wordVec;
- Tokenize(wordVec, str, "|");
+ const std::string& factorDelimiter = StaticData::Instance().GetFactorDelimiter();
+ TokenizeMultiCharSeparator(wordVec, str, factorDelimiter);
+ //Tokenize(wordVec, str, "|");
if (!isNonTerminal)
- assert(wordVec.size() == factorOrder.size());
+ CHECK(wordVec.size() <= factorOrder.size());
const Factor *factor;
for (size_t ind = 0; ind < wordVec.size(); ++ind) {
diff --git a/moses/src/Word.h b/moses/src/Word.h
index 7dd395030..872b90e13 100644
--- a/moses/src/Word.h
+++ b/moses/src/Word.h
@@ -38,8 +38,8 @@ namespace Moses
class Phrase;
-/***
- * hold a set of factors for a single word
+/** Represent a word (terminal or non-term)
+ * Wrapper around hold a set of factors for a single word
*/
class Word
{
diff --git a/moses/src/WordLattice.h b/moses/src/WordLattice.h
index 21df48054..3fb3beba8 100644
--- a/moses/src/WordLattice.h
+++ b/moses/src/WordLattice.h
@@ -8,7 +8,9 @@
namespace Moses
{
-/** General word lattice */
+/** An input to the decoder that represent a word lattice.
+ * @todo why is this inherited from confusion net?
+ */
class WordLattice: public ConfusionNet
{
private:
diff --git a/moses/src/WordTranslationFeature.cpp b/moses/src/WordTranslationFeature.cpp
index 42bf8ef54..ab120caf1 100644
--- a/moses/src/WordTranslationFeature.cpp
+++ b/moses/src/WordTranslationFeature.cpp
@@ -1,10 +1,12 @@
#include <sstream>
+#include <boost/algorithm/string.hpp>
#include "WordTranslationFeature.h"
#include "Phrase.h"
#include "TargetPhrase.h"
#include "Hypothesis.h"
#include "ChartHypothesis.h"
#include "ScoreComponentCollection.h"
+#include "TranslationOption.h"
#include <boost/algorithm/string.hpp>
namespace Moses {
@@ -70,25 +72,12 @@ bool WordTranslationFeature::Load(const std::string &filePathSource, const std::
return true;
}
- void WordTranslationFeature::InitializeForInput( Sentence const& in )
+void WordTranslationFeature::Evaluate
+ (const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- m_local.reset(new ThreadLocalStorage);
- m_local->input = &in;
- m_local->docid = in.GetDocumentId();
- m_local->topicid = in.GetTopicId();
- m_local->topicid_prob = in.GetTopicIdAndProb();
- m_local->use_topicid = in.GetUseTopicId();
- m_local->use_topicid_prob = in.GetUseTopicIdAndProb();
-}
-
-void WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponentCollection* accumulator) const
-{
- const Sentence& input = *(m_local->input);
- const long docid = m_local->docid;
- const long topicid = m_local->topicid;
- const bool use_topicid = m_local->use_topicid;
- const bool use_topicid_prob = m_local->use_topicid_prob;
- const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
+ const Sentence& input = static_cast<const Sentence&>(context.GetSource());
+ const TargetPhrase& targetPhrase = context.GetTargetPhrase();
const AlignmentInfo &alignment = targetPhrase.GetAlignmentInfo();
// process aligned words
@@ -115,27 +104,28 @@ void WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponent
}
if (!m_unrestricted) {
- if (m_vocabSource.find(sourceWord) == m_vocabSource.end())
- sourceWord = "OTHER";
- if (m_vocabTarget.find(targetWord) == m_vocabTarget.end())
- targetWord = "OTHER";
+ if (m_vocabSource.find(sourceWord) == m_vocabSource.end())
+ sourceWord = "OTHER";
+ if (m_vocabTarget.find(targetWord) == m_vocabTarget.end())
+ targetWord = "OTHER";
}
if (m_simple) {
- // construct feature name
- stringstream featureName;
- featureName << "wt_";
- //featureName << ((sourceExists||m_unrestricted) ? sourceWord : "OTHER");
- featureName << sourceWord;
- featureName << "~";
- //featureName << ((targetExists||m_unrestricted) ? targetWord : "OTHER");
- featureName << targetWord;
- accumulator->SparsePlusEquals(featureName.str(), 1);
+ // construct feature name
+ stringstream featureName;
+ featureName << "wt_";
+ featureName << sourceWord;
+ featureName << "~";
+ featureName << targetWord;
+ accumulator->SparsePlusEquals(featureName.str(), 1);
}
- if (m_domainTrigger && !m_sourceContext) {
+ if (m_domainTrigger && !m_sourceContext) {
+ const bool use_topicid = input.GetUseTopicId();
+ const bool use_topicid_prob = input.GetUseTopicIdAndProb();
if (use_topicid || use_topicid_prob) {
- // use topicid as trigger
if(use_topicid) {
+ // use topicid as trigger
+ const long topicid = input.GetTopicId();
stringstream feature;
feature << "wt_";
if (topicid == -1)
@@ -151,7 +141,7 @@ void WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponent
}
else {
// use topic probabilities
- const vector<string> &topicid_prob = *(m_local->topicid_prob);
+ const vector<string> &topicid_prob = *(input.GetTopicIdAndProb());
if (atol(topicid_prob[0].c_str()) == -1) {
stringstream feature;
feature << "wt_unk_";
@@ -173,29 +163,10 @@ void WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponent
}
}
}
-
- // general feature
- /*stringstream feature;
- feature << "wt_";
- feature << sourceWord;
- feature << "~";
- feature << targetWord;
- accumulator->SparsePlusEquals(feature.str(), 1);
-
- if (topicid != -1) {
- // topic-specific feature
- stringstream feature;
- feature << "wt_";
- feature << topicid;
- feature << "_";
- feature << sourceWord;
- feature << "~";
- feature << targetWord;
- accumulator->SparsePlusEquals(feature.str(), 1);
- }*/
}
else {
- // range over domain trigger words
+ // range over domain trigger words (keywords)
+ const long docid = input.GetDocumentId();
for (set<string>::const_iterator p = m_vocabDomain[docid].begin(); p != m_vocabDomain[docid].end(); ++p) {
string sourceTrigger = *p;
stringstream feature;
@@ -210,68 +181,71 @@ void WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponent
}
}
if (m_sourceContext) {
- size_t globalSourceIndex = cur_hypo.GetCurrSourceWordsRange().GetStartPos() + sourceIndex;
- if (!m_domainTrigger && globalSourceIndex == 0) {
- // add <s> trigger feature for source
- stringstream feature;
- feature << "wt_";
- feature << "<s>,";
- feature << sourceWord;
- feature << "~";
- feature << targetWord;
- accumulator->SparsePlusEquals(feature.str(), 1);
- }
-
- // range over source words to get context
- for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
- if (contextIndex == globalSourceIndex) continue;
- string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
- if (m_ignorePunctuation) {
- // check if trigger is punctuation
- char firstChar = sourceTrigger.at(0);
- CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
- if(charIterator != m_punctuationHash.end())
- continue;
- }
-
- bool sourceTriggerExists = false;
- if (m_domainTrigger)
- sourceTriggerExists = m_vocabDomain[docid].find( sourceTrigger ) != m_vocabDomain[docid].end();
- else if (!m_unrestricted)
- sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
-
- if (m_domainTrigger) {
- if (sourceTriggerExists) {
- stringstream feature;
- feature << "wt_";
- feature << sourceTrigger;
- feature << "_";
- feature << sourceWord;
- feature << "~";
- feature << targetWord;
- accumulator->SparsePlusEquals(feature.str(), 1);
- }
- }
- else if (m_unrestricted || sourceTriggerExists) {
- stringstream feature;
- feature << "wt_";
- if (contextIndex < globalSourceIndex) {
- feature << sourceTrigger;
- feature << ",";
- feature << sourceWord;
- }
- else {
- feature << sourceWord;
- feature << ",";
- feature << sourceTrigger;
- }
- feature << "~";
- feature << targetWord;
- accumulator->SparsePlusEquals(feature.str(), 1);
- }
- }
+ size_t globalSourceIndex = context.GetTranslationOption().GetStartPos() + sourceIndex;
+ if (!m_domainTrigger && globalSourceIndex == 0) {
+ // add <s> trigger feature for source
+ stringstream feature;
+ feature << "wt_";
+ feature << "<s>,";
+ feature << sourceWord;
+ feature << "~";
+ feature << targetWord;
+ accumulator->SparsePlusEquals(feature.str(), 1);
+ }
+
+ // range over source words to get context
+ for(size_t contextIndex = 0; contextIndex < input.GetSize(); contextIndex++ ) {
+ if (contextIndex == globalSourceIndex) continue;
+ string sourceTrigger = input.GetWord(contextIndex).GetFactor(m_factorTypeSource)->GetString();
+ if (m_ignorePunctuation) {
+ // check if trigger is punctuation
+ char firstChar = sourceTrigger.at(0);
+ CharHash::const_iterator charIterator = m_punctuationHash.find( firstChar );
+ if(charIterator != m_punctuationHash.end())
+ continue;
+ }
+
+ const long docid = input.GetDocumentId();
+ bool sourceTriggerExists = false;
+ if (m_domainTrigger)
+ sourceTriggerExists = m_vocabDomain[docid].find( sourceTrigger ) != m_vocabDomain[docid].end();
+ else if (!m_unrestricted)
+ sourceTriggerExists = m_vocabSource.find( sourceTrigger ) != m_vocabSource.end();
+
+ if (m_domainTrigger) {
+ if (sourceTriggerExists) {
+ stringstream feature;
+ feature << "wt_";
+ feature << sourceTrigger;
+ feature << "_";
+ feature << sourceWord;
+ feature << "~";
+ feature << targetWord;
+ accumulator->SparsePlusEquals(feature.str(), 1);
+ }
+ }
+ else if (m_unrestricted || sourceTriggerExists) {
+ stringstream feature;
+ feature << "wt_";
+ if (contextIndex < globalSourceIndex) {
+ feature << sourceTrigger;
+ feature << ",";
+ feature << sourceWord;
+ }
+ else {
+ feature << sourceWord;
+ feature << ",";
+ feature << sourceTrigger;
+ }
+ feature << "~";
+ feature << targetWord;
+ accumulator->SparsePlusEquals(feature.str(), 1);
+ }
+ }
}
if (m_targetContext) {
+ throw runtime_error("Can't use target words outside current translation option in a stateless feature");
+ /*
size_t globalTargetIndex = cur_hypo.GetCurrTargetWordsRange().GetStartPos() + targetIndex;
if (globalTargetIndex == 0) {
// add <s> trigger feature for source
@@ -309,16 +283,16 @@ void WordTranslationFeature::Evaluate(const Hypothesis& cur_hypo, ScoreComponent
feature << targetWord;
accumulator->SparsePlusEquals(feature.str(), 1);
}
- }
+ }*/
}
}
}
-void WordTranslationFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int featureID,
- ScoreComponentCollection* accumulator) const
+void WordTranslationFeature::EvaluateChart(
+ const ChartBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const
{
- //const Sentence& input = *(m_local->input);
- const TargetPhrase& targetPhrase = cur_hypo.GetCurrTargetPhrase();
+ const TargetPhrase& targetPhrase = context.GetTargetPhrase();
const AlignmentInfo &alignmentInfo = targetPhrase.GetAlignmentInfo();
const AlignmentInfo::CollType &alignment = alignmentInfo.GetTerminalAlignments();
@@ -459,6 +433,7 @@ void WordTranslationFeature::EvaluateChart(const ChartHypothesis& cur_hypo, int
}
}*/
}
+
}
}
diff --git a/moses/src/WordTranslationFeature.h b/moses/src/WordTranslationFeature.h
index be3a02f86..7f74ae4e3 100644
--- a/moses/src/WordTranslationFeature.h
+++ b/moses/src/WordTranslationFeature.h
@@ -10,10 +10,6 @@
#include "Sentence.h"
#include "FFState.h"
-#ifdef WITH_THREADS
-#include <boost/thread/tss.hpp>
-#endif
-
namespace Moses
{
@@ -24,24 +20,7 @@ class WordTranslationFeature : public StatelessFeatureFunction {
typedef std::map< char, short > CharHash;
typedef std::vector< std::set<std::string> > DocumentVector;
- struct ThreadLocalStorage
- {
- const Sentence *input;
- long docid;
- long topicid;
- bool use_topicid;
- const std::vector<std::string> *topicid_prob;
- bool use_topicid_prob;
- };
-
-
private:
-#ifdef WITH_THREADS
- boost::thread_specific_ptr<ThreadLocalStorage> m_local;
-#else
- std::auto_ptr<ThreadLocalStorage> m_local;
-#endif
-
std::set<std::string> m_vocabSource;
std::set<std::string> m_vocabTarget;
DocumentVector m_vocabDomain;
@@ -57,59 +36,57 @@ private:
CharHash m_punctuationHash;
public:
- WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget,
- bool simple, bool sourceContext, bool targetContext, bool ignorePunctuation, bool domainTrigger):
- StatelessFeatureFunction("wt", ScoreProducer::unlimited),
- m_factorTypeSource(factorTypeSource),
- m_factorTypeTarget(factorTypeTarget),
- m_unrestricted(true),
- m_simple(simple),
- m_sourceContext(sourceContext),
- m_targetContext(targetContext),
- m_domainTrigger(domainTrigger),
- m_sparseProducerWeight(1),
- m_ignorePunctuation(ignorePunctuation)
- {
- std::cerr << "Initializing word translation feature.. ";
- if (m_simple == 1) std::cerr << "using simple word translations.. ";
- if (m_sourceContext == 1) std::cerr << "using source context.. ";
- if (m_targetContext == 1) std::cerr << "using target context.. ";
- if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
-
- // compile a list of punctuation characters
- if (m_ignorePunctuation) {
- std::cerr << "ignoring punctuation for triggers.. ";
- char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
- for (size_t i=0; i < sizeof(punctuation)-1; ++i)
- m_punctuationHash[punctuation[i]] = 1;
- }
-
- std::cerr << "done." << std::endl;
- }
+ WordTranslationFeature(FactorType factorTypeSource, FactorType factorTypeTarget,
+ bool simple, bool sourceContext, bool targetContext, bool ignorePunctuation,
+ bool domainTrigger):
+ StatelessFeatureFunction("wt", ScoreProducer::unlimited),
+ m_factorTypeSource(factorTypeSource),
+ m_factorTypeTarget(factorTypeTarget),
+ m_unrestricted(true),
+ m_simple(simple),
+ m_sourceContext(sourceContext),
+ m_targetContext(targetContext),
+ m_domainTrigger(domainTrigger),
+ m_sparseProducerWeight(1),
+ m_ignorePunctuation(ignorePunctuation)
+ {
+ std::cerr << "Initializing word translation feature.. ";
+ if (m_simple == 1) std::cerr << "using simple word translations.. ";
+ if (m_sourceContext == 1) std::cerr << "using source context.. ";
+ if (m_targetContext == 1) std::cerr << "using target context.. ";
+ if (m_domainTrigger == 1) std::cerr << "using domain triggers.. ";
- bool Load(const std::string &filePathSource, const std::string &filePathTarget);
-
- void InitializeForInput( Sentence const& in );
-
-// void Evaluate(const TargetPhrase& cur_phrase, ScoreComponentCollection* accumulator) const;
-
+ // compile a list of punctuation characters
+ if (m_ignorePunctuation) {
+ std::cerr << "ignoring punctuation for triggers.. ";
+ char punctuation[] = "\"'!?¿·()#_,.:;•&@‑/\\0123456789~=";
+ for (size_t i=0; i < sizeof(punctuation)-1; ++i)
+ m_punctuationHash[punctuation[i]] = 1;
+ }
+
+ std::cerr << "done." << std::endl;
+ }
+
+ bool Load(const std::string &filePathSource, const std::string &filePathTarget);
+
const FFState* EmptyHypothesisState(const InputType &) const {
- return new DummyState();
+ return new DummyState();
}
+
+ void Evaluate(const PhraseBasedFeatureContext& context,
+ ScoreComponentCollection* accumulator) const;
- void Evaluate(const Hypothesis& cur_hypo,
- ScoreComponentCollection* accumulator) const;
-
- void EvaluateChart(const ChartHypothesis& cur_hypo,
- int featureID,
+ void EvaluateChart(const ChartBasedFeatureContext& context,
ScoreComponentCollection* accumulator) const;
// basic properties
- std::string GetScoreProducerWeightShortName(unsigned) const { return "wt"; }
- size_t GetNumInputScores() const { return 0; }
-
- void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
- float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
+ std::string GetScoreProducerWeightShortName(unsigned) const { return "wt"; }
+ size_t GetNumInputScores() const { return 0; }
+
+ bool ComputeValueInTranslationOption() const {return true;}
+
+ void SetSparseProducerWeight(float weight) { m_sparseProducerWeight = weight; }
+ float GetSparseProducerWeight() const { return m_sparseProducerWeight; }
};
}
diff --git a/moses/src/WordsRange.h b/moses/src/WordsRange.h
index 7191d259e..23b9a03a2 100644
--- a/moses/src/WordsRange.h
+++ b/moses/src/WordsRange.h
@@ -26,6 +26,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "TypeDef.h"
#include "Util.h"
+#ifdef WIN32
+#undef max
+#endif
+
namespace Moses
{
@@ -78,10 +82,10 @@ public:
CHECK(!Overlap(x));
if (x.m_endPos < m_startPos) {
- return m_startPos - x.m_endPos;
+ return m_startPos - x.m_endPos - 1;
}
- return x.m_startPos - m_endPos;
+ return x.m_startPos - m_endPos - 1;
}
diff --git a/moses/src/fuzzy-match/FuzzyMatchWrapper.cpp b/moses/src/fuzzy-match/FuzzyMatchWrapper.cpp
new file mode 100644
index 000000000..bb3d790ae
--- /dev/null
+++ b/moses/src/fuzzy-match/FuzzyMatchWrapper.cpp
@@ -0,0 +1,1077 @@
+//
+// FuzzyMatchWrapper.cpp
+// moses
+//
+// Created by Hieu Hoang on 26/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "fuzzy-match/FuzzyMatchWrapper.h"
+#include "fuzzy-match/SentenceAlignment.h"
+#include "fuzzy-match/Vocabulary.h"
+#include "fuzzy-match/Match.h"
+#include "Util.h"
+#include "StaticData.h"
+#include "util/file.hh"
+
+using namespace std;
+
+namespace tmmt
+{
+ int basic_flag = false;
+ int lsed_flag = true;
+ int refined_flag = true;
+ int length_filter_flag = true;
+ int parse_flag = true;
+ int min_match = 70;
+ int multiple_flag = false;
+ int multiple_slack = 0;
+ int multiple_max = 100;
+ map< WORD_ID,vector< int > > single_word_index;
+
+
+ FuzzyMatchWrapper::FuzzyMatchWrapper(const std::string &sourcePath, const std::string &targetPath, const std::string &alignmentPath)
+ {
+ // create suffix array
+ //load_corpus(m_config[0], input);
+ load_corpus(sourcePath, source);
+ load_target(targetPath, targetAndAlignment);
+ load_alignment(alignmentPath, targetAndAlignment);
+
+ cerr << "creating suffix array" << endl;
+ suffixArray = new tmmt::SuffixArray( sourcePath );
+ cerr << "done creating suffix array" << endl;
+
+ }
+
+ string FuzzyMatchWrapper::Extract(const string &inputPath)
+ {
+ const Moses::StaticData &staticData = Moses::StaticData::Instance();
+
+ string fuzzyMatchFile = ExtractTM(inputPath);
+
+ string cmd = string("perl ");
+#ifdef IS_XCODE
+ cmd += "/Users/hieuhoang/unison/workspace/github/hieuhoang/scripts/fuzzy-match/create_xml.perl " + fuzzyMatchFile;
+#else
+ cmd += staticData.GetBinDirectory() + "/../scripts/fuzzy-match/create_xml.perl " + fuzzyMatchFile;
+#endif
+ cerr << cmd << endl;
+ system(cmd.c_str());
+
+ remove(fuzzyMatchFile.c_str());
+ remove((fuzzyMatchFile + ".extract").c_str());
+ remove((fuzzyMatchFile + ".extract.inv").c_str());
+ remove((fuzzyMatchFile + ".extract.sorted.gz").c_str());
+ remove((fuzzyMatchFile + ".extract.inv.sorted.gz").c_str());
+
+ return fuzzyMatchFile + ".pt.gz";
+ }
+
+ string FuzzyMatchWrapper::ExtractTM(const string &inputPath)
+ {
+ util::TempMaker tempFile("moses");
+ util::scoped_fd alive;
+ string outputFileName(tempFile.Name(alive));
+
+ ofstream outputFile(outputFileName.c_str());
+
+ vector< vector< WORD_ID > > input;
+ load_corpus(inputPath, input);
+
+ assert(input.size() == 1);
+ size_t sentenceInd = 0;
+
+ clock_t start_clock = clock();
+ // if (i % 10 == 0) cerr << ".";
+
+ // establish some basic statistics
+
+ // int input_length = compute_length( input[i] );
+ int input_length = input[sentenceInd].size();
+ int best_cost = input_length * (100-min_match) / 100 + 1;
+
+ int match_count = 0; // how many substring matches to be considered
+ //cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
+
+ // find match ranges in suffix array
+ vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
+ for(size_t start=0;start<input[sentenceInd].size();start++)
+ {
+ SuffixArray::INDEX prior_first_match = 0;
+ SuffixArray::INDEX prior_last_match = suffixArray->GetSize()-1;
+ vector< string > substring;
+ bool stillMatched = true;
+ vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
+ //cerr << "start: " << start;
+ for(int word=start; stillMatched && word<input[sentenceInd].size(); word++)
+ {
+ substring.push_back( vocabulary.GetWord( input[sentenceInd][word] ) );
+
+ // only look up, if needed (i.e. no unnecessary short gram lookups)
+ // if (! word-start+1 <= short_match_max_length( input_length ) )
+ // {
+ SuffixArray::INDEX first_match, last_match;
+ stillMatched = false;
+ if (suffixArray->FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
+ {
+ stillMatched = true;
+ matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
+ //cerr << " (" << first_match << "," << last_match << ")";
+ //cerr << " " << ( last_match - first_match + 1 );
+ prior_first_match = first_match;
+ prior_last_match = last_match;
+ }
+ //}
+ }
+ //cerr << endl;
+ match_range.push_back( matchedAtThisStart );
+ }
+
+ clock_t clock_range = clock();
+
+ map< int, vector< Match > > sentence_match;
+ map< int, int > sentence_match_word_count;
+
+ // go through all matches, longest first
+ for(int length = input[sentenceInd].size(); length >= 1; length--)
+ {
+ // do not create matches, if these are handled by the short match function
+ if (length <= short_match_max_length( input_length ) )
+ {
+ continue;
+ }
+
+ unsigned int count = 0;
+ for(int start = 0; start <= input[sentenceInd].size() - length; start++)
+ {
+ if (match_range[start].size() >= length)
+ {
+ pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
+ // cerr << " (" << range.first << "," << range.second << ")";
+ count += range.second - range.first + 1;
+
+ for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
+ {
+ int position = suffixArray->GetPosition( i );
+
+ // sentence length mismatch
+ size_t sentence_id = suffixArray->GetSentence( position );
+ int sentence_length = suffixArray->GetSentenceLength( sentence_id );
+ int diff = abs( (int)sentence_length - (int)input_length );
+ // cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
+ //if (length <= 2 && input_length>=5 &&
+ // sentence_match.find( sentence_id ) == sentence_match.end())
+ // continue;
+
+ if (diff > best_cost)
+ continue;
+
+ // compute minimal cost
+ int start_pos = suffixArray->GetWordInSentence( position );
+ int end_pos = start_pos + length-1;
+ // cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
+ // << start << "-" << (start+length-1) << " (" << input_length << ")";
+ // different number of prior words -> cost is at least diff
+ int min_cost = abs( start - start_pos );
+
+ // same number of words, but not sent. start -> cost is at least 1
+ if (start == start_pos && start>0)
+ min_cost++;
+
+ // different number of remaining words -> cost is at least diff
+ min_cost += abs( ( sentence_length-1 - end_pos ) -
+ ( input_length-1 - (start+length-1) ) );
+
+ // same number of words, but not sent. end -> cost is at least 1
+ if ( sentence_length-1 - end_pos ==
+ input_length-1 - (start+length-1)
+ && end_pos != sentence_length-1 )
+ min_cost++;
+
+ // cerr << " -> min_cost " << min_cost;
+ if (min_cost > best_cost)
+ continue;
+
+ // valid match
+ match_count++;
+
+ // compute maximal cost
+ int max_cost = max( start, start_pos )
+ + max( sentence_length-1 - end_pos,
+ input_length-1 - (start+length-1) );
+ // cerr << ", max_cost " << max_cost;
+
+ Match m = Match( start, start+length-1,
+ start_pos, start_pos+length-1,
+ min_cost, max_cost, 0);
+ sentence_match[ sentence_id ].push_back( m );
+ sentence_match_word_count[ sentence_id ] += length;
+
+ if (max_cost < best_cost)
+ {
+ best_cost = max_cost;
+ if (best_cost == 0) break;
+ }
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ }
+ // cerr << endl;
+ if (best_cost == 0) break;
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ // cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
+
+ if (best_cost == 0) break;
+ //if (match_count >= MAX_MATCH_COUNT) break;
+ }
+ cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
+
+ clock_t clock_matches = clock();
+
+ // consider each sentence for which we have matches
+ int old_best_cost = best_cost;
+ int tm_count_word_match = 0;
+ int tm_count_word_match2 = 0;
+ int pruned_match_count = 0;
+ if (short_match_max_length( input_length ))
+ {
+ init_short_matches( input[sentenceInd] );
+ }
+ vector< int > best_tm;
+ typedef map< int, vector< Match > >::iterator I;
+
+ clock_t clock_validation_sum = 0;
+
+ for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
+ {
+ int tmID = tm->first;
+ int tm_length = suffixArray->GetSentenceLength(tmID);
+ vector< Match > &match = tm->second;
+ add_short_matches( match, source[tmID], input_length, best_cost );
+
+ //cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
+
+ // quick look: how many words are matched
+ int words_matched = 0;
+ for(int m=0;m<match.size();m++) {
+
+ if (match[m].min_cost <= best_cost) // makes no difference
+ words_matched += match[m].input_end - match[m].input_start + 1;
+ }
+ if (max(input_length,tm_length) - words_matched > best_cost)
+ {
+ if (length_filter_flag) continue;
+ }
+ tm_count_word_match++;
+
+ // prune, check again how many words are matched
+ vector< Match > pruned = prune_matches( match, best_cost );
+ words_matched = 0;
+ for(int p=0;p<pruned.size();p++) {
+ words_matched += pruned[p].input_end - pruned[p].input_start + 1;
+ }
+ if (max(input_length,tm_length) - words_matched > best_cost)
+ {
+ if (length_filter_flag) continue;
+ }
+ tm_count_word_match2++;
+
+ pruned_match_count += pruned.size();
+ int prior_best_cost = best_cost;
+ int cost;
+
+ clock_t clock_validation_start = clock();
+ if (! parse_flag ||
+ pruned.size()>=10) // to prevent worst cases
+ {
+ string path;
+ cost = sed( input[sentenceInd], source[tmID], path, false );
+ if (cost < best_cost)
+ {
+ best_cost = cost;
+ }
+ }
+
+ else
+ {
+ cost = parse_matches( pruned, input_length, tm_length, best_cost );
+ if (prior_best_cost != best_cost)
+ {
+ best_tm.clear();
+ }
+ }
+ clock_validation_sum += clock() - clock_validation_start;
+ if (cost == best_cost)
+ {
+ best_tm.push_back( tmID );
+ }
+ }
+ cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
+ cerr << "tm considered: " << sentence_match.size()
+ << " word-matched: " << tm_count_word_match
+ << " word-matched2: " << tm_count_word_match2
+ << " best: " << best_tm.size() << endl;
+
+ cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
+
+ // create xml and extract files
+ string inputStr, sourceStr;
+ for (size_t pos = 0; pos < input_length; ++pos) {
+ inputStr += vocabulary.GetWord(input[sentenceInd][pos]) + " ";
+ }
+
+ // do not try to find the best ... report multiple matches
+ if (multiple_flag) {
+ int input_letter_length = compute_length( input[sentenceInd] );
+ for(int si=0; si<best_tm.size(); si++) {
+ int s = best_tm[si];
+ string path;
+ unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
+ // do not report multiple identical sentences, but just their count
+ cout << sentenceInd << " "; // sentence number
+ cout << letter_cost << "/" << input_letter_length << " ";
+ cout << "(" << best_cost <<"/" << input_length <<") ";
+ cout << "||| " << s << " ||| " << path << endl;
+
+ vector<WORD_ID> &sourceSentence = source[s];
+ vector<SentenceAlignment> &targets = targetAndAlignment[s];
+ create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, path, outputFile);
+
+ }
+ } // if (multiple_flag)
+ else {
+
+ // find the best matches according to letter sed
+ string best_path = "";
+ int best_match = -1;
+ int best_letter_cost;
+ if (lsed_flag) {
+ best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
+ for(int si=0; si<best_tm.size(); si++)
+ {
+ int s = best_tm[si];
+ string path;
+ unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
+ if (letter_cost < best_letter_cost)
+ {
+ best_letter_cost = letter_cost;
+ best_path = path;
+ best_match = s;
+ }
+ }
+ }
+ // if letter sed turned off, just compute path for first match
+ else {
+ if (best_tm.size() > 0) {
+ string path;
+ sed( input[sentenceInd], source[best_tm[0]], path, false );
+ best_path = path;
+ best_match = best_tm[0];
+ }
+ }
+ cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
+ << " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
+ << " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
+ << " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
+ << " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
+ << " )" << endl;
+ if (lsed_flag) {
+ cout << best_letter_cost << "/" << compute_length( input[sentenceInd] ) << " (";
+ }
+ cout << best_cost <<"/" << input_length;
+ if (lsed_flag) cout << ")";
+ cout << " ||| " << best_match << " ||| " << best_path << endl;
+
+ // creat xml & extracts
+ vector<WORD_ID> &sourceSentence = source[best_match];
+ vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
+ create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, best_path, outputFile);
+
+ } // else if (multiple_flag)
+
+ outputFile.close();
+
+ return outputFileName;
+ }
+
+ void FuzzyMatchWrapper::load_corpus( const std::string &fileName, vector< vector< WORD_ID > > &corpus )
+ { // source
+ ifstream fileStream;
+ fileStream.open(fileName.c_str());
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ cerr << "loading " << fileName << endl;
+
+ istream *fileStreamP = &fileStream;
+
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+ corpus.push_back( vocabulary.Tokenize( line ) );
+ }
+ }
+
+ void FuzzyMatchWrapper::load_target(const std::string &fileName, vector< vector< SentenceAlignment > > &corpus)
+ {
+ ifstream fileStream;
+ fileStream.open(fileName.c_str());
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ cerr << "loading " << fileName << endl;
+
+ istream *fileStreamP = &fileStream;
+
+ WORD_ID delimiter = vocabulary.StoreIfNew("|||");
+
+ int lineNum = 0;
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+
+ vector<WORD_ID> toks = vocabulary.Tokenize( line );
+
+ corpus.push_back(vector< SentenceAlignment >());
+ vector< SentenceAlignment > &vec = corpus.back();
+
+ vec.push_back(SentenceAlignment());
+ SentenceAlignment *sentence = &vec.back();
+
+ const WORD &countStr = vocabulary.GetWord(toks[0]);
+ sentence->count = atoi(countStr.c_str());
+
+ for (size_t i = 1; i < toks.size(); ++i) {
+ WORD_ID wordId = toks[i];
+
+ if (wordId == delimiter) {
+ // target and alignments can have multiple sentences.
+ vec.push_back(SentenceAlignment());
+ sentence = &vec.back();
+
+ // count
+ ++i;
+
+ const WORD &countStr = vocabulary.GetWord(toks[i]);
+ sentence->count = atoi(countStr.c_str());
+ }
+ else {
+ // just a normal word, add
+ sentence->target.push_back(wordId);
+ }
+ }
+
+ ++lineNum;
+
+ }
+
+ }
+
+
+ void FuzzyMatchWrapper::load_alignment(const std::string &fileName, vector< vector< SentenceAlignment > > &corpus )
+ {
+ ifstream fileStream;
+ fileStream.open(fileName.c_str());
+ if (!fileStream) {
+ cerr << "file not found: " << fileName << endl;
+ exit(1);
+ }
+ cerr << "loading " << fileName << endl;
+
+ istream *fileStreamP = &fileStream;
+
+ string delimiter = "|||";
+
+ int lineNum = 0;
+ char line[LINE_MAX_LENGTH];
+ while(true)
+ {
+ SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
+ if (fileStreamP->eof()) break;
+
+ vector< SentenceAlignment > &vec = corpus[lineNum];
+ size_t targetInd = 0;
+ SentenceAlignment *sentence = &vec[targetInd];
+
+ vector<string> toks = Moses::Tokenize(line);
+
+ for (size_t i = 0; i < toks.size(); ++i) {
+ string &tok = toks[i];
+
+ if (tok == delimiter) {
+ // target and alignments can have multiple sentences.
+ ++targetInd;
+ sentence = &vec[targetInd];
+
+ ++i;
+ }
+ else {
+ // just a normal alignment, add
+ vector<int> alignPoint = Moses::Tokenize<int>(tok, "-");
+ assert(alignPoint.size() == 2);
+ sentence->alignment.push_back(pair<int,int>(alignPoint[0], alignPoint[1]));
+ }
+ }
+
+ ++lineNum;
+
+ }
+ }
+
+/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
+
+unsigned int FuzzyMatchWrapper::letter_sed( WORD_ID aIdx, WORD_ID bIdx )
+{
+ // check if already computed -> lookup in cache
+ pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
+ map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
+ if (lookup != lsed.end())
+ {
+ return (lookup->second);
+ }
+
+ // get surface strings for word indices
+ const string &a = vocabulary.GetWord( aIdx );
+ const string &b = vocabulary.GetWord( bIdx );
+
+ // initialize cost matrix
+ unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
+ cost[i][0] = i;
+ }
+ for( unsigned int j=0; j<=b.size(); j++ ) {
+ cost[0][j] = j;
+ }
+
+ // core string edit distance loop
+ for( unsigned int i=1; i<=a.size(); i++ ) {
+ for( unsigned int j=1; j<=b.size(); j++ ) {
+
+ unsigned int ins = cost[i-1][j] + 1;
+ unsigned int del = cost[i][j-1] + 1;
+ bool match = (a.substr(i-1,1).compare( b.substr(j-1,1) ) == 0);
+ unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);
+
+ unsigned int min = (ins < del) ? ins : del;
+ min = (diag < min) ? diag : min;
+
+ cost[i][j] = min;
+ }
+ }
+
+ // clear out memory
+ unsigned int final = cost[a.size()][b.size()];
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ free( cost[i] );
+ }
+ free( cost );
+
+ // cache and return result
+ lsed[ pIdx ] = final;
+ return final;
+}
+
+/* string edit distance implementation */
+
+unsigned int FuzzyMatchWrapper::sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
+
+ // initialize cost and path matrices
+ unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
+ char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
+
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
+ path[i] = (char*) calloc( sizeof(char), b.size()+1 );
+ if (i>0)
+ {
+ cost[i][0] = cost[i-1][0];
+ if (use_letter_sed)
+ {
+ cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
+ }
+ else
+ {
+ cost[i][0]++;
+ }
+ }
+ else
+ {
+ cost[i][0] = 0;
+ }
+ path[i][0] = 'I';
+ }
+
+ for( unsigned int j=0; j<=b.size(); j++ ) {
+ if (j>0)
+ {
+ cost[0][j] = cost[0][j-1];
+ if (use_letter_sed)
+ {
+ cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
+ }
+ else
+ {
+ cost[0][j]++;
+ }
+ }
+ else
+ {
+ cost[0][j] = 0;
+ }
+ path[0][j] = 'D';
+ }
+
+ // core string edit distance algorithm
+ for( unsigned int i=1; i<=a.size(); i++ ) {
+ for( unsigned int j=1; j<=b.size(); j++ ) {
+ unsigned int ins = cost[i-1][j];
+ unsigned int del = cost[i][j-1];
+ unsigned int match;
+ if (use_letter_sed)
+ {
+ ins += vocabulary.GetWord( a[i-1] ).size();
+ del += vocabulary.GetWord( b[j-1] ).size();
+ match = letter_sed( a[i-1], b[j-1] );
+ }
+ else
+ {
+ ins++;
+ del++;
+ match = ( a[i-1] == b[j-1] ) ? 0 : 1;
+ }
+ unsigned int diag = cost[i-1][j-1] + match;
+
+ char action = (ins < del) ? 'I' : 'D';
+ unsigned int min = (ins < del) ? ins : del;
+ if (diag < min)
+ {
+ action = (match>0) ? 'S' : 'M';
+ min = diag;
+ }
+
+ cost[i][j] = min;
+ path[i][j] = action;
+ }
+ }
+
+ // construct string for best path
+ unsigned int i = a.size();
+ unsigned int j = b.size();
+ best_path = "";
+ while( i>0 || j>0 )
+ {
+ best_path = path[i][j] + best_path;
+ if (path[i][j] == 'I')
+ {
+ i--;
+ }
+ else if (path[i][j] == 'D')
+ {
+ j--;
+ }
+ else
+ {
+ i--;
+ j--;
+ }
+ }
+
+
+ // clear out memory
+ unsigned int final = cost[a.size()][b.size()];
+
+ for( unsigned int i=0; i<=a.size(); i++ ) {
+ free( cost[i] );
+ free( path[i] );
+ }
+ free( cost );
+ free( path );
+
+ // return result
+ return final;
+}
+
+/* utlility function: compute length of sentence in characters
+ (spaces do not count) */
+
+unsigned int FuzzyMatchWrapper::compute_length( const vector< WORD_ID > &sentence )
+{
+ unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
+ {
+ length += vocabulary.GetWord( sentence[i] ).size();
+ }
+ return length;
+}
+
+/* brute force method: compare input to all corpus sentences */
+
+ int FuzzyMatchWrapper::basic_fuzzy_match( vector< vector< WORD_ID > > source,
+ vector< vector< WORD_ID > > input )
+{
+ // go through input set...
+ for(unsigned int i=0;i<input.size();i++)
+ {
+ bool use_letter_sed = false;
+
+ // compute sentence length and worst allowed cost
+ unsigned int input_length;
+ if (use_letter_sed)
+ {
+ input_length = compute_length( input[i] );
+ }
+ else
+ {
+ input_length = input[i].size();
+ }
+ unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
+ string best_path = "";
+ int best_match = -1;
+
+ // go through all corpus sentences
+ for(unsigned int s=0;s<source.size();s++)
+ {
+ int source_length;
+ if (use_letter_sed)
+ {
+ source_length = compute_length( source[s] );
+ }
+ else
+ {
+ source_length = source[s].size();
+ }
+ int diff = abs((int)source_length - (int)input_length);
+ if (length_filter_flag && (diff >= best_cost))
+ {
+ continue;
+ }
+
+ // compute string edit distance
+ string path;
+ unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
+
+ // update if new best
+ if (cost < best_cost)
+ {
+ best_cost = cost;
+ best_path = path;
+ best_match = s;
+ }
+ }
+ cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
+ }
+}
+
+/* definition of short matches
+ very short n-gram matches (1-grams) will not be looked up in
+ the suffix array, since there are too many matches
+ and for longer sentences, at least one 2-gram match must occur */
+
+int FuzzyMatchWrapper::short_match_max_length( int input_length )
+{
+ if ( ! refined_flag )
+ return 0;
+ if ( input_length >= 5 )
+ return 1;
+ return 0;
+}
+
+
+/* if we have non-short matches in a sentence, we need to
+ take a closer look at it.
+ this function creates a hash map for all input words and their positions
+ (to be used by the next function)
+ (done here, because this has be done only once for an input sentence) */
+
+void FuzzyMatchWrapper::init_short_matches( const vector< WORD_ID > &input )
+{
+ int max_length = short_match_max_length( input.size() );
+ if (max_length == 0)
+ return;
+
+ single_word_index.clear();
+
+ // store input words and their positions in hash map
+ for(int i=0; i<input.size(); i++)
+ {
+ if (single_word_index.find( input[i] ) == single_word_index.end())
+ {
+ vector< int > position_vector;
+ single_word_index[ input[i] ] = position_vector;
+ }
+ single_word_index[ input[i] ].push_back( i );
+ }
+}
+
+/* add all short matches to list of matches for a sentence */
+
+void FuzzyMatchWrapper::add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
+{
+ int max_length = short_match_max_length( input_length );
+ if (max_length == 0)
+ return;
+
+ int tm_length = tm.size();
+ map< WORD_ID,vector< int > >::iterator input_word_hit;
+ for(int t_pos=0; t_pos<tm.size(); t_pos++)
+ {
+ input_word_hit = single_word_index.find( tm[t_pos] );
+ if (input_word_hit != single_word_index.end())
+ {
+ vector< int > &position_vector = input_word_hit->second;
+ for(int j=0; j<position_vector.size(); j++)
+ {
+ int &i_pos = position_vector[j];
+
+ // before match
+ int max_cost = max( i_pos , t_pos );
+ int min_cost = abs( i_pos - t_pos );
+ if ( i_pos>0 && i_pos == t_pos )
+ min_cost++;
+
+ // after match
+ max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
+ min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
+ if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
+ min_cost++;
+
+ if (min_cost <= best_cost)
+ {
+ Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
+ match.push_back( new_match );
+ }
+ }
+ }
+ }
+}
+
+/* remove matches that are subsumed by a larger match */
+
+vector< Match > FuzzyMatchWrapper::prune_matches( const vector< Match > &match, int best_cost )
+{
+ //cerr << "\tpruning";
+ vector< Match > pruned;
+ for(int i=match.size()-1; i>=0; i--)
+ {
+ //cerr << " (" << match[i].input_start << "," << match[i].input_end
+ // << " ; " << match[i].tm_start << "," << match[i].tm_end
+ // << " * " << match[i].min_cost << ")";
+
+ //if (match[i].min_cost > best_cost)
+ // continue;
+
+ bool subsumed = false;
+ for(int j=match.size()-1; j>=0; j--)
+ {
+ if (i!=j // do not compare match with itself
+ && ( match[i].input_end - match[i].input_start <=
+ match[j].input_end - match[j].input_start ) // i shorter than j
+ && ((match[i].input_start == match[j].input_start &&
+ match[i].tm_start == match[j].tm_start ) ||
+ (match[i].input_end == match[j].input_end &&
+ match[i].tm_end == match[j].tm_end) ) )
+ {
+ subsumed = true;
+ }
+ }
+ if (! subsumed && match[i].min_cost <= best_cost)
+ {
+ //cerr << "*";
+ pruned.push_back( match[i] );
+ }
+ }
+ //cerr << endl;
+ return pruned;
+}
+
+/* A* parsing method to compute string edit distance */
+
+int FuzzyMatchWrapper::parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
+{
+ // cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
+
+ if (match.size() == 1)
+ return match[0].max_cost;
+ if (match.size() == 0)
+ return input_length+tm_length;
+
+ int this_best_cost = input_length + tm_length;
+ for(int i=0;i<match.size();i++)
+ {
+ this_best_cost = min( this_best_cost, match[i].max_cost );
+ }
+ // cerr << "\tthis best cost: " << this_best_cost << endl;
+
+ // bottom up combination of spans
+ vector< vector< Match > > multi_match;
+ multi_match.push_back( match );
+
+ int match_level = 1;
+ while(multi_match[ match_level-1 ].size()>0)
+ {
+ // init vector
+ vector< Match > empty;
+ multi_match.push_back( empty );
+
+ for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
+ {
+ int second_level = match_level - first_level -1;
+ //cerr << "\tcombining level " << first_level << " and " << second_level << endl;
+
+ vector< Match > &first_match = multi_match[ first_level ];
+ vector< Match > &second_match = multi_match[ second_level ];
+
+ for(int i1 = 0; i1 < first_match.size(); i1++) {
+ for(int i2 = 0; i2 < second_match.size(); i2++) {
+
+ // do not combine the same pair twice
+ if (first_level == second_level && i2 <= i1)
+ {
+ continue;
+ }
+
+ // get sorted matches (first is before second)
+ Match *first, *second;
+ if (first_match[i1].input_start < second_match[i2].input_start )
+ {
+ first = &first_match[i1];
+ second = &second_match[i2];
+ }
+ else
+ {
+ second = &first_match[i1];
+ first = &second_match[i2];
+ }
+
+ //cerr << "\tcombining "
+ // << "(" << first->input_start << "," << first->input_end << "), "
+ // << first->tm_start << " [" << first->internal_cost << "]"
+ // << " with "
+ // << "(" << second->input_start << "," << second->input_end << "), "
+ // << second->tm_start<< " [" << second->internal_cost << "]"
+ // << endl;
+
+ // do not process overlapping matches
+ if (first->input_end >= second->input_start)
+ {
+ continue;
+ }
+
+ // no overlap / mismatch in tm
+ if (first->tm_end >= second->tm_start)
+ {
+ continue;
+ }
+
+ // compute cost
+ int min_cost = 0;
+ int max_cost = 0;
+
+ // initial
+ min_cost += abs( first->input_start - first->tm_start );
+ max_cost += max( first->input_start, first->tm_start );
+
+ // same number of words, but not sent. start -> cost is at least 1
+ if (first->input_start == first->tm_start && first->input_start > 0)
+ {
+ min_cost++;
+ }
+
+ // in-between
+ int skipped_words = second->input_start - first->input_end -1;
+ int skipped_words_tm = second->tm_start - first->tm_end -1;
+ int internal_cost = max( skipped_words, skipped_words_tm );
+ internal_cost += first->internal_cost + second->internal_cost;
+ min_cost += internal_cost;
+ max_cost += internal_cost;
+
+ // final
+ min_cost += abs( (tm_length-1 - second->tm_end) -
+ (input_length-1 - second->input_end) );
+ max_cost += max( (tm_length-1 - second->tm_end),
+ (input_length-1 - second->input_end) );
+
+ // same number of words, but not sent. end -> cost is at least 1
+ if ( ( input_length-1 - second->input_end
+ == tm_length-1 - second->tm_end )
+ && input_length-1 != second->input_end )
+ {
+ min_cost++;
+ }
+
+ // cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
+
+ // if worst than best cost, forget it
+ if (min_cost > best_cost)
+ {
+ continue;
+ }
+
+ // add match
+ Match new_match( first->input_start,
+ second->input_end,
+ first->tm_start,
+ second->tm_end,
+ min_cost,
+ max_cost,
+ internal_cost);
+ multi_match[ match_level ].push_back( new_match );
+ // cerr << "\tstored\n";
+
+ // possibly updating this_best_cost
+ if (max_cost < this_best_cost)
+ {
+ // cerr << "\tupdating this best cost to " << max_cost << "\n";
+ this_best_cost = max_cost;
+
+ // possibly updating best_cost
+ if (max_cost < best_cost)
+ {
+ // cerr << "\tupdating best cost to " << max_cost << "\n";
+ best_cost = max_cost;
+ }
+ }
+ }
+ }
+ }
+ match_level++;
+ }
+ return this_best_cost;
+}
+
+
+void FuzzyMatchWrapper::create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path, ofstream &outputFile)
+{
+ string sourceStr;
+ for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
+ WORD_ID wordId = sourceSentence[pos];
+ sourceStr += vocabulary.GetWord(wordId) + " ";
+ }
+
+ for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
+ const SentenceAlignment &sentenceAlignment = targets[targetInd];
+ string targetStr = sentenceAlignment.getTargetString(vocabulary);
+ string alignStr = sentenceAlignment.getAlignmentString();
+
+ outputFile
+ << sentenceInd << endl
+ << cost << endl
+ << sourceStr << endl
+ << inputStr << endl
+ << targetStr << endl
+ << alignStr << endl
+ << path << endl
+ << sentenceAlignment.count << endl;
+
+ }
+}
+
+} // namespace
diff --git a/moses/src/fuzzy-match/FuzzyMatchWrapper.h b/moses/src/fuzzy-match/FuzzyMatchWrapper.h
new file mode 100644
index 000000000..842da4732
--- /dev/null
+++ b/moses/src/fuzzy-match/FuzzyMatchWrapper.h
@@ -0,0 +1,67 @@
+//
+// FuzzyMatchWrapper.h
+// moses
+//
+// Created by Hieu Hoang on 26/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef moses_FuzzyMatchWrapper_h
+#define moses_FuzzyMatchWrapper_h
+
+#include <fstream>
+#include <string>
+#include "fuzzy-match/SuffixArray.h"
+#include "fuzzy-match/Vocabulary.h"
+#include "fuzzy-match/Match.h"
+
+namespace tmmt
+{
+class Match;
+class SentenceAlignment;
+
+class FuzzyMatchWrapper
+{
+public:
+ FuzzyMatchWrapper(const std::string &source, const std::string &target, const std::string &alignment);
+
+ std::string Extract(const std::string &inputPath);
+
+protected:
+ // tm-mt
+ tmmt::Vocabulary vocabulary;
+ std::vector< std::vector< tmmt::WORD_ID > > source;
+ std::vector< std::vector< tmmt::SentenceAlignment > > targetAndAlignment;
+ tmmt::SuffixArray *suffixArray;
+
+ // global cache for word pairs
+ std::map< std::pair< WORD_ID, WORD_ID >, unsigned int > lsed;
+
+ void load_corpus( const std::string &fileName, std::vector< std::vector< tmmt::WORD_ID > > &corpus );
+ void load_target( const std::string &fileName, std::vector< std::vector< tmmt::SentenceAlignment > > &corpus);
+ void load_alignment( const std::string &fileName, std::vector< std::vector< tmmt::SentenceAlignment > > &corpus );
+
+ /** brute force method: compare input to all corpus sentences */
+ int basic_fuzzy_match( std::vector< std::vector< tmmt::WORD_ID > > source,
+ std::vector< std::vector< tmmt::WORD_ID > > input ) ;
+
+ /** utlility function: compute length of sentence in characters
+ (spaces do not count) */
+ unsigned int compute_length( const std::vector< tmmt::WORD_ID > &sentence );
+ unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx );
+ unsigned int sed( const std::vector< WORD_ID > &a, const std::vector< WORD_ID > &b, std::string &best_path, bool use_letter_sed );
+ void init_short_matches( const std::vector< WORD_ID > &input );
+ int short_match_max_length( int input_length );
+ void add_short_matches( std::vector< Match > &match, const std::vector< WORD_ID > &tm, int input_length, int best_cost );
+ std::vector< Match > prune_matches( const std::vector< Match > &match, int best_cost );
+ int parse_matches( std::vector< Match > &match, int input_length, int tm_length, int &best_cost );
+
+ void create_extract(int sentenceInd, int cost, const std::vector< WORD_ID > &sourceSentence, const std::vector<SentenceAlignment> &targets, const std::string &inputStr, const std::string &path, std::ofstream &outputFile);
+
+ std::string ExtractTM(const std::string &inputPath);
+
+};
+
+}
+
+#endif
diff --git a/moses/src/fuzzy-match/Jamfile b/moses/src/fuzzy-match/Jamfile
new file mode 100644
index 000000000..3635297a8
--- /dev/null
+++ b/moses/src/fuzzy-match/Jamfile
@@ -0,0 +1 @@
+lib fuzzy-match : [ glob *.cpp ] ..//moses_internal ;
diff --git a/moses/src/fuzzy-match/Match.h b/moses/src/fuzzy-match/Match.h
new file mode 100644
index 000000000..7feb25769
--- /dev/null
+++ b/moses/src/fuzzy-match/Match.h
@@ -0,0 +1,33 @@
+//
+// Match.h
+// fuzzy-match
+//
+// Created by Hieu Hoang on 25/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_Match_h
+#define fuzzy_match_Match_h
+
+namespace tmmt
+{
+
+/* data structure for n-gram match between input and corpus */
+
+class Match {
+public:
+ int input_start;
+ int input_end;
+ int tm_start;
+ int tm_end;
+ int min_cost;
+ int max_cost;
+ int internal_cost;
+ Match( int is, int ie, int ts, int te, int min, int max, int i )
+ :input_start(is), input_end(ie), tm_start(ts), tm_end(te), min_cost(min), max_cost(max), internal_cost(i)
+ {}
+};
+
+}
+
+#endif
diff --git a/moses/src/fuzzy-match/SentenceAlignment.cpp b/moses/src/fuzzy-match/SentenceAlignment.cpp
new file mode 100644
index 000000000..1559c404c
--- /dev/null
+++ b/moses/src/fuzzy-match/SentenceAlignment.cpp
@@ -0,0 +1,24 @@
+//
+// SentenceAlignment.cpp
+// moses
+//
+// Created by Hieu Hoang on 26/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#include <iostream>
+#include "fuzzy-match/SentenceAlignment.h"
+
+namespace tmmt
+{
+std::string SentenceAlignment::getTargetString(const Vocabulary &vocab) const
+{
+ std::stringstream strme;
+ for (size_t i = 0; i < target.size(); ++i) {
+ const WORD &word = vocab.GetWord(target[i]);
+ strme << word << " ";
+ }
+ return strme.str();
+}
+
+}
diff --git a/moses/src/fuzzy-match/SentenceAlignment.h b/moses/src/fuzzy-match/SentenceAlignment.h
new file mode 100644
index 000000000..2ba6e2458
--- /dev/null
+++ b/moses/src/fuzzy-match/SentenceAlignment.h
@@ -0,0 +1,44 @@
+//
+// SentenceAlignment.h
+// fuzzy-match
+//
+// Created by Hieu Hoang on 25/07/2012.
+// Copyright 2012 __MyCompanyName__. All rights reserved.
+//
+
+#ifndef fuzzy_match_SentenceAlignment_h
+#define fuzzy_match_SentenceAlignment_h
+
+#include <sstream>
+#include <vector>
+#include "fuzzy-match/Vocabulary.h"
+
+namespace tmmt
+{
+
+struct SentenceAlignment
+{
+ int count;
+ std::vector< WORD_ID > target;
+ std::vector< std::pair<int,int> > alignment;
+
+ SentenceAlignment()
+ {}
+
+ std::string getTargetString(const Vocabulary &vocab) const;
+
+ std::string getAlignmentString() const
+ {
+ std::stringstream strme;
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ const std::pair<int,int> &alignPair = alignment[i];
+ strme << alignPair.first << "-" << alignPair.second << " ";
+ }
+ return strme.str();
+ }
+
+};
+
+}
+
+#endif
diff --git a/moses/src/fuzzy-match/SuffixArray.cpp b/moses/src/fuzzy-match/SuffixArray.cpp
new file mode 100644
index 000000000..4e3e0fcd4
--- /dev/null
+++ b/moses/src/fuzzy-match/SuffixArray.cpp
@@ -0,0 +1,250 @@
+#include "SuffixArray.h"
+#include <string>
+#include <stdlib.h>
+#include <cstring>
+
+using namespace std;
+
+namespace tmmt
+{
+
+SuffixArray::SuffixArray( string fileName )
+{
+ m_vcb.StoreIfNew( "<uNk>" );
+ m_endOfSentence = m_vcb.StoreIfNew( "<s>" );
+
+ ifstream extractFile;
+ char line[LINE_MAX_LENGTH];
+
+ // count the number of words first;
+ extractFile.open(fileName.c_str());
+ istream *fileP = &extractFile;
+ m_size = 0;
+ size_t sentenceCount = 0;
+ while(!fileP->eof()) {
+ SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
+ if (fileP->eof()) break;
+ vector< WORD_ID > words = m_vcb.Tokenize( line );
+ m_size += words.size() + 1;
+ sentenceCount++;
+ }
+ extractFile.close();
+ cerr << m_size << " words (incl. sentence boundaries)" << endl;
+
+ // allocate memory
+ m_array = (WORD_ID*) calloc( sizeof( WORD_ID ), m_size );
+ m_index = (INDEX*) calloc( sizeof( INDEX ), m_size );
+ m_wordInSentence = (char*) calloc( sizeof( char ), m_size );
+ m_sentence = (size_t*) calloc( sizeof( size_t ), m_size );
+ m_sentenceLength = (char*) calloc( sizeof( char ), sentenceCount );
+
+ // fill the array
+ int wordIndex = 0;
+ int sentenceId = 0;
+ extractFile.open(fileName.c_str());
+ fileP = &extractFile;
+ while(!fileP->eof()) {
+ SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
+ if (fileP->eof()) break;
+ vector< WORD_ID > words = m_vcb.Tokenize( line );
+ vector< WORD_ID >::const_iterator i;
+
+ for( i=words.begin(); i!=words.end(); i++)
+ {
+ m_index[ wordIndex ] = wordIndex;
+ m_sentence[ wordIndex ] = sentenceId;
+ m_wordInSentence[ wordIndex ] = i-words.begin();
+ m_array[ wordIndex++ ] = *i;
+ }
+ m_index[ wordIndex ] = wordIndex;
+ m_array[ wordIndex++ ] = m_endOfSentence;
+ m_sentenceLength[ sentenceId++ ] = words.size();
+ }
+ extractFile.close();
+ cerr << "done reading " << wordIndex << " words, " << sentenceId << " sentences." << endl;
+ // List(0,9);
+
+ // sort
+ m_buffer = (INDEX*) calloc( sizeof( INDEX ), m_size );
+ Sort( 0, m_size-1 );
+ free( m_buffer );
+ cerr << "done sorting" << endl;
+}
+
+// good ol' quick sort
+void SuffixArray::Sort(INDEX start, INDEX end) {
+ if (start == end) return;
+ INDEX mid = (start+end+1)/2;
+ Sort( start, mid-1 );
+ Sort( mid, end );
+
+ // merge
+ int i = start;
+ int j = mid;
+ int k = 0;
+ int length = end-start+1;
+ while( k<length )
+ {
+ if (i == mid )
+ {
+ m_buffer[ k++ ] = m_index[ j++ ];
+ }
+ else if (j > end )
+ {
+ m_buffer[ k++ ] = m_index[ i++ ];
+ }
+ else {
+ if (CompareIndex( m_index[i], m_index[j] ) < 0)
+ {
+ m_buffer[ k++ ] = m_index[ i++ ];
+ }
+ else
+ {
+ m_buffer[ k++ ] = m_index[ j++ ];
+ }
+ }
+ }
+
+ memcpy( ((char*)m_index) + sizeof( INDEX ) * start,
+ ((char*)m_buffer), sizeof( INDEX ) * (end-start+1) );
+}
+
+SuffixArray::~SuffixArray()
+{
+ free(m_index);
+ free(m_array);
+}
+
+int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
+{
+ // skip over identical words
+ INDEX offset = 0;
+ while( a+offset < m_size &&
+ b+offset < m_size &&
+ m_array[ a+offset ] == m_array[ b+offset ] )
+ { offset++; }
+
+ if( a+offset == m_size ) return -1;
+ if( b+offset == m_size ) return 1;
+ return CompareWord( m_array[ a+offset ], m_array[ b+offset ] );
+}
+
+inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
+{
+ // cerr << "c(" << m_vcb.GetWord(a) << ":" << m_vcb.GetWord(b) << ")=" << m_vcb.GetWord(a).compare( m_vcb.GetWord(b) ) << endl;
+ return m_vcb.GetWord(a).compare( m_vcb.GetWord(b) );
+}
+
+int SuffixArray::Count( const vector< WORD > &phrase )
+{
+ INDEX dummy;
+ return LimitedCount( phrase, m_size, dummy, dummy, 0, m_size-1 );
+}
+
+bool SuffixArray::MinCount( const vector< WORD > &phrase, INDEX min )
+{
+ INDEX dummy;
+ return LimitedCount( phrase, min, dummy, dummy, 0, m_size-1 ) >= min;
+}
+
+bool SuffixArray::Exists( const vector< WORD > &phrase )
+{
+ INDEX dummy;
+ return LimitedCount( phrase, 1, dummy, dummy, 0, m_size-1 ) == 1;
+}
+
+int SuffixArray::FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
+{
+ return LimitedCount( phrase, m_size, firstMatch, lastMatch, search_start, search_end );
+}
+
+int SuffixArray::LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
+{
+ // cerr << "FindFirst\n";
+ INDEX start = search_start;
+ INDEX end = (search_end == -1) ? (m_size-1) : search_end;
+ INDEX mid = FindFirst( phrase, start, end );
+ // cerr << "done\n";
+ if (mid == m_size) return 0; // no matches
+ if (min == 1) return 1; // only existance check
+
+ int matchCount = 1;
+
+ //cerr << "before...\n";
+ firstMatch = FindLast( phrase, mid, start, -1 );
+ matchCount += mid - firstMatch;
+
+ //cerr << "after...\n";
+ lastMatch = FindLast( phrase, mid, end, 1 );
+ matchCount += lastMatch - mid;
+
+ return matchCount;
+}
+
+SuffixArray::INDEX SuffixArray::FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction )
+{
+ end += direction;
+ while(true)
+ {
+ INDEX mid = ( start + end + (direction>0 ? 0 : 1) )/2;
+
+ int match = Match( phrase, mid );
+ int matchNext = Match( phrase, mid+direction );
+ //cerr << "\t" << start << ";" << mid << ";" << end << " -> " << match << "," << matchNext << endl;
+
+ if (match == 0 && matchNext != 0) return mid;
+
+ if (match == 0) // mid point is a match
+ start = mid;
+ else
+ end = mid;
+ }
+}
+
+SuffixArray::INDEX SuffixArray::FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end )
+{
+ while(true)
+ {
+ INDEX mid = ( start + end + 1 )/2;
+ //cerr << "FindFirst(" << start << ";" << mid << ";" << end << ")\n";
+ int match = Match( phrase, mid );
+
+ if (match == 0) return mid;
+ if (start >= end && match != 0 ) return m_size;
+
+ if (match > 0)
+ start = mid+1;
+ else
+ end = mid-1;
+ }
+}
+
+int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
+{
+ INDEX pos = m_index[ index ];
+ for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++)
+ {
+ int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
+ // cerr << "{" << index << "+" << i << "," << pos+i << ":" << match << "}" << endl;
+ if (match != 0)
+ return match;
+ }
+ return 0;
+}
+
+void SuffixArray::List(INDEX start, INDEX end)
+{
+ for(INDEX i=start; i<=end; i++)
+ {
+ INDEX pos = m_index[ i ];
+ // cerr << i << ":" << pos << "\t";
+ for(int j=0; j<5 && j+pos<m_size; j++)
+ {
+ cout << " " << m_vcb.GetWord( m_array[ pos+j ] );
+ }
+ // cerr << "\n";
+ }
+}
+
+}
+
diff --git a/moses/src/fuzzy-match/SuffixArray.h b/moses/src/fuzzy-match/SuffixArray.h
new file mode 100644
index 000000000..f0be2a305
--- /dev/null
+++ b/moses/src/fuzzy-match/SuffixArray.h
@@ -0,0 +1,50 @@
+#include "Vocabulary.h"
+
+#pragma once
+
+#define LINE_MAX_LENGTH 10000
+
+namespace tmmt
+{
+
+class SuffixArray
+{
+public:
+ typedef unsigned int INDEX;
+
+private:
+ WORD_ID *m_array;
+ INDEX *m_index;
+ INDEX *m_buffer;
+ char *m_wordInSentence;
+ size_t *m_sentence;
+ char *m_sentenceLength;
+ WORD_ID m_endOfSentence;
+ Vocabulary m_vcb;
+ INDEX m_size;
+
+public:
+ SuffixArray( std::string fileName );
+ ~SuffixArray();
+
+ void Sort(INDEX start, INDEX end);
+ int CompareIndex( INDEX a, INDEX b ) const;
+ inline int CompareWord( WORD_ID a, WORD_ID b ) const;
+ int Count( const std::vector< WORD > &phrase );
+ bool MinCount( const std::vector< WORD > &phrase, INDEX min );
+ bool Exists( const std::vector< WORD > &phrase );
+ int FindMatches( const std::vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
+ int LimitedCount( const std::vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
+ INDEX FindFirst( const std::vector< WORD > &phrase, INDEX &start, INDEX &end );
+ INDEX FindLast( const std::vector< WORD > &phrase, INDEX start, INDEX end, int direction );
+ int Match( const std::vector< WORD > &phrase, INDEX index );
+ void List( INDEX start, INDEX end );
+ inline INDEX GetPosition( INDEX index ) { return m_index[ index ]; }
+ inline size_t GetSentence( INDEX position ) { return m_sentence[position]; }
+ inline char GetWordInSentence( INDEX position ) { return m_wordInSentence[position]; }
+ inline char GetSentenceLength( size_t sentenceId ) { return m_sentenceLength[sentenceId]; }
+ inline INDEX GetSize() { return m_size; }
+};
+
+}
+
diff --git a/moses/src/fuzzy-match/Vocabulary.cpp b/moses/src/fuzzy-match/Vocabulary.cpp
new file mode 100644
index 000000000..c322168ae
--- /dev/null
+++ b/moses/src/fuzzy-match/Vocabulary.cpp
@@ -0,0 +1,53 @@
+// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
+#include "Vocabulary.h"
+
+using namespace std;
+
+namespace tmmt
+{
+
+// as in beamdecoder/tables.cpp
+vector<WORD_ID> Vocabulary::Tokenize( const char input[] ) {
+ vector< WORD_ID > token;
+ bool betweenWords = true;
+ int start=0;
+ int i=0;
+ for(; input[i] != '\0'; i++) {
+ bool isSpace = (input[i] == ' ' || input[i] == '\t');
+
+ if (!isSpace && betweenWords) {
+ start = i;
+ betweenWords = false;
+ }
+ else if (isSpace && !betweenWords) {
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ betweenWords = true;
+ }
+ }
+ if (!betweenWords)
+ token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
+ return token;
+}
+
+WORD_ID Vocabulary::StoreIfNew( const WORD& word ) {
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+
+ if( i != lookup.end() )
+ return i->second;
+
+ WORD_ID id = vocab.size();
+ vocab.push_back( word );
+ lookup[ word ] = id;
+ return id;
+}
+
+WORD_ID Vocabulary::GetWordID( const WORD &word ) {
+ map<WORD, WORD_ID>::iterator i = lookup.find( word );
+ if( i == lookup.end() )
+ return 0;
+ WORD_ID w= (WORD_ID) i->second;
+ return w;
+}
+
+}
+
diff --git a/moses/src/fuzzy-match/Vocabulary.h b/moses/src/fuzzy-match/Vocabulary.h
new file mode 100644
index 000000000..59ba72f8e
--- /dev/null
+++ b/moses/src/fuzzy-match/Vocabulary.h
@@ -0,0 +1,44 @@
+// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
+
+#pragma once
+
+#include <iostream>
+#include <fstream>
+#include <assert.h>
+#include <stdlib.h>
+#include <string>
+#include <queue>
+#include <map>
+#include <cmath>
+
+namespace tmmt
+{
+
+#define MAX_LENGTH 10000
+
+#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
+ _IS.getline(_LINE, _SIZE, _DELIM); \
+ if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
+ if (_IS.gcount() == _SIZE-1) { \
+ cerr << "Line too long! Buffer overflow. Delete lines >=" \
+ << _SIZE << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
+ << endl; \
+ exit(1); \
+ } \
+ }
+
+typedef std::string WORD;
+typedef unsigned int WORD_ID;
+
+class Vocabulary {
+ public:
+ std::map<WORD, WORD_ID> lookup;
+ std::vector< WORD > vocab;
+ WORD_ID StoreIfNew( const WORD& );
+ WORD_ID GetWordID( const WORD& );
+ std::vector<WORD_ID> Tokenize( const char[] );
+ inline WORD &GetWord( WORD_ID id ) const { WORD &i = (WORD&) vocab[ id ]; return i; }
+};
+
+}
+
diff --git a/moses/src/gzfilebuf.h b/moses/src/gzfilebuf.h
index d59d3e1b7..2376c2875 100644
--- a/moses/src/gzfilebuf.h
+++ b/moses/src/gzfilebuf.h
@@ -5,6 +5,9 @@
#include <zlib.h>
#include <cstring>
+/** wrapper around gzip input stream. Unknown parentage
+ * @todo replace with boost version - output stream already uses it
+ */
class gzfilebuf : public std::streambuf
{
public:
diff --git a/scripts/training/phrase-extract/AlignmentPhrase.cpp b/phrase-extract/AlignmentPhrase.cpp
index 2d4439567..d51aadd01 100644
--- a/scripts/training/phrase-extract/AlignmentPhrase.cpp
+++ b/phrase-extract/AlignmentPhrase.cpp
@@ -24,6 +24,9 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
using namespace std;
+namespace MosesTraining
+{
+
void AlignmentElement::Merge(size_t align)
{
m_elements.insert(align);
@@ -40,3 +43,7 @@ void AlignmentPhrase::Merge(const std::vector< std::vector<size_t> > &source)
}
}
}
+
+} // namespace
+
+
diff --git a/scripts/training/phrase-extract/AlignmentPhrase.h b/phrase-extract/AlignmentPhrase.h
index f77b44f36..ec6431f18 100644
--- a/scripts/training/phrase-extract/AlignmentPhrase.h
+++ b/phrase-extract/AlignmentPhrase.h
@@ -23,9 +23,11 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <vector>
#include <set>
+namespace MosesTraining
+{
+
class WordsRange;
-
class AlignmentElement
{
protected:
@@ -68,5 +70,5 @@ public:
}
};
-
+} // namespace
diff --git a/scripts/training/phrase-extract/ExtractedRule.cpp b/phrase-extract/ExtractedRule.cpp
index c566e842a..985f2f093 100644
--- a/scripts/training/phrase-extract/ExtractedRule.cpp
+++ b/phrase-extract/ExtractedRule.cpp
@@ -10,6 +10,9 @@
using namespace std;
+namespace MosesTraining
+{
+
void ExtractedRule::OutputNTLengths(std::ostream &out) const
{
ostringstream outString;
@@ -39,3 +42,4 @@ std::ostream& operator<<(std::ostream &out, const ExtractedRule &obj)
return out;
}
+} // namespace
diff --git a/scripts/training/phrase-extract/ExtractedRule.h b/phrase-extract/ExtractedRule.h
index be6e30836..992a807b3 100644
--- a/scripts/training/phrase-extract/ExtractedRule.h
+++ b/phrase-extract/ExtractedRule.h
@@ -26,6 +26,9 @@
#include <sstream>
#include <map>
+namespace MosesTraining
+{
+
// sentence-level collection of rules
class ExtractedRule
{
@@ -71,4 +74,6 @@ public:
void OutputNTLengths(std::ostringstream &out) const;
};
+}
+
#endif
diff --git a/scripts/training/phrase-extract/Hole.h b/phrase-extract/Hole.h
index 31e928f6e..c570ec7a1 100644
--- a/scripts/training/phrase-extract/Hole.h
+++ b/phrase-extract/Hole.h
@@ -26,6 +26,9 @@
#include <string>
#include <vector>
+namespace MosesTraining
+{
+
class Hole
{
protected:
@@ -108,4 +111,6 @@ public:
}
};
+}
+
#endif
diff --git a/scripts/training/phrase-extract/HoleCollection.cpp b/phrase-extract/HoleCollection.cpp
index 4cffab7fd..fd79d74b1 100644
--- a/scripts/training/phrase-extract/HoleCollection.cpp
+++ b/phrase-extract/HoleCollection.cpp
@@ -21,6 +21,9 @@
#include <algorithm>
+namespace MosesTraining
+{
+
void HoleCollection::SortSourceHoles()
{
assert(m_sortedSourceHoles.size() == 0);
@@ -60,3 +63,5 @@ int HoleCollection::Scope(const Hole &proposedHole) const
}
return scope;
}
+
+}
diff --git a/scripts/training/phrase-extract/HoleCollection.h b/phrase-extract/HoleCollection.h
index 355e825fb..2894101bd 100644
--- a/scripts/training/phrase-extract/HoleCollection.h
+++ b/phrase-extract/HoleCollection.h
@@ -26,6 +26,9 @@
#include "Hole.h"
+namespace MosesTraining
+{
+
class HoleCollection
{
protected:
@@ -94,4 +97,6 @@ public:
};
+}
+
#endif
diff --git a/scripts/training/phrase-extract/InputFileStream.cpp b/phrase-extract/InputFileStream.cpp
index d111903e6..d111903e6 100644
--- a/scripts/training/phrase-extract/InputFileStream.cpp
+++ b/phrase-extract/InputFileStream.cpp
diff --git a/scripts/training/phrase-extract/InputFileStream.h b/phrase-extract/InputFileStream.h
index e2a31bc82..e2a31bc82 100644
--- a/scripts/training/phrase-extract/InputFileStream.h
+++ b/phrase-extract/InputFileStream.h
diff --git a/scripts/training/phrase-extract/Jamfile b/phrase-extract/Jamfile
index 9be67e80a..e4f801089 100644
--- a/scripts/training/phrase-extract/Jamfile
+++ b/phrase-extract/Jamfile
@@ -1,7 +1,8 @@
obj InputFileStream.o : InputFileStream.cpp : <include>. ;
-alias InputFileStream : InputFileStream.o ../../..//z ;
+alias InputFileStream : InputFileStream.o ..//z ;
obj tables-core.o : tables-core.cpp : <include>. ;
+obj domain.o : domain.cpp : <include>. ;
obj AlignmentPhrase.o : AlignmentPhrase.cpp : <include>. ;
obj SentenceAlignment.o : SentenceAlignment.cpp : <include>. ;
obj SyntaxTree.o : SyntaxTree.cpp : <include>. ;
@@ -10,28 +11,23 @@ obj XmlTree.o : XmlTree.cpp : <include>. ;
alias filestreams : InputFileStream.cpp OutputFileStream.cpp : : : <include>. ;
alias trees : SyntaxTree.cpp tables-core.o XmlTree.o : : : <include>. ;
-exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
+exe extract : tables-core.o SentenceAlignment.o extract.cpp OutputFileStream.cpp InputFileStream ../moses/src//ThreadPool ..//boost_iostreams ;
-exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ../../../moses/src//ThreadPool ../../..//boost_iostreams ;
+exe extract-rules : tables-core.o SentenceAlignment.o SyntaxTree.o XmlTree.o SentenceAlignmentWithSyntax.cpp HoleCollection.cpp extract-rules.cpp ExtractedRule.cpp OutputFileStream.cpp InputFileStream ..//boost_iostreams ;
exe extract-lex : extract-lex.cpp InputFileStream ;
-exe score : tables-core.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
+exe score : tables-core.o domain.o AlignmentPhrase.o score.cpp PhraseAlignment.cpp OutputFileStream.cpp InputFileStream ..//boost_iostreams ;
-exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
+exe consolidate : consolidate.cpp tables-core.o OutputFileStream.cpp InputFileStream ..//boost_iostreams ;
-exe consolidate-direct : consolidate-direct.cpp OutputFileStream.cpp InputFileStream ../../..//boost_iostreams ;
+exe consolidate-direct : consolidate-direct.cpp OutputFileStream.cpp InputFileStream ..//boost_iostreams ;
exe consolidate-reverse : consolidate-reverse.cpp tables-core.o InputFileStream ;
-exe relax-parse : tables-core.o SyntaxTree.o XmlTree.o relax-parse.cpp ;
+exe relax-parse : tables-core.o SyntaxTree.o XmlTree.o relax-parse.cpp InputFileStream ;
exe statistics : tables-core.o AlignmentPhrase.o statistics.cpp InputFileStream ;
alias programs : extract extract-rules extract-lex score consolidate consolidate-direct consolidate-reverse relax-parse statistics ;
-install legacy : programs : <location>. <install-type>EXE ;
-
-build-project extract-ghkm ;
-build-project pcfg-extract ;
-build-project pcfg-score ;
diff --git a/scripts/training/phrase-extract/OutputFileStream.cpp b/phrase-extract/OutputFileStream.cpp
index 2cad33bb9..2cad33bb9 100644
--- a/scripts/training/phrase-extract/OutputFileStream.cpp
+++ b/phrase-extract/OutputFileStream.cpp
diff --git a/scripts/training/phrase-extract/OutputFileStream.h b/phrase-extract/OutputFileStream.h
index f52e36d76..f52e36d76 100644
--- a/scripts/training/phrase-extract/OutputFileStream.h
+++ b/phrase-extract/OutputFileStream.h
diff --git a/scripts/training/phrase-extract/PhraseAlignment.cpp b/phrase-extract/PhraseAlignment.cpp
index ceb74f04c..bdfead082 100644
--- a/scripts/training/phrase-extract/PhraseAlignment.cpp
+++ b/phrase-extract/PhraseAlignment.cpp
@@ -17,6 +17,9 @@
using namespace std;
+namespace MosesTraining
+{
+
extern Vocabulary vcbT;
extern Vocabulary vcbS;
@@ -76,12 +79,11 @@ inline void Tokenize( std::vector<T> &output
}
// read in a phrase pair and store it
-void PhraseAlignment::create( char line[], int lineID )
+void PhraseAlignment::create( char line[], int lineID, bool includeSentenceIdFlag )
{
assert(phraseS.empty());
assert(phraseT.empty());
- //cerr << "processing " << line;
vector< string > token = tokenize( line );
int item = 1;
for (size_t j=0; j<token.size(); j++) {
@@ -108,12 +110,13 @@ void PhraseAlignment::create( char line[], int lineID )
alignedToT[t].insert( s );
alignedToS[s].insert( t );
}
- } else if (item == 4) { // count
+ } else if (includeSentenceIdFlag && item == 4) { // optional sentence id
+ sscanf(token[j].c_str(), "%d", &sentenceId);
+ } else if (item + (includeSentenceIdFlag?-1:0) == 4) { // count
sscanf(token[j].c_str(), "%f", &count);
- }
- else if (item == 5) { // non-term lengths
+ } else if (item + (includeSentenceIdFlag?-1:0) == 5) { // non-term lengths
addNTLength(token[j]);
- } else if (item == 6) { // target syntax PCFG score
+ } else if (item + (includeSentenceIdFlag?-1:0) == 6) { // target syntax PCFG score
float pcfgScore = std::atof(token[j].c_str());
pcfgSum = pcfgScore * count;
}
@@ -121,7 +124,7 @@ void PhraseAlignment::create( char line[], int lineID )
createAlignVec(phraseS.size(), phraseT.size());
- if (item == 3) {
+ if (item + (includeSentenceIdFlag?-1:0) == 3) {
count = 1.0;
}
if (item < 3 || item > 6) {
@@ -236,4 +239,5 @@ int PhraseAlignment::Compare(const PhraseAlignment &other) const
}
+}
diff --git a/scripts/training/phrase-extract/PhraseAlignment.h b/phrase-extract/PhraseAlignment.h
index 8bd83503d..35afb314b 100644
--- a/scripts/training/phrase-extract/PhraseAlignment.h
+++ b/phrase-extract/PhraseAlignment.h
@@ -13,6 +13,9 @@
#include <set>
#include <map>
+namespace MosesTraining
+{
+
// data structure for a single phrase pair
class PhraseAlignment
{
@@ -27,10 +30,13 @@ protected:
public:
float pcfgSum;
float count;
+ int sentenceId;
+ std::string domain;
+
std::vector< std::set<size_t> > alignedToT;
std::vector< std::set<size_t> > alignedToS;
- void create( char*, int );
+ void create( char*, int, bool );
void clear();
bool equals( const PhraseAlignment& );
bool match( const PhraseAlignment& );
@@ -52,3 +58,6 @@ public:
{ return m_ntLengths; }
};
+
+}
+
diff --git a/phrase-extract/PhraseExtractionOptions.h b/phrase-extract/PhraseExtractionOptions.h
new file mode 100644
index 000000000..eeec39750
--- /dev/null
+++ b/phrase-extract/PhraseExtractionOptions.h
@@ -0,0 +1,152 @@
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2010 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+/* Created by Rohit Gupta, CDAC, Mumbai, India on 18 July, 2012*/
+
+#pragma once
+#ifndef PHRASEEXTRACTIONOPTIONS_H_INCLUDED_
+#define PHRASEEXTRACTIONOPTIONS_H_INCLUDED_
+
+namespace MosesTraining
+{
+enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
+enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
+
+
+class PhraseExtractionOptions {
+
+ public:
+ const int maxPhraseLength;
+ private:
+ bool allModelsOutputFlag;
+ bool wordModel;
+ REO_MODEL_TYPE wordType;
+ bool phraseModel;
+ REO_MODEL_TYPE phraseType;
+ bool hierModel;
+ REO_MODEL_TYPE hierType;
+ bool orientationFlag;
+ bool translationFlag;
+ bool sentenceIdFlag; //create extract file with sentence id
+ bool includeSentenceIdFlag; //include sentence id in extract file
+ bool onlyOutputSpanInfo;
+ bool gzOutput;
+
+public:
+ PhraseExtractionOptions(const int initmaxPhraseLength):
+ maxPhraseLength(initmaxPhraseLength),
+ allModelsOutputFlag(false),
+ wordModel(false),
+ wordType(REO_MSD),
+ phraseModel(false),
+ phraseType(REO_MSD),
+ hierModel(false),
+ hierType(REO_MSD),
+ orientationFlag(false),
+ translationFlag(true),
+ sentenceIdFlag(false),
+ includeSentenceIdFlag(false),
+ onlyOutputSpanInfo(false),
+ gzOutput(false){}
+
+ //functions for initialization of options
+ void initAllModelsOutputFlag(const bool initallModelsOutputFlag){
+ allModelsOutputFlag=initallModelsOutputFlag;
+ }
+ void initWordModel(const bool initwordModel){
+ wordModel=initwordModel;
+ }
+ void initWordType(REO_MODEL_TYPE initwordType ){
+ wordType=initwordType;
+ }
+ void initPhraseModel(const bool initphraseModel ){
+ phraseModel=initphraseModel;
+ }
+ void initPhraseType(REO_MODEL_TYPE initphraseType){
+ phraseType=initphraseType;
+ }
+ void initHierModel(const bool inithierModel){
+ hierModel=inithierModel;
+ }
+ void initHierType(REO_MODEL_TYPE inithierType){
+ hierType=inithierType;
+ }
+ void initOrientationFlag(const bool initorientationFlag){
+ orientationFlag=initorientationFlag;
+ }
+ void initTranslationFlag(const bool inittranslationFlag){
+ translationFlag=inittranslationFlag;
+ }
+ void initSentenceIdFlag(const bool initsentenceIdFlag){
+ sentenceIdFlag=initsentenceIdFlag;
+ }
+ void initIncludeSentenceIdFlag(const bool initincludeSentenceIdFlag){
+ includeSentenceIdFlag=initincludeSentenceIdFlag;
+ }
+ void initOnlyOutputSpanInfo(const bool initonlyOutputSpanInfo){
+ onlyOutputSpanInfo= initonlyOutputSpanInfo;
+ }
+ void initGzOutput (const bool initgzOutput){
+ gzOutput= initgzOutput;
+ }
+ // functions for getting values
+ bool isAllModelsOutputFlag() const {
+ return allModelsOutputFlag;
+ }
+ bool isWordModel() const {
+ return wordModel;
+ }
+ REO_MODEL_TYPE isWordType() const {
+ return wordType;
+ }
+ bool isPhraseModel() const {
+ return phraseModel;
+ }
+ REO_MODEL_TYPE isPhraseType() const {
+ return phraseType;
+ }
+ bool isHierModel() const {
+ return hierModel;
+ }
+ REO_MODEL_TYPE isHierType() const {
+ return hierType;
+ }
+ bool isOrientationFlag() const {
+ return orientationFlag;
+ }
+ bool isTranslationFlag() const {
+ return translationFlag;
+ }
+ bool isSentenceIdFlag() const {
+ return sentenceIdFlag;
+ }
+ bool isIncludeSentenceIdFlag() const {
+ return includeSentenceIdFlag;
+ }
+ bool isOnlyOutputSpanInfo() const {
+ return onlyOutputSpanInfo;
+ }
+ bool isGzOutput () const {
+ return gzOutput;
+ }
+};
+
+}
+
+#endif
diff --git a/scripts/training/phrase-extract/RuleExist.h b/phrase-extract/RuleExist.h
index cf7fae3cd..94ea4b98e 100644
--- a/scripts/training/phrase-extract/RuleExist.h
+++ b/phrase-extract/RuleExist.h
@@ -25,6 +25,9 @@
#include "Hole.h"
+namespace MosesTraining
+{
+
// reposity of extracted phrase pairs
// which are potential holes in larger phrase pairs
class RuleExist
@@ -56,4 +59,7 @@ public:
};
+}
+
+
#endif
diff --git a/scripts/training/phrase-extract/RuleExtractionOptions.h b/phrase-extract/RuleExtractionOptions.h
index 272af2c76..431be58b0 100644
--- a/scripts/training/phrase-extract/RuleExtractionOptions.h
+++ b/phrase-extract/RuleExtractionOptions.h
@@ -21,6 +21,9 @@
#ifndef RULEEXTRACTIONOPTIONS_H_INCLUDED_
#define RULEEXTRACTIONOPTIONS_H_INCLUDED_
+namespace MosesTraining
+{
+
struct RuleExtractionOptions {
public:
int maxSpan;
@@ -50,6 +53,7 @@ public:
bool gzOutput;
bool unpairedExtractFormat;
bool conditionOnTargetLhs;
+ bool boundaryRules;
RuleExtractionOptions()
: maxSpan(10)
@@ -82,7 +86,10 @@ public:
, gzOutput(false)
, unpairedExtractFormat(false)
, conditionOnTargetLhs(false)
+ , boundaryRules(false)
{}
};
+}
+
#endif
diff --git a/scripts/training/phrase-extract/SafeGetline.h b/phrase-extract/SafeGetline.h
index 0e03b8468..0e03b8468 100644
--- a/scripts/training/phrase-extract/SafeGetline.h
+++ b/phrase-extract/SafeGetline.h
diff --git a/scripts/training/phrase-extract/SentenceAlignment.cpp b/phrase-extract/SentenceAlignment.cpp
index 8b513cfb4..af1cfa953 100644
--- a/scripts/training/phrase-extract/SentenceAlignment.cpp
+++ b/phrase-extract/SentenceAlignment.cpp
@@ -25,30 +25,45 @@
#include "tables-core.h"
+using namespace std;
+
+namespace MosesTraining
+{
+
SentenceAlignment::~SentenceAlignment() {}
-bool SentenceAlignment::processTargetSentence(const char * targetString, int)
+void addBoundaryWords(vector<string> &phrase)
+{
+ phrase.insert(phrase.begin(), "<s>");
+ phrase.push_back("</s>");
+}
+
+bool SentenceAlignment::processTargetSentence(const char * targetString, int, bool boundaryRules)
{
target = tokenize(targetString);
+ if (boundaryRules)
+ addBoundaryWords(target);
return true;
}
-bool SentenceAlignment::processSourceSentence(const char * sourceString, int)
+bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bool boundaryRules)
{
source = tokenize(sourceString);
+ if (boundaryRules)
+ addBoundaryWords(source);
return true;
}
-bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], int sentenceID)
+bool SentenceAlignment::create( char targetString[], char sourceString[], char alignmentString[], int sentenceID, bool boundaryRules)
{
using namespace std;
this->sentenceID = sentenceID;
// process sentence strings and store in target and source members.
- if (!processTargetSentence(targetString, sentenceID)) {
+ if (!processTargetSentence(targetString, sentenceID, boundaryRules)) {
return false;
}
- if (!processSourceSentence(sourceString, sentenceID)) {
+ if (!processSourceSentence(sourceString, sentenceID, boundaryRules)) {
return false;
}
@@ -78,6 +93,12 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
return false;
}
+
+ if (boundaryRules) {
+ ++s;
+ ++t;
+ }
+
// cout << "alignmentSequence[i] " << alignmentSequence[i] << " is " << s << ", " << t << endl;
if ((size_t)t >= target.size() || (size_t)s >= source.size()) {
cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << s << ", " << t << ") out of bounds (" << source.size() << ", " << target.size() << ")\n";
@@ -87,5 +108,18 @@ bool SentenceAlignment::create( char targetString[], char sourceString[], char a
alignedToT[t].push_back( s );
alignedCountS[s]++;
}
+
+ if (boundaryRules) {
+ alignedToT[0].push_back(0);
+ alignedCountS[0]++;
+
+ alignedToT.back().push_back(alignedCountS.size() - 1);
+ alignedCountS.back()++;
+
+ }
+
return true;
}
+
+}
+
diff --git a/scripts/training/phrase-extract/SentenceAlignment.h b/phrase-extract/SentenceAlignment.h
index df3987198..7c2988780 100644
--- a/scripts/training/phrase-extract/SentenceAlignment.h
+++ b/phrase-extract/SentenceAlignment.h
@@ -24,6 +24,9 @@
#include <string>
#include <vector>
+namespace MosesTraining
+{
+
class SentenceAlignment
{
public:
@@ -35,12 +38,16 @@ public:
virtual ~SentenceAlignment();
- virtual bool processTargetSentence(const char *, int);
+ virtual bool processTargetSentence(const char *, int, bool boundaryRules);
- virtual bool processSourceSentence(const char *, int);
+ virtual bool processSourceSentence(const char *, int, bool boundaryRules);
bool create(char targetString[], char sourceString[],
- char alignmentString[], int sentenceID);
+ char alignmentString[], int sentenceID, bool boundaryRules);
+
};
+}
+
+
#endif
diff --git a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp b/phrase-extract/SentenceAlignmentWithSyntax.cpp
index 06dc3919f..5d866edfb 100644
--- a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.cpp
+++ b/phrase-extract/SentenceAlignmentWithSyntax.cpp
@@ -29,10 +29,13 @@
using namespace std;
-bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID)
+namespace MosesTraining
+{
+
+bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID, bool boundaryRules)
{
if (!m_options.targetSyntax) {
- return SentenceAlignment::processTargetSentence(targetString, sentenceID);
+ return SentenceAlignment::processTargetSentence(targetString, sentenceID, boundaryRules);
}
string targetStringCPP(targetString);
@@ -49,10 +52,10 @@ bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetStrin
return true;
}
-bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID)
+bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID, bool boundaryRules)
{
if (!m_options.sourceSyntax) {
- return SentenceAlignment::processSourceSentence(sourceString, sentenceID);
+ return SentenceAlignment::processSourceSentence(sourceString, sentenceID, boundaryRules);
}
string sourceStringCPP(sourceString);
@@ -68,3 +71,8 @@ bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceStrin
source = tokenize(sourceStringCPP.c_str());
return true;
}
+
+} // namespace
+
+
+
diff --git a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.h b/phrase-extract/SentenceAlignmentWithSyntax.h
index a2c164655..28eef57b7 100644
--- a/scripts/training/phrase-extract/SentenceAlignmentWithSyntax.h
+++ b/phrase-extract/SentenceAlignmentWithSyntax.h
@@ -30,6 +30,9 @@
#include "SentenceAlignment.h"
#include "SyntaxTree.h"
+namespace MosesTraining
+{
+
class SentenceAlignmentWithSyntax : public SentenceAlignment
{
public:
@@ -56,10 +59,12 @@ public:
virtual ~SentenceAlignmentWithSyntax() {}
bool
- processTargetSentence(const char *, int);
+ processTargetSentence(const char *, int, bool boundaryRules);
bool
- processSourceSentence(const char *, int);
+ processSourceSentence(const char *, int, bool boundaryRules);
};
+}
+
#endif
diff --git a/scripts/training/phrase-extract/SyntaxTree.cpp b/phrase-extract/SyntaxTree.cpp
index f2783ffd2..0b99f0d22 100644
--- a/scripts/training/phrase-extract/SyntaxTree.cpp
+++ b/phrase-extract/SyntaxTree.cpp
@@ -26,6 +26,9 @@
#include <cassert>
#include <iostream>
+namespace MosesTraining
+{
+
SyntaxTree::~SyntaxTree()
{
Clear();
@@ -178,3 +181,6 @@ std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
}
return os;
}
+
+}
+
diff --git a/scripts/training/phrase-extract/SyntaxTree.h b/phrase-extract/SyntaxTree.h
index 17c106b49..dac20f9b2 100644
--- a/scripts/training/phrase-extract/SyntaxTree.h
+++ b/phrase-extract/SyntaxTree.h
@@ -27,6 +27,9 @@
#include <map>
#include <sstream>
+namespace MosesTraining
+{
+
class SyntaxNode
{
protected:
@@ -119,3 +122,5 @@ public:
std::ostream& operator<<(std::ostream&, const SyntaxTree&);
+}
+
diff --git a/scripts/training/phrase-extract/XmlException.h b/phrase-extract/XmlException.h
index 15ee91f20..08038a423 100644
--- a/scripts/training/phrase-extract/XmlException.h
+++ b/phrase-extract/XmlException.h
@@ -22,6 +22,9 @@
#include <string>
+namespace MosesTraining
+{
+
class XmlException
{
public:
@@ -38,4 +41,6 @@ private:
std::string m_msg;
};
+}
+
#endif
diff --git a/scripts/training/phrase-extract/XmlTree.cpp b/phrase-extract/XmlTree.cpp
index 29c0d94aa..e826263f9 100644
--- a/scripts/training/phrase-extract/XmlTree.cpp
+++ b/phrase-extract/XmlTree.cpp
@@ -32,6 +32,8 @@
using namespace std;
+namespace MosesTraining
+{
inline std::vector<std::string> Tokenize(const std::string& str,
const std::string& delimiters = " \t")
@@ -390,3 +392,5 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
line = cleanLine;
return true;
}
+
+}
diff --git a/scripts/training/phrase-extract/XmlTree.h b/phrase-extract/XmlTree.h
index 7e6bbecea..ebf235b64 100644
--- a/scripts/training/phrase-extract/XmlTree.h
+++ b/phrase-extract/XmlTree.h
@@ -27,6 +27,9 @@
#include <map>
#include "SyntaxTree.h"
+namespace MosesTraining
+{
+
std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
std::string TrimXml(const std::string& str);
@@ -34,3 +37,7 @@ bool isXmlTag(const std::string& tag);
std::vector<std::string> TokenizeXml(const std::string& str);
bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection );
std::string unescape(const std::string &str);
+
+
+} // namespace
+
diff --git a/scripts/training/phrase-extract/consolidate-direct.cpp b/phrase-extract/consolidate-direct.cpp
index e7e68e977..e7e68e977 100644
--- a/scripts/training/phrase-extract/consolidate-direct.cpp
+++ b/phrase-extract/consolidate-direct.cpp
diff --git a/scripts/training/phrase-extract/consolidate-direct.vcxproj b/phrase-extract/consolidate-direct.vcxproj
index a84d2cce6..a84d2cce6 100644
--- a/scripts/training/phrase-extract/consolidate-direct.vcxproj
+++ b/phrase-extract/consolidate-direct.vcxproj
diff --git a/scripts/training/phrase-extract/consolidate-reverse.cpp b/phrase-extract/consolidate-reverse.cpp
index c86d870c8..c86d870c8 100644
--- a/scripts/training/phrase-extract/consolidate-reverse.cpp
+++ b/phrase-extract/consolidate-reverse.cpp
diff --git a/scripts/training/phrase-extract/consolidate.cpp b/phrase-extract/consolidate.cpp
index ab466afae..43b3f32a1 100644
--- a/scripts/training/phrase-extract/consolidate.cpp
+++ b/phrase-extract/consolidate.cpp
@@ -47,8 +47,11 @@ inline float maybeLogProb( float a ) { return logProbFlag ? log(a) : a; }
char line[LINE_MAX_LENGTH];
void processFiles( char*, char*, char*, char* );
void loadCountOfCounts( char* );
+void breakdownCoreAndSparse( string combined, string &core, string &sparse );
bool getLine( istream &fileP, vector< string > &item );
vector< string > splitLine();
+vector< int > countBin;
+bool sparseCountBinFeatureFlag = false;
int main(int argc, char* argv[])
{
@@ -93,6 +96,20 @@ int main(int argc, char* argv[])
} else if (strcmp(argv[i],"--LowCountFeature") == 0) {
lowCountFlag = true;
cerr << "including the low count feature\n";
+ } else if (strcmp(argv[i],"--CountBinFeature") == 0 ||
+ strcmp(argv[i],"--SparseCountBinFeature") == 0) {
+ if (strcmp(argv[i],"--SparseCountBinFeature") == 0)
+ sparseCountBinFeatureFlag = true;
+ cerr << "include "<< (sparseCountBinFeatureFlag ? "sparse " : "") << "count bin feature:";
+ int prev = 0;
+ while(i+1<argc && argv[i+1][0]>='0' && argv[i+1][0]<='9') {
+ int binCount = atoi(argv[++i]);
+ countBin.push_back( binCount );
+ if (prev+1 == binCount) { cerr << " " << binCount; }
+ else { cerr << " " << (prev+1) << "-" << binCount; }
+ prev = binCount;
+ }
+ cerr << " " << (prev+1) << "+\n";
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
@@ -211,10 +228,13 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
}
// output hierarchical phrase pair (with separated labels)
- fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1];
+ fileConsolidated << itemDirect[0] << " ||| " << itemDirect[1] << " |||";
// SCORES ...
- fileConsolidated << " |||";
+ string directScores, directSparseScores, indirectScores, indirectSparseScores;
+ breakdownCoreAndSparse( itemDirect[2], directScores, directSparseScores );
+ breakdownCoreAndSparse( itemIndirect[2], indirectScores, indirectSparseScores );
+
vector<string> directCounts = tokenize(itemDirect[4].c_str());
vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
float countF = atof(directCounts[0].c_str());
@@ -252,12 +272,12 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
// prob indirect
if (!onlyDirectFlag) {
fileConsolidated << " " << maybeLogProb(adjustedCountEF_indirect/countE);
- fileConsolidated << " " << itemIndirect[2];
+ fileConsolidated << " " << directScores;
}
// prob direct
fileConsolidated << " " << maybeLogProb(adjustedCountEF/countF);
- fileConsolidated << " " << itemDirect[2];
+ fileConsolidated << " " << indirectScores;
// phrase count feature
if (phraseCountFlag) {
@@ -269,6 +289,21 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated << " " << maybeLogProb(exp(-1.0/countEF));
}
+ // count bin feature (as a core feature)
+ if (countBin.size()>0 && !sparseCountBinFeatureFlag) {
+ bool foundBin = false;
+ for(size_t i=0; i < countBin.size(); i++) {
+ if (!foundBin && countEF <= countBin[i]) {
+ fileConsolidated << " " << maybeLogProb(2.718);
+ foundBin = true;
+ }
+ else {
+ fileConsolidated << " " << maybeLogProb(1);
+ }
+ }
+ fileConsolidated << " " << maybeLogProb( foundBin ? 1 : 2.718 );
+ }
+
// alignment
fileConsolidated << " ||| " << itemDirect[3];
@@ -280,6 +315,35 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated << " ||| " << itemDirect[5];
}
+ // count bin feature (as a sparse feature)
+ if (sparseCountBinFeatureFlag ||
+ directSparseScores.compare("") != 0 ||
+ indirectSparseScores.compare("") != 0)
+ {
+ fileConsolidated << " |||";
+ if (directSparseScores.compare("") != 0)
+ fileConsolidated << " " << directSparseScores;
+ if (indirectSparseScores.compare("") != 0)
+ fileConsolidated << " " << indirectSparseScores;
+ if (sparseCountBinFeatureFlag) {
+ bool foundBin = false;
+ for(size_t i=0; i < countBin.size(); i++) {
+ if (!foundBin && countEF <= countBin[i]) {
+ fileConsolidated << " cb_";
+ if (i == 0 && countBin[i] > 1)
+ fileConsolidated << "1_";
+ else if (i > 0 && countBin[i-1]+1 < countBin[i])
+ fileConsolidated << (countBin[i-1]+1) << "_";
+ fileConsolidated << countBin[i] << " 1";
+ foundBin = true;
+ }
+ }
+ if (!foundBin) {
+ fileConsolidated << " cb_max 1";
+ }
+ }
+ }
+
fileConsolidated << endl;
}
fileDirect.Close();
@@ -287,6 +351,22 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated.Close();
}
+void breakdownCoreAndSparse( string combined, string &core, string &sparse )
+{
+ core = "";
+ sparse = "";
+ vector<string> score = tokenize( combined.c_str() );
+ for(size_t i=0; i<score.size(); i++) {
+ if ((score[i][0] >= '0' && score[i][0] <= '9') || i+1 == score.size())
+ core += " " + score[i];
+ else {
+ sparse += " " + score[i];
+ sparse += " " + score[++i];
+ }
+ }
+ if (core.size() > 0 ) core = core.substr(1);
+ if (sparse.size() > 0 ) sparse = sparse.substr(1);
+}
bool getLine( istream &fileP, vector< string > &item )
{
@@ -305,7 +385,6 @@ bool getLine( istream &fileP, vector< string > &item )
vector< string > splitLine()
{
vector< string > item;
- bool betweenWords = true;
int start=0;
int i=0;
for(; line[i] != '\0'; i++) {
diff --git a/scripts/training/phrase-extract/consolidate.vcxproj b/phrase-extract/consolidate.vcxproj
index 1e77a90f0..1e77a90f0 100644
--- a/scripts/training/phrase-extract/consolidate.vcxproj
+++ b/phrase-extract/consolidate.vcxproj
diff --git a/phrase-extract/domain.cpp b/phrase-extract/domain.cpp
new file mode 100644
index 000000000..aacb7160d
--- /dev/null
+++ b/phrase-extract/domain.cpp
@@ -0,0 +1,52 @@
+// $Id$
+//#include "beammain.h"
+#include "domain.h"
+#include "tables-core.h"
+#include "InputFileStream.h"
+#include "SafeGetline.h"
+
+#define TABLE_LINE_MAX_LENGTH 1000
+
+using namespace std;
+
+namespace MosesTraining
+{
+
+// handling of domain names: load database with sentence-id / domain name info
+void Domain::load( const std::string &domainFileName ) {
+ Moses::InputFileStream fileS( domainFileName );
+ istream *fileP = &fileS;
+ while(true) {
+ char line[TABLE_LINE_MAX_LENGTH];
+ SAFE_GETLINE((*fileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
+ if (fileP->eof()) break;
+ // read
+ vector< string > domainSpecLine = tokenize( line );
+ int lineNumber;
+ if (domainSpecLine.size() != 2 ||
+ ! sscanf(domainSpecLine[0].c_str(), "%d", &lineNumber)) {
+ cerr << "ERROR: in domain specification line: '" << line << "'" << endl;
+ exit(1);
+ }
+ // store
+ string &name = domainSpecLine[1];
+ spec.push_back( make_pair( lineNumber, name ));
+ if (name2id.find( name ) == name2id.end()) {
+ name2id[ name ] = list.size();
+ list.push_back( name );
+ }
+ }
+}
+
+// get domain name based on sentence number
+string Domain::getDomainOfSentence( int sentenceId ) {
+ for(size_t i=0; i<spec.size(); i++) {
+ if (sentenceId <= spec[i].first) {
+ return spec[i].second;
+ }
+ }
+ return "undefined";
+}
+
+}
+
diff --git a/phrase-extract/domain.h b/phrase-extract/domain.h
new file mode 100644
index 000000000..cf675c17e
--- /dev/null
+++ b/phrase-extract/domain.h
@@ -0,0 +1,32 @@
+// $Id$
+
+#ifndef _DOMAIN_H
+#define _DOMAIN_H
+
+#include <iostream>
+#include <fstream>
+#include <assert.h>
+#include <stdlib.h>
+#include <string>
+#include <queue>
+#include <map>
+#include <cmath>
+
+extern std::vector<std::string> tokenize( const char*);
+
+namespace MosesTraining
+{
+
+class Domain
+{
+public:
+ std::vector< std::pair< int, std::string > > spec;
+ std::vector< std::string > list;
+ std::map< std::string, int > name2id;
+ void load( const std::string &fileName );
+ std::string getDomainOfSentence( int sentenceId );
+};
+
+}
+
+#endif
diff --git a/scripts/training/phrase-extract/extract-ghkm/Alignment.cpp b/phrase-extract/extract-ghkm/Alignment.cpp
index fcd5e14e1..fcd5e14e1 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Alignment.cpp
+++ b/phrase-extract/extract-ghkm/Alignment.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/Alignment.h b/phrase-extract/extract-ghkm/Alignment.h
index bc42191e1..bc42191e1 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Alignment.h
+++ b/phrase-extract/extract-ghkm/Alignment.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp b/phrase-extract/extract-ghkm/AlignmentGraph.cpp
index 6bd32a13b..6bd32a13b 100644
--- a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.cpp
+++ b/phrase-extract/extract-ghkm/AlignmentGraph.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.h b/phrase-extract/extract-ghkm/AlignmentGraph.h
index 94948758a..94948758a 100644
--- a/scripts/training/phrase-extract/extract-ghkm/AlignmentGraph.h
+++ b/phrase-extract/extract-ghkm/AlignmentGraph.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/ComposedRule.cpp b/phrase-extract/extract-ghkm/ComposedRule.cpp
index 8bf3cfc72..8bf3cfc72 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ComposedRule.cpp
+++ b/phrase-extract/extract-ghkm/ComposedRule.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/ComposedRule.h b/phrase-extract/extract-ghkm/ComposedRule.h
index 65ce9ac70..65ce9ac70 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ComposedRule.h
+++ b/phrase-extract/extract-ghkm/ComposedRule.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/Exception.h b/phrase-extract/extract-ghkm/Exception.h
index 9928785f0..9928785f0 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Exception.h
+++ b/phrase-extract/extract-ghkm/Exception.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
index dae876116..dae876116 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.cpp
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.h b/phrase-extract/extract-ghkm/ExtractGHKM.h
index f16abc5f3..f16abc5f3 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ExtractGHKM.h
+++ b/phrase-extract/extract-ghkm/ExtractGHKM.h
diff --git a/phrase-extract/extract-ghkm/Jamfile b/phrase-extract/extract-ghkm/Jamfile
new file mode 100644
index 000000000..1a81c5f87
--- /dev/null
+++ b/phrase-extract/extract-ghkm/Jamfile
@@ -0,0 +1 @@
+exe extract-ghkm : [ glob *.cpp ] ..//filestreams ..//trees ../..//boost_iostreams ../..//boost_program_options ../..//z ;
diff --git a/scripts/training/phrase-extract/extract-ghkm/Main.cpp b/phrase-extract/extract-ghkm/Main.cpp
index faf3230a6..faf3230a6 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Main.cpp
+++ b/phrase-extract/extract-ghkm/Main.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/Node.cpp b/phrase-extract/extract-ghkm/Node.cpp
index beb7470b8..beb7470b8 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Node.cpp
+++ b/phrase-extract/extract-ghkm/Node.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/Node.h b/phrase-extract/extract-ghkm/Node.h
index 775473362..775473362 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Node.h
+++ b/phrase-extract/extract-ghkm/Node.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/Options.h b/phrase-extract/extract-ghkm/Options.h
index 362fc95d2..362fc95d2 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Options.h
+++ b/phrase-extract/extract-ghkm/Options.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/ParseTree.cpp b/phrase-extract/extract-ghkm/ParseTree.cpp
index 052b8dee1..052b8dee1 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ParseTree.cpp
+++ b/phrase-extract/extract-ghkm/ParseTree.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/ParseTree.h b/phrase-extract/extract-ghkm/ParseTree.h
index 273e2e04e..273e2e04e 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ParseTree.h
+++ b/phrase-extract/extract-ghkm/ParseTree.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp b/phrase-extract/extract-ghkm/ScfgRule.cpp
index 5dc70052c..5dc70052c 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRule.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h b/phrase-extract/extract-ghkm/ScfgRule.h
index 2405d8fa3..2405d8fa3 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRule.h
+++ b/phrase-extract/extract-ghkm/ScfgRule.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
index cd993d6e8..cd993d6e8 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
index b92a432a1..b92a432a1 100644
--- a/scripts/training/phrase-extract/extract-ghkm/ScfgRuleWriter.h
+++ b/phrase-extract/extract-ghkm/ScfgRuleWriter.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/Span.cpp b/phrase-extract/extract-ghkm/Span.cpp
index f0eccbdf2..f0eccbdf2 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Span.cpp
+++ b/phrase-extract/extract-ghkm/Span.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/Span.h b/phrase-extract/extract-ghkm/Span.h
index 003d1ef84..003d1ef84 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Span.h
+++ b/phrase-extract/extract-ghkm/Span.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp b/phrase-extract/extract-ghkm/Subgraph.cpp
index e048f2c55..e048f2c55 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Subgraph.cpp
+++ b/phrase-extract/extract-ghkm/Subgraph.cpp
diff --git a/scripts/training/phrase-extract/extract-ghkm/Subgraph.h b/phrase-extract/extract-ghkm/Subgraph.h
index ede1233e9..ede1233e9 100644
--- a/scripts/training/phrase-extract/extract-ghkm/Subgraph.h
+++ b/phrase-extract/extract-ghkm/Subgraph.h
diff --git a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp b/phrase-extract/extract-ghkm/XmlTreeParser.cpp
index cc961dc0c..b195131cb 100644
--- a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.cpp
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.cpp
@@ -27,6 +27,8 @@
#include <cassert>
#include <vector>
+using namespace MosesTraining;
+
namespace Moses {
namespace GHKM {
diff --git a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.h b/phrase-extract/extract-ghkm/XmlTreeParser.h
index 664ab11a3..7b63ae1e4 100644
--- a/scripts/training/phrase-extract/extract-ghkm/XmlTreeParser.h
+++ b/phrase-extract/extract-ghkm/XmlTreeParser.h
@@ -43,13 +43,13 @@ class XmlTreeParser {
XmlTreeParser(std::set<std::string> &, std::map<std::string, int> &);
std::auto_ptr<ParseTree> Parse(const std::string &);
private:
- std::auto_ptr<ParseTree> ConvertTree(const SyntaxNode &,
+ std::auto_ptr<ParseTree> ConvertTree(const MosesTraining::SyntaxNode &,
const std::vector<std::string> &);
std::set<std::string> &m_labelSet;
std::map<std::string, int> &m_topLabelSet;
std::string m_line;
- SyntaxTree m_tree;
+ MosesTraining::SyntaxTree m_tree;
std::vector<std::string> m_words;
};
diff --git a/scripts/training/phrase-extract/extract-lex.cpp b/phrase-extract/extract-lex.cpp
index 9b03a6da0..a59450da8 100644
--- a/scripts/training/phrase-extract/extract-lex.cpp
+++ b/phrase-extract/extract-lex.cpp
@@ -6,6 +6,7 @@
#include "InputFileStream.h"
using namespace std;
+using namespace MosesTraining;
float COUNT_INCR = 1;
@@ -80,6 +81,9 @@ int main(int argc, char* argv[])
cerr << "\nFinished\n";
}
+namespace MosesTraining
+{
+
const std::string *Vocab::GetOrAdd(const std::string &word)
{
const string *ret = &(*m_coll.insert(word).first);
@@ -219,4 +223,5 @@ void WordCount::AddCount(float incr)
m_count += incr;
}
+} // namespace
diff --git a/scripts/training/phrase-extract/extract-lex.h b/phrase-extract/extract-lex.h
index e2225ecbc..d272cf6ff 100644
--- a/scripts/training/phrase-extract/extract-lex.h
+++ b/phrase-extract/extract-lex.h
@@ -6,6 +6,9 @@
#include <fstream>
#include <iostream>
+namespace MosesTraining
+{
+
//! convert string to variable of type T. Used to reading floats, int etc from files
template<typename T>
@@ -115,3 +118,4 @@ public:
};
+} // namespace
diff --git a/scripts/training/phrase-extract/extract-lex.vcxproj b/phrase-extract/extract-lex.vcxproj
index a291d51aa..a291d51aa 100644
--- a/scripts/training/phrase-extract/extract-lex.vcxproj
+++ b/phrase-extract/extract-lex.vcxproj
diff --git a/scripts/training/phrase-extract/extract-rules.cpp b/phrase-extract/extract-rules.cpp
index 997038224..5c308fd9b 100644
--- a/scripts/training/phrase-extract/extract-rules.cpp
+++ b/phrase-extract/extract-rules.cpp
@@ -46,65 +46,62 @@
#include "XmlTree.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
-#include "../../../moses/src/ThreadPool.h"
-#include "../../../moses/src/OutputCollector.h"
#define LINE_MAX_LENGTH 500000
using namespace std;
+using namespace MosesTraining;
typedef vector< int > LabelIndex;
typedef map< int, int > WordIndex;
-class ExtractTask : public Moses::Task {
+class ExtractTask
+{
private:
- size_t m_id;
- SentenceAlignmentWithSyntax *m_sentence;
- RuleExtractionOptions &m_options;
- Moses::OutputCollector* m_extractCollector;
- Moses::OutputCollector* m_extractCollectorInv;
+ SentenceAlignmentWithSyntax &m_sentence;
+ const RuleExtractionOptions &m_options;
+ Moses::OutputFileStream& m_extractFile;
+ Moses::OutputFileStream& m_extractFileInv;
+
+ vector< ExtractedRule > m_extractedRules;
+
+ // main functions
+ void extractRules();
+ void addRuleToCollection(ExtractedRule &rule);
+ void consolidateRules();
+ void writeRulesToFile();
+
+ // subs
+ void addRule( int, int, int, int, int, RuleExist &ruleExist);
+ void addHieroRule( int startT, int endT, int startS, int endS
+ , RuleExist &ruleExist, const HoleCollection &holeColl, int numHoles, int initStartF, int wordCountT, int wordCountS);
+ void printHieroPhrase( int startT, int endT, int startS, int endS
+ , HoleCollection &holeColl, LabelIndex &labelIndex, int countS);
+ string printTargetHieroPhrase( int startT, int endT, int startS, int endS
+ , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore, int countS);
+ string printSourceHieroPhrase( int startT, int endT, int startS, int endS
+ , HoleCollection &holeColl, const LabelIndex &labelIndex);
+ void preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
+ , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex);
+ void printHieroAlignment( int startT, int endT, int startS, int endS
+ , const WordIndex &indexS, const WordIndex &indexT, HoleCollection &holeColl, ExtractedRule &rule);
+ void printAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl, int countS);
+
+ inline string IntToString( int i )
+ {
+ stringstream out;
+ out << i;
+ return out.str();
+ }
public:
- ExtractTask(size_t id, SentenceAlignmentWithSyntax *sentence, RuleExtractionOptions &options, Moses::OutputCollector* extractCollector, Moses::OutputCollector* extractCollectorInv):
- m_id(id),
+ ExtractTask(SentenceAlignmentWithSyntax &sentence, const RuleExtractionOptions &options, Moses::OutputFileStream &extractFile, Moses::OutputFileStream &extractFileInv):
m_sentence(sentence),
m_options(options),
- m_extractCollector(extractCollector),
- m_extractCollectorInv(extractCollectorInv) {}
- ~ExtractTask() { delete m_sentence; }
+ m_extractFile(extractFile),
+ m_extractFileInv(extractFileInv) {}
void Run();
-private:
-vector< ExtractedRule > m_extractedRules;
-
-// main functions
-void extractRules();
-void addRuleToCollection(ExtractedRule &rule);
-void consolidateRules();
-void writeRulesToFile();
-
-// subs
-void addRule( int, int, int, int, RuleExist &ruleExist);
-void addHieroRule( int startT, int endT, int startS, int endS
- , RuleExist &ruleExist, const HoleCollection &holeColl, int numHoles, int initStartF, int wordCountT, int wordCountS);
-void printHieroPhrase( int startT, int endT, int startS, int endS
- , HoleCollection &holeColl, LabelIndex &labelIndex);
-string printTargetHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore);
-string printSourceHieroPhrase( int startT, int endT, int startS, int endS
- , HoleCollection &holeColl, const LabelIndex &labelIndex);
-void preprocessSourceHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexS, HoleCollection &holeColl, const LabelIndex &labelIndex);
-void printHieroAlignment( int startT, int endT, int startS, int endS
- , const WordIndex &indexS, const WordIndex &indexT, HoleCollection &holeColl, ExtractedRule &rule);
-void printAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl);
-
-inline string IntToString( int i )
-{
- stringstream out;
- out << i;
- return out.str();
-}
};
// stats for glue grammar and unknown word label probabilities
@@ -119,15 +116,14 @@ int main(int argc, char* argv[])
<< "rule extraction from an aligned parallel corpus\n";
RuleExtractionOptions options;
+ int sentenceOffset = 0;
#ifdef WITH_THREADS
int thread_count = 1;
#endif
if (argc < 5) {
cerr << "syntax: extract-rules corpus.target corpus.source corpus.align extract ["
-#ifdef WITH_THREADS
- << " --threads NUM |"
-#endif
- << " --GlueGrammar FILE"
+
+ << " --GlueGrammar FILE"
<< " | --UnknownWordLabel FILE"
<< " | --OnlyDirect"
<< " | --OutputNTLengths"
@@ -142,7 +138,9 @@ int main(int argc, char* argv[])
<< " | --SourceSyntax | --TargetSyntax"
<< " | --AllowOnlyUnalignedWords | --DisallowNonTermConsecTarget |--NonTermConsecSource | --NoNonTermFirstWord | --NoFractionalCounting"
<< " | --UnpairedExtractFormat"
- << " | --ConditionOnTargetLHS ]\n";
+ << " | --ConditionOnTargetLHS ]"
+ << " | --BoundaryRules[" << options.boundaryRules << "]";
+
exit(1);
}
char* &fileNameT = argv[1];
@@ -267,12 +265,23 @@ int main(int argc, char* argv[])
options.unpairedExtractFormat = true;
} else if (strcmp(argv[i],"--ConditionOnTargetLHS") == 0) {
options.conditionOnTargetLhs = true;
-#ifdef WITH_THREADS
} else if (strcmp(argv[i],"-threads") == 0 ||
strcmp(argv[i],"--threads") == 0 ||
strcmp(argv[i],"--Threads") == 0) {
+#ifdef WITH_THREADS
thread_count = atoi(argv[++i]);
+#else
+ cerr << "thread support not compiled in." << '\n';
+ exit(1);
#endif
+ } else if (strcmp(argv[i], "--SentenceOffset") == 0) {
+ if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
+ cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
+ exit(1);
+ }
+ sentenceOffset = atoi(argv[++i]);
+ } else if (strcmp(argv[i],"--BoundaryRules") == 0) {
+ options.boundaryRules = true;
} else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
@@ -298,27 +307,17 @@ int main(int argc, char* argv[])
if (!options.onlyDirectFlag)
extractFileInv.Open(fileNameExtractInv.c_str());
- // output into file
- Moses::OutputCollector* extractCollector = new Moses::OutputCollector(&extractFile);
- Moses::OutputCollector* extractCollectorInv = new Moses::OutputCollector(&extractFileInv);
// stats on labels for glue grammar and unknown word label probabilities
set< string > targetLabelCollection, sourceLabelCollection;
map< string, int > targetTopLabelCollection, sourceTopLabelCollection;
-#ifdef WITH_THREADS
- // set up thread pool
- Moses::ThreadPool pool(thread_count);
- pool.SetQueueLimit(1000);
-#endif
-
// loop through all sentence pairs
- size_t i=0;
+ size_t i=sentenceOffset;
while(true) {
i++;
- if (i%1000 == 0) cerr << "." << flush;
- if (i%10000 == 0) cerr << ":" << flush;
- if (i%100000 == 0) cerr << "!" << flush;
+ if (i%1000 == 0) cerr << i << " " << flush;
+
char targetString[LINE_MAX_LENGTH];
char sourceString[LINE_MAX_LENGTH];
char alignmentString[LINE_MAX_LENGTH];
@@ -327,7 +326,7 @@ int main(int argc, char* argv[])
SAFE_GETLINE((*sFileP), sourceString, LINE_MAX_LENGTH, '\n', __FILE__);
SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
- SentenceAlignmentWithSyntax *sentence = new SentenceAlignmentWithSyntax
+ SentenceAlignmentWithSyntax sentence
(targetLabelCollection, sourceLabelCollection,
targetTopLabelCollection, sourceTopLabelCollection, options);
//az: output src, tgt, and alingment line
@@ -338,32 +337,17 @@ int main(int argc, char* argv[])
cout << "LOG: PHRASES_BEGIN:" << endl;
}
- if (sentence->create(targetString, sourceString, alignmentString, i)) {
+ if (sentence.create(targetString, sourceString, alignmentString, i, options.boundaryRules)) {
if (options.unknownWordLabelFlag) {
- collectWordLabelCounts(*sentence);
- }
- ExtractTask *task = new ExtractTask(i-1, sentence, options, extractCollector, extractCollectorInv);
-#ifdef WITH_THREADS
- if (thread_count == 1) {
- task->Run();
- delete task;
+ collectWordLabelCounts(sentence);
}
- else {
- pool.Submit(task);
- }
-#else
+ ExtractTask *task = new ExtractTask(sentence, options, extractFile, extractFileInv);
task->Run();
delete task;
-#endif
}
if (options.onlyOutputSpanInfo) cout << "LOG: PHRASES_END:" << endl; //az: mark end of phrases
}
-#ifdef WITH_THREADS
- // wait for all threads to finish
- pool.Stop(true);
-#endif
-
tFile.Close();
sFile.Close();
aFile.Close();
@@ -389,8 +373,8 @@ void ExtractTask::Run() {
void ExtractTask::extractRules()
{
- int countT = m_sentence->target.size();
- int countS = m_sentence->source.size();
+ int countT = m_sentence.target.size();
+ int countS = m_sentence.source.size();
// phrase repository for creating hiero phrases
RuleExist ruleExist(countT);
@@ -405,17 +389,17 @@ void ExtractTask::extractRules()
int endT = startT + lengthT - 1;
// if there is target side syntax, there has to be a node
- if (m_options.targetSyntax && !m_sentence->targetTree.HasNode(startT,endT))
+ if (m_options.targetSyntax && !m_sentence.targetTree.HasNode(startT,endT))
continue;
// find find aligned source words
// first: find minimum and maximum source word
int minS = 9999;
int maxS = -1;
- vector< int > usedS = m_sentence->alignedCountS;
+ vector< int > usedS = m_sentence.alignedCountS;
for(int ti=startT; ti<=endT; ti++) {
- for(unsigned int i=0; i<m_sentence->alignedToT[ti].size(); i++) {
- int si = m_sentence->alignedToT[ti][i];
+ for(unsigned int i=0; i<m_sentence.alignedToT[ti].size(); i++) {
+ int si = m_sentence.alignedToT[ti][i];
if (si<minS) {
minS = si;
}
@@ -450,22 +434,22 @@ void ExtractTask::extractRules()
for(int startS=minS;
(startS>=0 &&
startS>maxS - m_options.maxSpan && // within length limit
- (startS==minS || m_sentence->alignedCountS[startS]==0)); // unaligned
+ (startS==minS || m_sentence.alignedCountS[startS]==0)); // unaligned
startS--) {
// end point of source phrase may advance over unaligned
for(int endS=maxS;
(endS<countS && endS<startS + m_options.maxSpan && // within length limit
- (endS==maxS || m_sentence->alignedCountS[endS]==0)); // unaligned
+ (endS==maxS || m_sentence.alignedCountS[endS]==0)); // unaligned
endS++) {
// if there is source side syntax, there has to be a node
- if (m_options.sourceSyntax && !m_sentence->sourceTree.HasNode(startS,endS))
+ if (m_options.sourceSyntax && !m_sentence.sourceTree.HasNode(startS,endS))
continue;
// TODO: loop over all source and target syntax labels
// if within length limits, add as fully-lexical phrase pair
if (endT-startT < m_options.maxSymbolsTarget && endS-startS < m_options.maxSymbolsSource) {
- addRule(startT,endT,startS,endS, ruleExist);
+ addRule(startT,endT,startS,endS, countS, ruleExist);
}
// take note that this is a valid phrase alignment
@@ -507,7 +491,7 @@ void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS,
int labelI = labelIndex[ 2+holeCount+holeTotal ];
string label = m_options.sourceSyntax ?
- m_sentence->sourceTree.GetNodes(currPos,hole.GetEnd(0))[ labelI ]->GetLabel() : "X";
+ m_sentence.sourceTree.GetNodes(currPos,hole.GetEnd(0))[ labelI ]->GetLabel() : "X";
hole.SetLabel(label, 0);
currPos = hole.GetEnd(0);
@@ -525,7 +509,8 @@ void ExtractTask::preprocessSourceHieroPhrase( int startT, int endT, int startS,
}
string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, int endS
- , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore)
+ , WordIndex &indexT, HoleCollection &holeColl, const LabelIndex &labelIndex, double &logPCFGScore
+ , int countS)
{
HoleList::iterator iterHoleList = holeColl.GetHoles().begin();
assert(iterHoleList != holeColl.GetHoles().end());
@@ -547,8 +532,15 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
assert(sourceLabel != "");
int labelI = labelIndex[ 2+holeCount ];
- string targetLabel = m_options.targetSyntax ?
- m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[ labelI ]->GetLabel() : "X";
+ string targetLabel;
+ if (m_options.targetSyntax) {
+ targetLabel = m_sentence.targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetLabel();
+ } else if (m_options.boundaryRules && (startS == 0 || endS == countS - 1)) {
+ targetLabel = "S";
+ } else {
+ targetLabel = "X";
+ }
+
hole.SetLabel(targetLabel, 1);
if (m_options.unpairedExtractFormat) {
@@ -558,7 +550,7 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
}
if (m_options.pcfgScore) {
- double score = m_sentence->targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetPcfgScore();
+ double score = m_sentence.targetTree.GetNodes(currPos,hole.GetEnd(1))[labelI]->GetPcfgScore();
logPCFGScore -= score;
}
@@ -568,7 +560,7 @@ string ExtractTask::printTargetHieroPhrase( int startT, int endT, int startS, in
holeCount++;
} else {
indexT[currPos] = outPos;
- out += m_sentence->target[currPos] + " ";
+ out += m_sentence.target[currPos] + " ";
}
outPos++;
@@ -612,7 +604,7 @@ string ExtractTask::printSourceHieroPhrase( int startT, int endT, int startS, in
++iterHoleList;
++holeCount;
} else {
- out += m_sentence->source[currPos] + " ";
+ out += m_sentence.source[currPos] + " ";
}
outPos++;
@@ -629,8 +621,8 @@ void ExtractTask::printHieroAlignment( int startT, int endT, int startS, int end
for(int ti=startT; ti<=endT; ti++) {
WordIndex::const_iterator p = indexT.find(ti);
if (p != indexT.end()) { // does word still exist?
- for(unsigned int i=0; i<m_sentence->alignedToT[ti].size(); i++) {
- int si = m_sentence->alignedToT[ti][i];
+ for(unsigned int i=0; i<m_sentence.alignedToT[ti].size(); i++) {
+ int si = m_sentence.alignedToT[ti][i];
std::string sourceSymbolIndex = IntToString(indexS.find(si)->second);
std::string targetSymbolIndex = IntToString(p->second);
rule.alignment += sourceSymbolIndex + "-" + targetSymbolIndex + " ";
@@ -662,30 +654,37 @@ void ExtractTask::printHieroAlignment( int startT, int endT, int startS, int end
}
void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
- , HoleCollection &holeColl, LabelIndex &labelIndex)
+ , HoleCollection &holeColl, LabelIndex &labelIndex, int countS)
{
WordIndex indexS, indexT; // to keep track of word positions in rule
ExtractedRule rule( startT, endT, startS, endS );
// phrase labels
- string targetLabel = m_options.targetSyntax ?
- m_sentence->targetTree.GetNodes(startT,endT)[ labelIndex[0] ]->GetLabel() : "X";
+ string targetLabel;
+ if (m_options.targetSyntax) {
+ targetLabel = m_sentence.targetTree.GetNodes(startT,endT)[labelIndex[0] ]->GetLabel();
+ } else if (m_options.boundaryRules && (startS == 0 || endS == countS - 1)) {
+ targetLabel = "S";
+ } else {
+ targetLabel = "X";
+ }
+
string sourceLabel = m_options.sourceSyntax ?
- m_sentence->sourceTree.GetNodes(startS,endS)[ labelIndex[1] ]->GetLabel() : "X";
+ m_sentence.sourceTree.GetNodes(startS,endS)[ labelIndex[1] ]->GetLabel() : "X";
// create non-terms on the source side
preprocessSourceHieroPhrase(startT, endT, startS, endS, indexS, holeColl, labelIndex);
// target
if (m_options.pcfgScore) {
- double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[labelIndex[0]]->GetPcfgScore();
- rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
+ double logPCFGScore = m_sentence.targetTree.GetNodes(startT,endT)[labelIndex[0]]->GetPcfgScore();
+ rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore, countS)
+ " [" + targetLabel + "]";
rule.pcfgScore = std::exp(logPCFGScore);
} else {
double logPCFGScore = 0.0f;
- rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore)
+ rule.target = printTargetHieroPhrase(startT, endT, startS, endS, indexT, holeColl, labelIndex, logPCFGScore, countS)
+ " [" + targetLabel + "]";
}
@@ -703,24 +702,24 @@ void ExtractTask::printHieroPhrase( int startT, int endT, int startS, int endS
addRuleToCollection( rule );
}
-void ExtractTask::printAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl)
+void ExtractTask::printAllHieroPhrases( int startT, int endT, int startS, int endS, HoleCollection &holeColl, int countS)
{
LabelIndex labelIndex,labelCount;
// number of target head labels
- int numLabels = m_options.targetSyntax ? m_sentence->targetTree.GetNodes(startT,endT).size() : 1;
+ int numLabels = m_options.targetSyntax ? m_sentence.targetTree.GetNodes(startT,endT).size() : 1;
labelCount.push_back(numLabels);
labelIndex.push_back(0);
// number of source head labels
- numLabels = m_options.sourceSyntax ? m_sentence->sourceTree.GetNodes(startS,endS).size() : 1;
+ numLabels = m_options.sourceSyntax ? m_sentence.sourceTree.GetNodes(startS,endS).size() : 1;
labelCount.push_back(numLabels);
labelIndex.push_back(0);
// number of target hole labels
for( HoleList::const_iterator hole = holeColl.GetHoles().begin();
hole != holeColl.GetHoles().end(); hole++ ) {
- int numLabels = m_options.targetSyntax ? m_sentence->targetTree.GetNodes(hole->GetStart(1),hole->GetEnd(1)).size() : 1 ;
+ int numLabels = m_options.targetSyntax ? m_sentence.targetTree.GetNodes(hole->GetStart(1),hole->GetEnd(1)).size() : 1 ;
labelCount.push_back(numLabels);
labelIndex.push_back(0);
}
@@ -730,7 +729,7 @@ void ExtractTask::printAllHieroPhrases( int startT, int endT, int startS, int en
for( vector<Hole*>::iterator i = holeColl.GetSortedSourceHoles().begin();
i != holeColl.GetSortedSourceHoles().end(); i++ ) {
const Hole &hole = **i;
- int numLabels = m_options.sourceSyntax ? m_sentence->sourceTree.GetNodes(hole.GetStart(0),hole.GetEnd(0)).size() : 1 ;
+ int numLabels = m_options.sourceSyntax ? m_sentence.sourceTree.GetNodes(hole.GetStart(0),hole.GetEnd(0)).size() : 1 ;
labelCount.push_back(numLabels);
labelIndex.push_back(0);
}
@@ -738,7 +737,7 @@ void ExtractTask::printAllHieroPhrases( int startT, int endT, int startS, int en
// loop through the holes
bool done = false;
while(!done) {
- printHieroPhrase( startT, endT, startS, endS, holeColl, labelIndex );
+ printHieroPhrase( startT, endT, startS, endS, holeColl, labelIndex, countS );
for(unsigned int i=0; i<labelIndex.size(); i++) {
labelIndex[i]++;
if(labelIndex[i] == labelCount[i]) {
@@ -842,7 +841,7 @@ void ExtractTask::addHieroRule( int startT, int endT, int startS, int endS
}
// covered by word? check if it is aligned
else {
- if (m_sentence->alignedToT[pos].size() > 0)
+ if (m_sentence.alignedToT[pos].size() > 0)
foundAlignedWord = true;
}
}
@@ -866,7 +865,7 @@ void ExtractTask::addHieroRule( int startT, int endT, int startS, int endS
// passed all checks...
if (allowablePhrase)
- printAllHieroPhrases(startT, endT, startS, endS, copyHoleColl);
+ printAllHieroPhrases(startT, endT, startS, endS, copyHoleColl, wordCountS);
// recursively search for next hole
int nextInitStartT = m_options.nonTermConsecTarget ? endHoleT + 1 : endHoleT + 2;
@@ -878,10 +877,15 @@ void ExtractTask::addHieroRule( int startT, int endT, int startS, int endS
}
}
-void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist &ruleExist)
+void ExtractTask::addRule( int startT, int endT, int startS, int endS, int countS, RuleExist &ruleExist)
{
- // source
-
+ // contains only <s> or </s>. Don't output
+ if (m_options.boundaryRules
+ && ( (startS == 0 && endS == 0)
+ || (startS == countS-1 && endS == countS-1))) {
+ return;
+ }
+
if (m_options.onlyOutputSpanInfo) {
cout << startS << " " << endS << " " << startT << " " << endT << endl;
return;
@@ -892,36 +896,42 @@ void ExtractTask::addRule( int startT, int endT, int startS, int endS, RuleExist
// phrase labels
string targetLabel,sourceLabel;
if (m_options.targetSyntax && m_options.conditionOnTargetLhs) {
- sourceLabel = targetLabel = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel();
+ sourceLabel = targetLabel = m_sentence.targetTree.GetNodes(startT,endT)[0]->GetLabel();
}
else {
sourceLabel = m_options.sourceSyntax ?
- m_sentence->sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
- targetLabel = m_options.targetSyntax ?
- m_sentence->targetTree.GetNodes(startT,endT)[0]->GetLabel() : "X";
+ m_sentence.sourceTree.GetNodes(startS,endS)[0]->GetLabel() : "X";
+
+ if (m_options.targetSyntax) {
+ targetLabel = m_sentence.targetTree.GetNodes(startT,endT)[0]->GetLabel();
+ } else if (m_options.boundaryRules && (startS == 0 || endS == countS - 1)) {
+ targetLabel = "S";
+ } else {
+ targetLabel = "X";
+ }
}
// source
rule.source = "";
for(int si=startS; si<=endS; si++)
- rule.source += m_sentence->source[si] + " ";
+ rule.source += m_sentence.source[si] + " ";
rule.source += "[" + sourceLabel + "]";
// target
rule.target = "";
for(int ti=startT; ti<=endT; ti++)
- rule.target += m_sentence->target[ti] + " ";
+ rule.target += m_sentence.target[ti] + " ";
rule.target += "[" + targetLabel + "]";
if (m_options.pcfgScore) {
- double logPCFGScore = m_sentence->targetTree.GetNodes(startT,endT)[0]->GetPcfgScore();
+ double logPCFGScore = m_sentence.targetTree.GetNodes(startT,endT)[0]->GetPcfgScore();
rule.pcfgScore = std::exp(logPCFGScore);
}
// alignment
for(int ti=startT; ti<=endT; ti++) {
- for(unsigned int i=0; i<m_sentence->alignedToT[ti].size(); i++) {
- int si = m_sentence->alignedToT[ti][i];
+ for(unsigned int i=0; i<m_sentence.alignedToT[ti].size(); i++) {
+ int si = m_sentence.alignedToT[ti][i];
std::string sourceSymbolIndex = IntToString(si-startS);
std::string targetSymbolIndex = IntToString(ti-startT);
rule.alignment += sourceSymbolIndex + "-" + targetSymbolIndex + " ";
@@ -1014,8 +1024,8 @@ void ExtractTask::writeRulesToFile()
<< rule->count << "\n";
}
}
- m_extractCollector->Write( m_id, out.str() );
- m_extractCollectorInv->Write( m_id, outInv.str() );;
+ m_extractFile << out.str();
+ m_extractFileInv << outInv.str();
}
void writeGlueGrammar( const string & fileName, RuleExtractionOptions &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection )
diff --git a/scripts/training/phrase-extract/extract-rules.vcxproj b/phrase-extract/extract-rules.vcxproj
index ecd36fe50..ecd36fe50 100644
--- a/scripts/training/phrase-extract/extract-rules.vcxproj
+++ b/phrase-extract/extract-rules.vcxproj
diff --git a/scripts/training/phrase-extract/extract.cpp b/phrase-extract/extract.cpp
index 16b413da9..6a1ee77ab 100644
--- a/scripts/training/phrase-extract/extract.cpp
+++ b/phrase-extract/extract.cpp
@@ -1,6 +1,7 @@
/*
* extract.cpp
- *
+ * Modified by: Rohit Gupta CDAC, Mumbai, India
+ * on July 15, 2012 to implement parallel processing
* Modified by: Nadi Tomeh - LIMSI/CNRS
* Machine Translation Marathon 2010, Dublin
*/
@@ -13,7 +14,7 @@
#include <stdlib.h>
#include <assert.h>
#include <cstring>
-
+#include <sstream>
#include <map>
#include <set>
#include <vector>
@@ -23,10 +24,16 @@
#include "tables-core.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
+#include "PhraseExtractionOptions.h"
using namespace std;
+using namespace MosesTraining;
+
+namespace MosesTraining {
+
+
+const long int LINE_MAX_LENGTH = 500000 ;
-#define LINE_MAX_LENGTH 500000
// HPhraseVertex represents a point in the alignment matrix
typedef pair <int, int> HPhraseVertex;
@@ -42,127 +49,149 @@ typedef vector < HPhrase > HPhraseVector;
// The key of the map is the English index and the value is a set of the source ones
typedef map <int, set<int> > HSentenceVertices;
-enum REO_MODEL_TYPE {REO_MSD, REO_MSLR, REO_MONO};
-enum REO_POS {LEFT, RIGHT, DLEFT, DRIGHT, UNKNOWN};
-
-REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+ REO_POS getOrientWordModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int));
-REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+ REO_POS getOrientPhraseModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int),
const HSentenceVertices &, const HSentenceVertices &);
-REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
+ REO_POS getOrientHierModel(SentenceAlignment &, REO_MODEL_TYPE, bool, bool,
int, int, int, int, int, int, int,
bool (*)(int, int), bool (*)(int, int),
const HSentenceVertices &, const HSentenceVertices &,
const HSentenceVertices &, const HSentenceVertices &,
REO_POS);
-void insertVertex(HSentenceVertices &, int, int);
-void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
+ void insertVertex(HSentenceVertices &, int, int);
+ void insertPhraseVertices(HSentenceVertices &, HSentenceVertices &, HSentenceVertices &, HSentenceVertices &,
int, int, int, int);
-string getOrientString(REO_POS, REO_MODEL_TYPE);
-
-bool ge(int, int);
-bool le(int, int);
-bool lt(int, int);
-
-void extractBase(SentenceAlignment &);
-void extract(SentenceAlignment &);
-void addPhrase(SentenceAlignment &, int, int, int, int, string &);
-bool isAligned (SentenceAlignment &, int, int);
-
-bool allModelsOutputFlag = false;
-
-bool wordModel = false;
-REO_MODEL_TYPE wordType = REO_MSD;
-bool phraseModel = false;
-REO_MODEL_TYPE phraseType = REO_MSD;
-bool hierModel = false;
-REO_MODEL_TYPE hierType = REO_MSD;
-
-
-Moses::OutputFileStream extractFile;
-Moses::OutputFileStream extractFileInv;
-Moses::OutputFileStream extractFileOrientation;
-Moses::OutputFileStream extractFileSentenceId;
-int maxPhraseLength;
-bool orientationFlag = false;
-bool translationFlag = true;
-bool sentenceIdFlag = false; //create extract file with sentence id
-bool onlyOutputSpanInfo = false;
-bool gzOutput = false;
+ string getOrientString(REO_POS, REO_MODEL_TYPE);
+
+ bool ge(int, int);
+ bool le(int, int);
+ bool lt(int, int);
+
+ bool isAligned (SentenceAlignment &, int, int);
+ int sentenceOffset = 0;
+
+}
+
+namespace MosesTraining{
+
+class ExtractTask
+{
+public:
+ ExtractTask(size_t id, SentenceAlignment &sentence,PhraseExtractionOptions &initoptions, Moses::OutputFileStream &extractFile, Moses::OutputFileStream &extractFileInv,Moses::OutputFileStream &extractFileOrientation,Moses::OutputFileStream &extractFileSentenceId ):
+ m_sentence(sentence),
+ m_options(initoptions),
+ m_extractFile(extractFile),
+ m_extractFileInv(extractFileInv),
+ m_extractFileOrientation(extractFileOrientation),
+ m_extractFileSentenceId(extractFileSentenceId) {}
+void Run();
+private:
+ vector< string > m_extractedPhrases;
+ vector< string > m_extractedPhrasesInv;
+ vector< string > m_extractedPhrasesOri;
+ vector< string > m_extractedPhrasesSid;
+ void extractBase(SentenceAlignment &);
+ void extract(SentenceAlignment &);
+ void addPhrase(SentenceAlignment &, int, int, int, int, string &);
+ void writePhrasesToFile();
+
+ SentenceAlignment &m_sentence;
+ const PhraseExtractionOptions &m_options;
+ Moses::OutputFileStream &m_extractFile;
+ Moses::OutputFileStream &m_extractFileInv;
+ Moses::OutputFileStream &m_extractFileOrientation;
+ Moses::OutputFileStream &m_extractFileSentenceId;
+};
+}
int main(int argc, char* argv[])
{
cerr << "PhraseExtract v1.4, written by Philipp Koehn\n"
<< "phrase extraction from an aligned parallel corpus\n";
- if (argc < 6) {
- cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] | --OnlyOutputSpanInfo | --NoTTable | --SentenceId]\n";
+ if (argc < 6) {
+ cerr << "syntax: extract en de align extract max-length [orientation [ --model [wbe|phrase|hier]-[msd|mslr|mono] ] ";
+ cerr<<"| --OnlyOutputSpanInfo | --NoTTable | --SentenceId | --GZOutput | --IncludeSentenceId | --SentenceOffset n ]\n";
exit(1);
}
- char* &fileNameE = argv[1];
- char* &fileNameF = argv[2];
- char* &fileNameA = argv[3];
- string fileNameExtract = string(argv[4]);
- maxPhraseLength = atoi(argv[5]);
+
+ Moses::OutputFileStream extractFile;
+ Moses::OutputFileStream extractFileInv;
+ Moses::OutputFileStream extractFileOrientation;
+ Moses::OutputFileStream extractFileSentenceId;
+ const char* const &fileNameE = argv[1];
+ const char* const &fileNameF = argv[2];
+ const char* const &fileNameA = argv[3];
+ const string fileNameExtract = string(argv[4]);
+ PhraseExtractionOptions options(atoi(argv[5]));
for(int i=6; i<argc; i++) {
if (strcmp(argv[i],"--OnlyOutputSpanInfo") == 0) {
- onlyOutputSpanInfo = true;
+ options.initOnlyOutputSpanInfo(true);
} else if (strcmp(argv[i],"orientation") == 0 || strcmp(argv[i],"--Orientation") == 0) {
- orientationFlag = true;
+ options.initOrientationFlag(true);
} else if (strcmp(argv[i],"--NoTTable") == 0) {
- translationFlag = false;
+ options.initTranslationFlag(false);
} else if (strcmp(argv[i], "--SentenceId") == 0) {
- sentenceIdFlag = true;
+ options.initSentenceIdFlag(true);
+ } else if (strcmp(argv[i], "--IncludeSentenceId") == 0) {
+ options.initIncludeSentenceIdFlag(true);
+ } else if (strcmp(argv[i], "--SentenceOffset") == 0) {
+ if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
+ cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
+ exit(1);
+ }
+ sentenceOffset = atoi(argv[++i]);
} else if (strcmp(argv[i], "--GZOutput") == 0) {
- gzOutput = true;
+ options.initGzOutput(true);
} else if(strcmp(argv[i],"--model") == 0) {
if (i+1 >= argc) {
cerr << "extract: syntax error, no model's information provided to the option --model " << endl;
exit(1);
}
- char* modelParams = argv[++i];
- char* modelName = strtok(modelParams, "-");
- char* modelType = strtok(NULL, "-");
+ char* modelParams = argv[++i];
+ char* modelName = strtok(modelParams, "-");
+ char* modelType = strtok(NULL, "-");
- REO_MODEL_TYPE intModelType;
+ // REO_MODEL_TYPE intModelType;
if(strcmp(modelName, "wbe") == 0) {
- wordModel = true;
+ options.initWordModel(true);
if(strcmp(modelType, "msd") == 0)
- wordType = REO_MSD;
+ options.initWordType(REO_MSD);
else if(strcmp(modelType, "mslr") == 0)
- wordType = REO_MSLR;
+ options.initWordType(REO_MSLR);
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
- wordType = REO_MONO;
+ options.initWordType(REO_MONO);
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else if(strcmp(modelName, "phrase") == 0) {
- phraseModel = true;
+ options.initPhraseModel(true);
if(strcmp(modelType, "msd") == 0)
- phraseType = REO_MSD;
+ options.initPhraseType(REO_MSD);
else if(strcmp(modelType, "mslr") == 0)
- phraseType = REO_MSLR;
+ options.initPhraseType(REO_MSLR);
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
- phraseType = REO_MONO;
+ options.initPhraseType(REO_MONO);
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
}
} else if(strcmp(modelName, "hier") == 0) {
- hierModel = true;
+ options.initHierModel(true);
if(strcmp(modelType, "msd") == 0)
- hierType = REO_MSD;
+ options.initHierType(REO_MSD);
else if(strcmp(modelType, "mslr") == 0)
- hierType = REO_MSLR;
+ options.initHierType(REO_MSLR);
else if(strcmp(modelType, "mono") == 0 || strcmp(modelType, "monotonicity") == 0)
- hierType = REO_MONO;
+ options.initHierType(REO_MONO);
else {
cerr << "extract: syntax error, unknown reordering model type: " << modelType << endl;
exit(1);
@@ -172,7 +201,8 @@ int main(int argc, char* argv[])
exit(1);
}
- allModelsOutputFlag = true;
+ options.initAllModelsOutputFlag(true);
+
} else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
@@ -181,9 +211,9 @@ int main(int argc, char* argv[])
// default reordering model if no model selected
// allows for the old syntax to be used
- if(orientationFlag && !allModelsOutputFlag) {
- wordModel = true;
- wordType = REO_MSD;
+ if(options.isOrientationFlag() && !options.isAllModelsOutputFlag()) {
+ options.initWordModel(true);
+ options.initWordType(REO_MSD);
}
// open input files
@@ -196,22 +226,22 @@ int main(int argc, char* argv[])
istream *aFileP = &aFile;
// open output files
- if (translationFlag) {
- string fileNameExtractInv = fileNameExtract + ".inv" + (gzOutput?".gz":"");
- extractFile.Open( (fileNameExtract + (gzOutput?".gz":"")).c_str());
+ if (options.isTranslationFlag()) {
+ string fileNameExtractInv = fileNameExtract + ".inv" + (options.isGzOutput()?".gz":"");
+ extractFile.Open( (fileNameExtract + (options.isGzOutput()?".gz":"")).c_str());
extractFileInv.Open(fileNameExtractInv.c_str());
}
- if (orientationFlag) {
- string fileNameExtractOrientation = fileNameExtract + ".o" + (gzOutput?".gz":"");
+ if (options.isOrientationFlag()) {
+ string fileNameExtractOrientation = fileNameExtract + ".o" + (options.isGzOutput()?".gz":"");
extractFileOrientation.Open(fileNameExtractOrientation.c_str());
}
- if (sentenceIdFlag) {
- string fileNameExtractSentenceId = fileNameExtract + ".sid" + (gzOutput?".gz":"");
+ if (options.isSentenceIdFlag()) {
+ string fileNameExtractSentenceId = fileNameExtract + ".sid" + (options.isGzOutput()?".gz":"");
extractFileSentenceId.Open(fileNameExtractSentenceId.c_str());
}
- int i=0;
+ int i = sentenceOffset;
while(true) {
i++;
if (i%10000 == 0) cerr << "." << flush;
@@ -223,37 +253,56 @@ int main(int argc, char* argv[])
SAFE_GETLINE((*fFileP), foreignString, LINE_MAX_LENGTH, '\n', __FILE__);
SAFE_GETLINE((*aFileP), alignmentString, LINE_MAX_LENGTH, '\n', __FILE__);
SentenceAlignment sentence;
- // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
+ // cout << "read in: " << englishString << " & " << foreignString << " & " << alignmentString << endl;
//az: output src, tgt, and alingment line
- if (onlyOutputSpanInfo) {
+ if (options.isOnlyOutputSpanInfo()) {
cout << "LOG: SRC: " << foreignString << endl;
cout << "LOG: TGT: " << englishString << endl;
cout << "LOG: ALT: " << alignmentString << endl;
cout << "LOG: PHRASES_BEGIN:" << endl;
}
+ if (sentence.create( englishString, foreignString, alignmentString, i, false)) {
+ ExtractTask *task = new ExtractTask(i-1, sentence, options, extractFile , extractFileInv, extractFileOrientation, extractFileSentenceId);
+ task->Run();
+ delete task;
- if (sentence.create( englishString, foreignString, alignmentString, i)) {
- extract(sentence);
}
- if (onlyOutputSpanInfo) cout << "LOG: PHRASES_END:" << endl; //az: mark end of phrases
+ if (options.isOnlyOutputSpanInfo()) cout << "LOG: PHRASES_END:" << endl; //az: mark end of phrases
}
+
eFile.Close();
fFile.Close();
aFile.Close();
+
//az: only close if we actually opened it
- if (!onlyOutputSpanInfo) {
- if (translationFlag) {
+ if (!options.isOnlyOutputSpanInfo()) {
+ if (options.isTranslationFlag()) {
extractFile.Close();
extractFileInv.Close();
+
}
- if (orientationFlag) extractFileOrientation.Close();
- if (sentenceIdFlag) {
+ if (options.isOrientationFlag()){
+ extractFileOrientation.Close();
+ }
+ if (options.isSentenceIdFlag()) {
extractFileSentenceId.Close();
}
}
}
-void extract(SentenceAlignment &sentence)
+namespace MosesTraining
+{
+void ExtractTask::Run() {
+ extract(m_sentence);
+ writePhrasesToFile();
+ m_extractedPhrases.clear();
+ m_extractedPhrasesInv.clear();
+ m_extractedPhrasesOri.clear();
+ m_extractedPhrasesSid.clear();
+
+}
+
+void ExtractTask::extract(SentenceAlignment &sentence)
{
int countE = sentence.target.size();
int countF = sentence.source.size();
@@ -272,14 +321,14 @@ void extract(SentenceAlignment &sentence)
HSentenceVertices::const_iterator it;
- bool relaxLimit = hierModel;
- bool buildExtraStructure = phraseModel || hierModel;
+ bool relaxLimit = m_options.isHierModel();
+ bool buildExtraStructure = m_options.isPhraseModel() || m_options.isHierModel();
// check alignments for target phrase startE...endE
// loop over extracted phrases which are compatible with the word-alignments
for(int startE=0; startE<countE; startE++) {
for(int endE=startE;
- (endE<countE && (relaxLimit || endE<startE+maxPhraseLength));
+ (endE<countE && (relaxLimit || endE<startE+m_options.maxPhraseLength));
endE++) {
int minF = 9999;
@@ -299,7 +348,7 @@ void extract(SentenceAlignment &sentence)
}
if (maxF >= 0 && // aligned to any source words at all
- (relaxLimit || maxF-minF < maxPhraseLength)) { // source phrase within limits
+ (relaxLimit || maxF-minF < m_options.maxPhraseLength)) { // source phrase within limits
// check if source words are aligned to out of bound target words
bool out_of_bounds = false;
@@ -314,17 +363,17 @@ void extract(SentenceAlignment &sentence)
// start point of source phrase may retreat over unaligned
for(int startF=minF;
(startF>=0 &&
- (relaxLimit || startF>maxF-maxPhraseLength) && // within length limit
+ (relaxLimit || startF>maxF-m_options.maxPhraseLength) && // within length limit
(startF==minF || sentence.alignedCountS[startF]==0)); // unaligned
startF--)
// end point of source phrase may advance over unaligned
for(int endF=maxF;
(endF<countF &&
- (relaxLimit || endF<startF+maxPhraseLength) && // within length limit
+ (relaxLimit || endF<startF+m_options.maxPhraseLength) && // within length limit
(endF==maxF || sentence.alignedCountS[endF]==0)); // unaligned
endF++) { // at this point we have extracted a phrase
if(buildExtraStructure) { // phrase || hier
- if(endE-startE < maxPhraseLength && endF-startF < maxPhraseLength) { // within limit
+ if(endE-startE < m_options.maxPhraseLength && endF-startF < m_options.maxPhraseLength) { // within limit
inboundPhrases.push_back(HPhrase(HPhraseVertex(startF,startE),
HPhraseVertex(endF,endE)));
insertPhraseVertices(inTopLeft, inTopRight, inBottomLeft, inBottomRight,
@@ -334,16 +383,16 @@ void extract(SentenceAlignment &sentence)
startF, startE, endF, endE);
} else {
string orientationInfo = "";
- if(wordModel) {
+ if(m_options.isWordModel()) {
REO_POS wordPrevOrient, wordNextOrient;
bool connectedLeftTopP = isAligned( sentence, startF-1, startE-1 );
bool connectedRightTopP = isAligned( sentence, endF+1, startE-1 );
bool connectedLeftTopN = isAligned( sentence, endF+1, endE+1 );
bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
- wordPrevOrient = getOrientWordModel(sentence, wordType, connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
- wordNextOrient = getOrientWordModel(sentence, wordType, connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
- orientationInfo += getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType);
- if(allModelsOutputFlag)
+ wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopP, connectedRightTopP, startF, endF, startE, endE, countF, 0, 1, &ge, &lt);
+ wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(), connectedLeftTopN, connectedRightTopN, endF, startF, endE, startE, 0, countF, -1, &lt, &ge);
+ orientationInfo += getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType());
+ if(m_options.isAllModelsOutputFlag())
" | | ";
}
addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
@@ -369,38 +418,38 @@ void extract(SentenceAlignment &sentence)
bool connectedLeftTopN = isAligned( sentence, endF+1, endE+1 );
bool connectedRightTopN = isAligned( sentence, startF-1, endE+1 );
- if(wordModel) {
- wordPrevOrient = getOrientWordModel(sentence, wordType,
+ if(m_options.isWordModel()) {
+ wordPrevOrient = getOrientWordModel(sentence, m_options.isWordType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF, 0, 1,
&ge, &lt);
- wordNextOrient = getOrientWordModel(sentence, wordType,
+ wordNextOrient = getOrientWordModel(sentence, m_options.isWordType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF, -1,
&lt, &ge);
}
- if (phraseModel) {
- phrasePrevOrient = getOrientPhraseModel(sentence, phraseType,
+ if (m_options.isPhraseModel()) {
+ phrasePrevOrient = getOrientPhraseModel(sentence, m_options.isPhraseType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft);
- phraseNextOrient = getOrientPhraseModel(sentence, phraseType,
+ phraseNextOrient = getOrientPhraseModel(sentence, m_options.isPhraseType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight);
} else {
phrasePrevOrient = phraseNextOrient = UNKNOWN;
}
- if(hierModel) {
- hierPrevOrient = getOrientHierModel(sentence, hierType,
+ if(m_options.isHierModel()) {
+ hierPrevOrient = getOrientHierModel(sentence, m_options.isHierType(),
connectedLeftTopP, connectedRightTopP,
startF, endF, startE, endE, countF-1, 0, 1, &ge, &lt, inBottomRight, inBottomLeft, outBottomRight, outBottomLeft, phrasePrevOrient);
- hierNextOrient = getOrientHierModel(sentence, hierType,
+ hierNextOrient = getOrientHierModel(sentence, m_options.isHierType(),
connectedLeftTopN, connectedRightTopN,
endF, startF, endE, startE, 0, countF-1, -1, &lt, &ge, inBottomLeft, inBottomRight, outBottomLeft, outBottomRight, phraseNextOrient);
}
- orientationInfo = ((wordModel)? getOrientString(wordPrevOrient, wordType) + " " + getOrientString(wordNextOrient, wordType) : "") + " | " +
- ((phraseModel)? getOrientString(phrasePrevOrient, phraseType) + " " + getOrientString(phraseNextOrient, phraseType) : "") + " | " +
- ((hierModel)? getOrientString(hierPrevOrient, hierType) + " " + getOrientString(hierNextOrient, hierType) : "");
+ orientationInfo = ((m_options.isWordModel())? getOrientString(wordPrevOrient, m_options.isWordType()) + " " + getOrientString(wordNextOrient, m_options.isWordType()) : "") + " | " +
+ ((m_options.isPhraseModel())? getOrientString(phrasePrevOrient, m_options.isPhraseType()) + " " + getOrientString(phraseNextOrient, m_options.isPhraseType()) : "") + " | " +
+ ((m_options.isHierModel())? getOrientString(hierPrevOrient, m_options.isHierType()) + " " + getOrientString(hierNextOrient, m_options.isHierType()) : "");
addPhrase(sentence, startE, endE, startF, endF, orientationInfo);
}
@@ -608,92 +657,141 @@ string getOrientString(REO_POS orient, REO_MODEL_TYPE modelType)
return "";
}
-void addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
+void ExtractTask::addPhrase( SentenceAlignment &sentence, int startE, int endE, int startF, int endF , string &orientationInfo)
{
// source
- // cout << "adding ( " << startF << "-" << endF << ", " << startE << "-" << endE << ")\n";
+ // // cout << "adding ( " << startF << "-" << endF << ", " << startE << "-" << endE << ")\n";
+ ostringstream outextractstr;
+ ostringstream outextractstrInv;
+ ostringstream outextractstrOrientation;
+ ostringstream outextractstrSentenceId;
- if (onlyOutputSpanInfo) {
+ if (m_options.isOnlyOutputSpanInfo()) {
cout << startF << " " << endF << " " << startE << " " << endE << endl;
return;
}
- for(int fi=startF; fi<=endF; fi++) {
- if (translationFlag) extractFile << sentence.source[fi] << " ";
- if (orientationFlag) extractFileOrientation << sentence.source[fi] << " ";
- if (sentenceIdFlag) extractFileSentenceId << sentence.source[fi] << " ";
+for(int fi=startF; fi<=endF; fi++) {
+ if (m_options.isTranslationFlag()) outextractstr << sentence.source[fi] << " ";
+ if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.source[fi] << " ";
+ if (m_options.isSentenceIdFlag()) outextractstrSentenceId << sentence.source[fi] << " ";
}
- if (translationFlag) extractFile << "||| ";
- if (orientationFlag) extractFileOrientation << "||| ";
- if (sentenceIdFlag) extractFileSentenceId << "||| ";
+ if (m_options.isTranslationFlag()) outextractstr << "||| ";
+ if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
+ if (m_options.isSentenceIdFlag()) outextractstrSentenceId << "||| ";
// target
for(int ei=startE; ei<=endE; ei++) {
- if (translationFlag) extractFile << sentence.target[ei] << " ";
- if (translationFlag) extractFileInv << sentence.target[ei] << " ";
- if (orientationFlag) extractFileOrientation << sentence.target[ei] << " ";
- if (sentenceIdFlag) extractFileSentenceId << sentence.target[ei] << " ";
+ if (m_options.isTranslationFlag()) outextractstr << sentence.target[ei] << " ";
+ if (m_options.isTranslationFlag()) outextractstrInv << sentence.target[ei] << " ";
+ if (m_options.isOrientationFlag()) outextractstrOrientation << sentence.target[ei] << " ";
+ if (m_options.isSentenceIdFlag()) outextractstrSentenceId << sentence.target[ei] << " ";
}
- if (translationFlag) extractFile << "|||";
- if (translationFlag) extractFileInv << "||| ";
- if (orientationFlag) extractFileOrientation << "||| ";
- if (sentenceIdFlag) extractFileSentenceId << "||| ";
+ if (m_options.isTranslationFlag()) outextractstr << "|||";
+ if (m_options.isTranslationFlag()) outextractstrInv << "||| ";
+ if (m_options.isOrientationFlag()) outextractstrOrientation << "||| ";
+ if (m_options.isSentenceIdFlag()) outextractstrSentenceId << "||| ";
// source (for inverse)
- if (translationFlag) {
+
+ if (m_options.isTranslationFlag()) {
for(int fi=startF; fi<=endF; fi++)
- extractFileInv << sentence.source[fi] << " ";
- extractFileInv << "|||";
+ outextractstrInv << sentence.source[fi] << " ";
+ outextractstrInv << "|||";
}
-
// alignment
- if (translationFlag) {
+ if (m_options.isTranslationFlag()) {
for(int ei=startE; ei<=endE; ei++) {
- for(size_t i=0; i<sentence.alignedToT[ei].size(); i++) {
+ for(unsigned int i=0; i<sentence.alignedToT[ei].size(); i++) {
int fi = sentence.alignedToT[ei][i];
- extractFile << " " << fi-startF << "-" << ei-startE;
- extractFileInv << " " << ei-startE << "-" << fi-startF;
+ outextractstr << " " << fi-startF << "-" << ei-startE;
+ outextractstrInv << " " << ei-startE << "-" << fi-startF;
}
}
}
- if (orientationFlag)
- extractFileOrientation << orientationInfo;
+ if (m_options.isOrientationFlag())
+ outextractstrOrientation << orientationInfo;
- if (sentenceIdFlag) {
- extractFileSentenceId << sentence.sentenceID;
+ if (m_options.isSentenceIdFlag()) {
+ outextractstrSentenceId << sentence.sentenceID;
}
+ if (m_options.isIncludeSentenceIdFlag()) {
+ outextractstr << " ||| " << sentence.sentenceID;
+ }
+
+ if (m_options.isTranslationFlag()) outextractstr << "\n";
+ if (m_options.isTranslationFlag()) outextractstrInv << "\n";
+ if (m_options.isOrientationFlag()) outextractstrOrientation << "\n";
+ if (m_options.isSentenceIdFlag()) outextractstrSentenceId << "\n";
+
+
+ m_extractedPhrases.push_back(outextractstr.str());
+ m_extractedPhrasesInv.push_back(outextractstrInv.str());
+ m_extractedPhrasesOri.push_back(outextractstrOrientation.str());
+ m_extractedPhrasesSid.push_back(outextractstrSentenceId.str());
+}
+
+
+void ExtractTask::writePhrasesToFile(){
+
+ ostringstream outextractFile;
+ ostringstream outextractFileInv;
+ ostringstream outextractFileOrientation;
+ ostringstream outextractFileSentenceId;
- if (translationFlag) extractFile << "\n";
- if (translationFlag) extractFileInv << "\n";
- if (orientationFlag) extractFileOrientation << "\n";
- if (sentenceIdFlag) extractFileSentenceId << "\n";
+ for(vector<string>::const_iterator phrase=m_extractedPhrases.begin();phrase!=m_extractedPhrases.end();phrase++){
+ outextractFile<<phrase->data();
+ }
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesInv.begin();phrase!=m_extractedPhrasesInv.end();phrase++){
+ outextractFileInv<<phrase->data();
+ }
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesOri.begin();phrase!=m_extractedPhrasesOri.end();phrase++){
+ outextractFileOrientation<<phrase->data();
+ }
+ for(vector<string>::const_iterator phrase=m_extractedPhrasesSid.begin();phrase!=m_extractedPhrasesSid.end();phrase++){
+ outextractFileSentenceId<<phrase->data();
+ }
+
+ m_extractFile << outextractFile.str();
+ m_extractFileInv << outextractFileInv.str();
+ m_extractFileOrientation << outextractFileOrientation.str();
+ m_extractFileSentenceId << outextractFileSentenceId.str();
}
// if proper conditioning, we need the number of times a source phrase occured
-void extractBase( SentenceAlignment &sentence )
+
+void ExtractTask::extractBase( SentenceAlignment &sentence )
{
+ ostringstream outextractFile;
+ ostringstream outextractFileInv;
+
int countF = sentence.source.size();
for(int startF=0; startF<countF; startF++) {
for(int endF=startF;
- (endF<countF && endF<startF+maxPhraseLength);
+ (endF<countF && endF<startF+m_options.maxPhraseLength);
endF++) {
for(int fi=startF; fi<=endF; fi++) {
- extractFile << sentence.source[fi] << " ";
- }
- extractFile << "|||" << endl;
+ outextractFile << sentence.source[fi] << " ";
+ }
+ outextractFile << "|||" << endl;
}
}
int countE = sentence.target.size();
for(int startE=0; startE<countE; startE++) {
for(int endE=startE;
- (endE<countE && endE<startE+maxPhraseLength);
+ (endE<countE && endE<startE+m_options.maxPhraseLength);
endE++) {
for(int ei=startE; ei<=endE; ei++) {
- extractFileInv << sentence.target[ei] << " ";
+ outextractFileInv << sentence.target[ei] << " ";
}
- extractFileInv << "|||" << endl;
+ outextractFileInv << "|||" << endl;
}
}
+ m_extractFile << outextractFile.str();
+ m_extractFileInv << outextractFileInv.str();
+
+}
+
}
diff --git a/scripts/training/phrase-extract/extract.vcxproj b/phrase-extract/extract.vcxproj
index 60a1128eb..60a1128eb 100644
--- a/scripts/training/phrase-extract/extract.vcxproj
+++ b/phrase-extract/extract.vcxproj
diff --git a/scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj b/phrase-extract/extract.xcodeproj/project.pbxproj
index 1e02493cb..5dd71ac39 100644
--- a/scripts/training/phrase-extract/extract.xcodeproj/project.pbxproj
+++ b/phrase-extract/extract.xcodeproj/project.pbxproj
@@ -44,6 +44,8 @@
1E671A80155C22C500119DD9 /* OutputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E671A7E155C22C500119DD9 /* OutputFileStream.cpp */; };
1E671A82155C234500119DD9 /* OutputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E671A7E155C22C500119DD9 /* OutputFileStream.cpp */; };
1E671A83155C234500119DD9 /* OutputFileStream.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E671A7F155C22C500119DD9 /* OutputFileStream.h */; };
+ 1E9B03A8159F58CC00E91032 /* OutputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E671A7E155C22C500119DD9 /* OutputFileStream.cpp */; };
+ 1E9B03A9159F58CC00E91032 /* OutputFileStream.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E671A7F155C22C500119DD9 /* OutputFileStream.h */; };
1EB1C8321200D5C00079FCBB /* PhraseAlignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB1C8311200D5C00079FCBB /* PhraseAlignment.cpp */; };
1EB29A3B1511C253005BC4BA /* InputFileStream.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A211129C024C00041956 /* InputFileStream.cpp */; };
1EB29A3C1511C253005BC4BA /* InputFileStream.h in Sources */ = {isa = PBXBuildFile; fileRef = 1EB8A210129C024C00041956 /* InputFileStream.h */; };
@@ -106,6 +108,8 @@
1E7C2CFC11F1146300213451 /* consolidate-direct */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "consolidate-direct"; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2CFE11F1146300213451 /* extract-rules */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "extract-rules"; sourceTree = BUILT_PRODUCTS_DIR; };
1E7C2D0011F1146300213451 /* statistics */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = statistics; sourceTree = BUILT_PRODUCTS_DIR; };
+ 1E9B03A4159E70A100E91032 /* consolidate-reverse.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = "consolidate-reverse.cpp"; sourceTree = "<group>"; };
+ 1E9B03A5159E70A100E91032 /* XmlException.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XmlException.h; sourceTree = "<group>"; };
1EB1C8301200D5C00079FCBB /* PhraseAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = PhraseAlignment.h; sourceTree = "<group>"; };
1EB1C8311200D5C00079FCBB /* PhraseAlignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = PhraseAlignment.cpp; sourceTree = "<group>"; };
1EB1C8491200D77E0079FCBB /* score.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = score.h; sourceTree = "<group>"; };
@@ -174,6 +178,8 @@
08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup;
children = (
+ 1E9B03A4159E70A100E91032 /* consolidate-reverse.cpp */,
+ 1E9B03A5159E70A100E91032 /* XmlException.h */,
1E671A7E155C22C500119DD9 /* OutputFileStream.cpp */,
1E671A7F155C22C500119DD9 /* OutputFileStream.h */,
1E3EF29E13DBEAF300C1D54A /* extract-lex.cpp */,
@@ -441,6 +447,8 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
+ 1E9B03A8159F58CC00E91032 /* OutputFileStream.cpp in Sources */,
+ 1E9B03A9159F58CC00E91032 /* OutputFileStream.h in Sources */,
1EB8A297129C06A300041956 /* gzfilebuf.h in Sources */,
1EB8A261129C04C700041956 /* InputFileStream.cpp in Sources */,
1CFE962911762A3A006FF13B /* consolidate-direct.cpp in Sources */,
@@ -623,7 +631,10 @@
GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0;
INSTALL_PATH = /usr/local/bin;
- OTHER_LDFLAGS = "-lz";
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lboost_iostreams",
+ );
PRODUCT_NAME = "consolidate-direct";
SDKROOT = macosx;
};
@@ -639,7 +650,10 @@
GCC_MODEL_TUNING = G5;
INSTALL_PATH = /usr/local/bin;
ONLY_ACTIVE_ARCH = YES;
- OTHER_LDFLAGS = "-lz";
+ OTHER_LDFLAGS = (
+ "-lz",
+ "-lboost_iostreams",
+ );
PRODUCT_NAME = "consolidate-direct";
SDKROOT = macosx;
ZERO_LINK = NO;
diff --git a/scripts/training/lexical-reordering/gzfilebuf.h b/phrase-extract/gzfilebuf.h
index b5b0ce87f..b5b0ce87f 100755..100644
--- a/scripts/training/lexical-reordering/gzfilebuf.h
+++ b/phrase-extract/gzfilebuf.h
diff --git a/scripts/training/phrase-extract/hierarchical.h b/phrase-extract/hierarchical.h
index 61c899013..dd9c77a25 100644
--- a/scripts/training/phrase-extract/hierarchical.h
+++ b/phrase-extract/hierarchical.h
@@ -14,6 +14,9 @@
#include <set>
#include <vector>
+namespace MosesTraining
+{
+
// HPhraseVertex represents a point in the alignment matrix
typedef std::pair <int, int> HPhraseVertex;
@@ -29,5 +32,6 @@ typedef std::vector < HPhrase > HPhraseVector;
// The key of the std::map is the English index and the value is a std::set of the foreign ones
typedef std::map <int, std::set<int> > HSenteceVertices;
+} // namespace
#endif /* HIERARCHICAL_H_ */
diff --git a/scripts/training/lexical-reordering/InputFileStream.cpp b/phrase-extract/lexical-reordering/InputFileStream.cpp
index 013781c36..013781c36 100755
--- a/scripts/training/lexical-reordering/InputFileStream.cpp
+++ b/phrase-extract/lexical-reordering/InputFileStream.cpp
diff --git a/scripts/training/lexical-reordering/InputFileStream.h b/phrase-extract/lexical-reordering/InputFileStream.h
index 1f37715fd..1f37715fd 100755
--- a/scripts/training/lexical-reordering/InputFileStream.h
+++ b/phrase-extract/lexical-reordering/InputFileStream.h
diff --git a/phrase-extract/lexical-reordering/Jamfile b/phrase-extract/lexical-reordering/Jamfile
new file mode 100644
index 000000000..a53465577
--- /dev/null
+++ b/phrase-extract/lexical-reordering/Jamfile
@@ -0,0 +1,2 @@
+exe lexical-reordering-score : InputFileStream.cpp reordering_classes.cpp score.cpp ../..//z ;
+
diff --git a/scripts/training/phrase-extract/gzfilebuf.h b/phrase-extract/lexical-reordering/gzfilebuf.h
index b5b0ce87f..b5b0ce87f 100644..100755
--- a/scripts/training/phrase-extract/gzfilebuf.h
+++ b/phrase-extract/lexical-reordering/gzfilebuf.h
diff --git a/scripts/training/lexical-reordering/reordering_classes.cpp b/phrase-extract/lexical-reordering/reordering_classes.cpp
index 2f159e4fa..2f159e4fa 100644
--- a/scripts/training/lexical-reordering/reordering_classes.cpp
+++ b/phrase-extract/lexical-reordering/reordering_classes.cpp
diff --git a/scripts/training/lexical-reordering/reordering_classes.h b/phrase-extract/lexical-reordering/reordering_classes.h
index 4d0b56240..4d0b56240 100644
--- a/scripts/training/lexical-reordering/reordering_classes.h
+++ b/phrase-extract/lexical-reordering/reordering_classes.h
diff --git a/scripts/training/lexical-reordering/score.cpp b/phrase-extract/lexical-reordering/score.cpp
index 7f14b9fc8..7f14b9fc8 100644
--- a/scripts/training/lexical-reordering/score.cpp
+++ b/phrase-extract/lexical-reordering/score.cpp
diff --git a/scripts/training/phrase-extract/pcfg-common/Jamfile b/phrase-extract/pcfg-common/Jamfile
index 3dc272a56..3dc272a56 100644
--- a/scripts/training/phrase-extract/pcfg-common/Jamfile
+++ b/phrase-extract/pcfg-common/Jamfile
diff --git a/scripts/training/phrase-extract/pcfg-common/exception.h b/phrase-extract/pcfg-common/exception.h
index 3dbd59d0e..3dbd59d0e 100644
--- a/scripts/training/phrase-extract/pcfg-common/exception.h
+++ b/phrase-extract/pcfg-common/exception.h
diff --git a/scripts/training/phrase-extract/pcfg-common/numbered_set.h b/phrase-extract/pcfg-common/numbered_set.h
index 15e768b4c..15e768b4c 100644
--- a/scripts/training/phrase-extract/pcfg-common/numbered_set.h
+++ b/phrase-extract/pcfg-common/numbered_set.h
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.cc b/phrase-extract/pcfg-common/pcfg.cc
index 054e20a48..054e20a48 100644
--- a/scripts/training/phrase-extract/pcfg-common/pcfg.cc
+++ b/phrase-extract/pcfg-common/pcfg.cc
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg.h b/phrase-extract/pcfg-common/pcfg.h
index b87336584..b87336584 100644
--- a/scripts/training/phrase-extract/pcfg-common/pcfg.h
+++ b/phrase-extract/pcfg-common/pcfg.h
diff --git a/scripts/training/phrase-extract/pcfg-common/pcfg_tree.h b/phrase-extract/pcfg-common/pcfg_tree.h
index bdac64dfc..bdac64dfc 100644
--- a/scripts/training/phrase-extract/pcfg-common/pcfg_tree.h
+++ b/phrase-extract/pcfg-common/pcfg_tree.h
diff --git a/scripts/training/phrase-extract/pcfg-common/syntax_tree.h b/phrase-extract/pcfg-common/syntax_tree.h
index 89c6ec0c3..89c6ec0c3 100644
--- a/scripts/training/phrase-extract/pcfg-common/syntax_tree.h
+++ b/phrase-extract/pcfg-common/syntax_tree.h
diff --git a/scripts/training/phrase-extract/pcfg-common/tool.cc b/phrase-extract/pcfg-common/tool.cc
index bebd220e1..bebd220e1 100644
--- a/scripts/training/phrase-extract/pcfg-common/tool.cc
+++ b/phrase-extract/pcfg-common/tool.cc
diff --git a/scripts/training/phrase-extract/pcfg-common/tool.h b/phrase-extract/pcfg-common/tool.h
index 0af342569..0af342569 100644
--- a/scripts/training/phrase-extract/pcfg-common/tool.h
+++ b/phrase-extract/pcfg-common/tool.h
diff --git a/scripts/training/phrase-extract/pcfg-common/typedef.h b/phrase-extract/pcfg-common/typedef.h
index 49a12d681..49a12d681 100644
--- a/scripts/training/phrase-extract/pcfg-common/typedef.h
+++ b/phrase-extract/pcfg-common/typedef.h
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc b/phrase-extract/pcfg-common/xml_tree_parser.cc
index fd9d11334..b6c1da177 100644
--- a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.cc
+++ b/phrase-extract/pcfg-common/xml_tree_parser.cc
@@ -27,6 +27,8 @@
#include <cassert>
#include <vector>
+using namespace MosesTraining;
+
namespace Moses {
namespace PCFG {
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h b/phrase-extract/pcfg-common/xml_tree_parser.h
index 6b418c44e..7d01b0684 100644
--- a/scripts/training/phrase-extract/pcfg-common/xml_tree_parser.h
+++ b/phrase-extract/pcfg-common/xml_tree_parser.h
@@ -40,13 +40,13 @@ class XmlTreeParser {
XmlTreeParser();
std::auto_ptr<PcfgTree> Parse(const std::string &);
private:
- std::auto_ptr<PcfgTree> ConvertTree(const SyntaxNode &,
+ std::auto_ptr<PcfgTree> ConvertTree(const MosesTraining::SyntaxNode &,
const std::vector<std::string> &);
std::set<std::string> m_labelSet;
std::map<std::string, int> m_topLabelSet;
std::string m_line;
- ::SyntaxTree m_tree;
+ MosesTraining::SyntaxTree m_tree;
std::vector<std::string> m_words;
};
diff --git a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h b/phrase-extract/pcfg-common/xml_tree_writer.h
index 6a9a3de05..6a9a3de05 100644
--- a/scripts/training/phrase-extract/pcfg-common/xml_tree_writer.h
+++ b/phrase-extract/pcfg-common/xml_tree_writer.h
diff --git a/phrase-extract/pcfg-extract/Jamfile b/phrase-extract/pcfg-extract/Jamfile
new file mode 100644
index 000000000..fc93f1a19
--- /dev/null
+++ b/phrase-extract/pcfg-extract/Jamfile
@@ -0,0 +1 @@
+exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../..//boost_program_options ;
diff --git a/scripts/training/phrase-extract/pcfg-extract/main.cc b/phrase-extract/pcfg-extract/main.cc
index 47b45afc3..47b45afc3 100644
--- a/scripts/training/phrase-extract/pcfg-extract/main.cc
+++ b/phrase-extract/pcfg-extract/main.cc
diff --git a/scripts/training/phrase-extract/pcfg-extract/options.h b/phrase-extract/pcfg-extract/options.h
index 3acb31b58..3acb31b58 100644
--- a/scripts/training/phrase-extract/pcfg-extract/options.h
+++ b/phrase-extract/pcfg-extract/options.h
diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc b/phrase-extract/pcfg-extract/pcfg_extract.cc
index 71c2e31c3..71c2e31c3 100644
--- a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.cc
+++ b/phrase-extract/pcfg-extract/pcfg_extract.cc
diff --git a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h b/phrase-extract/pcfg-extract/pcfg_extract.h
index 1af6cb4fe..1af6cb4fe 100644
--- a/scripts/training/phrase-extract/pcfg-extract/pcfg_extract.h
+++ b/phrase-extract/pcfg-extract/pcfg_extract.h
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc b/phrase-extract/pcfg-extract/rule_collection.cc
index 32b63e0ef..32b63e0ef 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_collection.cc
+++ b/phrase-extract/pcfg-extract/rule_collection.cc
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_collection.h b/phrase-extract/pcfg-extract/rule_collection.h
index 452fa0e97..452fa0e97 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_collection.h
+++ b/phrase-extract/pcfg-extract/rule_collection.h
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc b/phrase-extract/pcfg-extract/rule_extractor.cc
index 217574e7d..217574e7d 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.cc
+++ b/phrase-extract/pcfg-extract/rule_extractor.cc
diff --git a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.h b/phrase-extract/pcfg-extract/rule_extractor.h
index 6bcffbc61..6bcffbc61 100644
--- a/scripts/training/phrase-extract/pcfg-extract/rule_extractor.h
+++ b/phrase-extract/pcfg-extract/rule_extractor.h
diff --git a/phrase-extract/pcfg-score/Jamfile b/phrase-extract/pcfg-score/Jamfile
new file mode 100644
index 000000000..6b8c963ce
--- /dev/null
+++ b/phrase-extract/pcfg-score/Jamfile
@@ -0,0 +1 @@
+exe pcfg-score : [ glob *.cc ] ..//pcfg-common ../..//boost_program_options ;
diff --git a/scripts/training/phrase-extract/pcfg-score/main.cc b/phrase-extract/pcfg-score/main.cc
index da5392add..da5392add 100644
--- a/scripts/training/phrase-extract/pcfg-score/main.cc
+++ b/phrase-extract/pcfg-score/main.cc
diff --git a/scripts/training/phrase-extract/pcfg-score/options.h b/phrase-extract/pcfg-score/options.h
index e54b2a0b9..e54b2a0b9 100644
--- a/scripts/training/phrase-extract/pcfg-score/options.h
+++ b/phrase-extract/pcfg-score/options.h
diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc b/phrase-extract/pcfg-score/pcfg_score.cc
index 345d7fc60..345d7fc60 100644
--- a/scripts/training/phrase-extract/pcfg-score/pcfg_score.cc
+++ b/phrase-extract/pcfg-score/pcfg_score.cc
diff --git a/scripts/training/phrase-extract/pcfg-score/pcfg_score.h b/phrase-extract/pcfg-score/pcfg_score.h
index 5e506c39d..5e506c39d 100644
--- a/scripts/training/phrase-extract/pcfg-score/pcfg_score.h
+++ b/phrase-extract/pcfg-score/pcfg_score.h
diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc b/phrase-extract/pcfg-score/tree_scorer.cc
index f9ce97ae0..f9ce97ae0 100644
--- a/scripts/training/phrase-extract/pcfg-score/tree_scorer.cc
+++ b/phrase-extract/pcfg-score/tree_scorer.cc
diff --git a/scripts/training/phrase-extract/pcfg-score/tree_scorer.h b/phrase-extract/pcfg-score/tree_scorer.h
index 36f4e1e99..36f4e1e99 100644
--- a/scripts/training/phrase-extract/pcfg-score/tree_scorer.h
+++ b/phrase-extract/pcfg-score/tree_scorer.h
diff --git a/scripts/training/phrase-extract/phrase-extract.sln b/phrase-extract/phrase-extract.sln
index 800c26192..800c26192 100644
--- a/scripts/training/phrase-extract/phrase-extract.sln
+++ b/phrase-extract/phrase-extract.sln
diff --git a/scripts/training/phrase-extract/relax-parse.cpp b/phrase-extract/relax-parse.cpp
index 6e561b921..ac06174e8 100644
--- a/scripts/training/phrase-extract/relax-parse.cpp
+++ b/phrase-extract/relax-parse.cpp
@@ -25,6 +25,7 @@
#include "tables-core.h"
using namespace std;
+using namespace MosesTraining;
int main(int argc, char* argv[])
{
diff --git a/scripts/training/phrase-extract/relax-parse.h b/phrase-extract/relax-parse.h
index ae5994641..ec604405e 100644
--- a/scripts/training/phrase-extract/relax-parse.h
+++ b/phrase-extract/relax-parse.h
@@ -39,8 +39,8 @@ char SAMTLevel = 0;
// functions
void init(int argc, char* argv[]);
-void store( SyntaxTree &tree, std::vector<std::string> &words );
-void LeftBinarize( SyntaxTree &tree, ParentNodes &parents );
-void RightBinarize( SyntaxTree &tree, ParentNodes &parents );
-void SAMT( SyntaxTree &tree, ParentNodes &parents );
+void store( MosesTraining::SyntaxTree &tree, std::vector<std::string> &words );
+void LeftBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
+void RightBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
+void SAMT( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
diff --git a/scripts/training/phrase-extract/score.cpp b/phrase-extract/score.cpp
index 44a71f67e..f764beef7 100644
--- a/scripts/training/phrase-extract/score.cpp
+++ b/phrase-extract/score.cpp
@@ -30,49 +30,19 @@
#include "SafeGetline.h"
#include "tables-core.h"
+#include "domain.h"
#include "PhraseAlignment.h"
#include "score.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
using namespace std;
+using namespace MosesTraining;
#define LINE_MAX_LENGTH 100000
-Vocabulary vcbT;
-Vocabulary vcbS;
-
-class LexicalTable
+namespace MosesTraining
{
-public:
- map< WORD_ID, map< WORD_ID, double > > ltable;
- void load( char[] );
- double permissiveLookup( WORD_ID wordS, WORD_ID wordT ) {
- // cout << endl << vcbS.getWord( wordS ) << "-" << vcbT.getWord( wordT ) << ":";
- if (ltable.find( wordS ) == ltable.end()) return 1.0;
- if (ltable[ wordS ].find( wordT ) == ltable[ wordS ].end()) return 1.0;
- // cout << ltable[ wordS ][ wordT ];
- return ltable[ wordS ][ wordT ];
- }
-};
-
-vector<string> tokenize( const char [] );
-
-void writeCountOfCounts( const string &fileNameCountOfCounts );
-void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile);
-PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair );
-void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile );
-double computeLexicalTranslation( const PHRASE &, const PHRASE &, PhraseAlignment * );
-double computeUnalignedPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * );
-set<string> functionWordList;
-void loadFunctionWords( const char* fileNameFunctionWords );
-double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * );
-void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
- , map<size_t, map<size_t, float> > &sourceProb
- , map<size_t, map<size_t, float> > &targetProb);
-void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
-void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
-
LexicalTable lexTable;
bool inverseFlag = false;
bool hierarchicalFlag = false;
@@ -85,13 +55,44 @@ bool kneserNeyFlag = false;
#define COC_MAX 10
bool logProbFlag = false;
int negLogProb = 1;
+inline float maybeLogProb( float a ) { return logProbFlag ? negLogProb*log(a) : a; }
bool lexFlag = true;
bool unalignedFlag = false;
bool unalignedFWFlag = false;
bool outputNTLengths = false;
+bool singletonFeature = false;
+bool crossedNonTerm = false;
int countOfCounts[COC_MAX+1];
int totalDistinct = 0;
float minCountHierarchical = 0;
+bool domainFlag = false;
+bool domainRatioFlag = false;
+bool domainSubsetFlag = false;
+bool domainSparseFlag = false;
+Domain *domain;
+bool includeSentenceIdFlag = false;
+
+Vocabulary vcbT;
+Vocabulary vcbS;
+
+} // namespace
+
+vector<string> tokenize( const char [] );
+
+void writeCountOfCounts( const string &fileNameCountOfCounts );
+void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile, bool isSingleton);
+const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrasePair );
+void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile, bool isSingleton );
+double computeLexicalTranslation( const PHRASE &, const PHRASE &, const PhraseAlignment & );
+double computeUnalignedPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
+set<string> functionWordList;
+void loadFunctionWords( const string &fileNameFunctionWords );
+double computeUnalignedFWPenalty( const PHRASE &, const PHRASE &, const PhraseAlignment & );
+void calcNTLengthProb(const vector< PhraseAlignment* > &phrasePairs
+ , map<size_t, map<size_t, float> > &sourceProb
+ , map<size_t, map<size_t, float> > &targetProb);
+void printSourcePhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
+void printTargetPhrase(const PHRASE &, const PHRASE &, const PhraseAlignment &, ostream &);
int main(int argc, char* argv[])
{
@@ -99,14 +100,15 @@ int main(int argc, char* argv[])
<< "scoring methods for extracted rules\n";
if (argc < 4) {
- cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--WordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS]\n";
+ cerr << "syntax: score extract lex phrase-table [--Inverse] [--Hierarchical] [--LogProb] [--NegLogProb] [--NoLex] [--GoodTuring] [--KneserNey] [--WordAlignment] [--UnalignedPenalty] [--UnalignedFunctionWordPenalty function-word-file] [--MinCountHierarchical count] [--OutputNTLengths] [--PCFG] [--UnpairedExtractFormat] [--ConditionOnTargetLHS] [--[Sparse]Domain[Indicator|Ratio|Subset|Bin] domain-file [bins]] [--Singleton] [--CrossedNonTerm] \n";
exit(1);
}
- char* fileNameExtract = argv[1];
- char* fileNameLex = argv[2];
- char* fileNamePhraseTable = argv[3];
+ string fileNameExtract = argv[1];
+ string fileNameLex = argv[2];
+ string fileNamePhraseTable = argv[3];
string fileNameCountOfCounts;
- char* fileNameFunctionWords;
+ char* fileNameFunctionWords = NULL;
+ char* fileNameDomain = NULL;
for(int i=4; i<argc; i++) {
if (strcmp(argv[i],"inverse") == 0 || strcmp(argv[i],"--Inverse") == 0) {
@@ -149,6 +151,22 @@ int main(int argc, char* argv[])
}
fileNameFunctionWords = argv[++i];
cerr << "using unaligned function word penalty with function words from " << fileNameFunctionWords << endl;
+ } else if (strcmp(argv[i],"--SparseDomainIndicator") == 0 ||
+ strcmp(argv[i],"--SparseDomainRatio") == 0 ||
+ strcmp(argv[i],"--SparseDomainSubset") == 0 ||
+ strcmp(argv[i],"--DomainIndicator") == 0 ||
+ strcmp(argv[i],"--DomainRatio") == 0 ||
+ strcmp(argv[i],"--DomainSubset") == 0) {
+ includeSentenceIdFlag = true;
+ domainFlag = true;
+ domainSparseFlag = strstr( argv[i], "Sparse" );
+ domainRatioFlag = strstr( argv[i], "Ratio" );
+ domainSubsetFlag = strstr( argv[i], "Subset" );
+ if (i+1==argc) {
+ cerr << "ERROR: specify domain info file with " << argv[i] << endl;
+ exit(1);
+ }
+ fileNameDomain = argv[++i];
} else if (strcmp(argv[i],"--LogProb") == 0) {
logProbFlag = true;
cerr << "using log-probabilities\n";
@@ -162,6 +180,12 @@ int main(int argc, char* argv[])
minCountHierarchical -= 0.00001; // account for rounding
} else if (strcmp(argv[i],"--OutputNTLengths") == 0) {
outputNTLengths = true;
+ } else if (strcmp(argv[i],"--Singleton") == 0) {
+ singletonFeature = true;
+ cerr << "binary singleton feature\n";
+ } else if (strcmp(argv[i],"--CrossedNonTerm") == 0) {
+ crossedNonTerm = true;
+ cerr << "crossed non-term reordering feature\n";
} else {
cerr << "ERROR: unknown option " << argv[i] << endl;
exit(1);
@@ -176,6 +200,18 @@ int main(int argc, char* argv[])
if (unalignedFWFlag)
loadFunctionWords( fileNameFunctionWords );
+ // load domain information
+ if (domainFlag) {
+ if (inverseFlag) {
+ domainFlag = false;
+ includeSentenceIdFlag = false;
+ }
+ else {
+ domain = new Domain;
+ domain->load( fileNameDomain );
+ }
+ }
+
// compute count of counts for Good Turing discounting
if (goodTuringFlag || kneserNeyFlag) {
for(int i=1; i<=COC_MAX; i++) countOfCounts[i] = 0;
@@ -193,7 +229,7 @@ int main(int argc, char* argv[])
// output file: phrase translation table
ostream *phraseTableFile;
- if (strcmp(fileNamePhraseTable, "-") == 0) {
+ if (fileNamePhraseTable == "-") {
phraseTableFile = &cout;
}
else {
@@ -211,6 +247,7 @@ int main(int argc, char* argv[])
float lastCount = 0.0f;
float lastPcfgSum = 0.0f;
vector< PhraseAlignment > phrasePairsWithSameF;
+ bool isSingleton = true;
int i=0;
char line[LINE_MAX_LENGTH],lastLine[LINE_MAX_LENGTH];
lastLine[0] = '\0';
@@ -231,30 +268,40 @@ int main(int argc, char* argv[])
// create new phrase pair
PhraseAlignment phrasePair;
- phrasePair.create( line, i );
+ phrasePair.create( line, i, includeSentenceIdFlag );
lastCount = phrasePair.count;
lastPcfgSum = phrasePair.pcfgSum;
// only differs in count? just add count
- if (lastPhrasePair != NULL && lastPhrasePair->equals( phrasePair )) {
+ if (lastPhrasePair != NULL
+ && lastPhrasePair->equals( phrasePair )
+ && (!domainFlag
+ || domain->getDomainOfSentence( lastPhrasePair->sentenceId )
+ == domain->getDomainOfSentence( phrasePair.sentenceId ) )) {
lastPhrasePair->count += phrasePair.count;
lastPhrasePair->pcfgSum += phrasePair.pcfgSum;
continue;
}
-
+
// if new source phrase, process last batch
if (lastPhrasePair != NULL &&
lastPhrasePair->GetSource() != phrasePair.GetSource()) {
- processPhrasePairs( phrasePairsWithSameF, *phraseTableFile );
+ processPhrasePairs( phrasePairsWithSameF, *phraseTableFile, isSingleton );
+
phrasePairsWithSameF.clear();
+ isSingleton = false;
lastPhrasePair = NULL;
}
+ else
+ {
+ isSingleton = true;
+ }
// add phrase pairs to list, it's now the last one
phrasePairsWithSameF.push_back( phrasePair );
lastPhrasePair = &phrasePairsWithSameF.back();
}
- processPhrasePairs( phrasePairsWithSameF, *phraseTableFile );
+ processPhrasePairs( phrasePairsWithSameF, *phraseTableFile, isSingleton );
phraseTableFile->flush();
if (phraseTableFile != &cout) {
@@ -288,7 +335,7 @@ void writeCountOfCounts( const string &fileNameCountOfCounts )
countOfCountsFile.Close();
}
-void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseTableFile )
+void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseTableFile, bool isSingleton )
{
if (phrasePair.size() == 0) return;
@@ -329,25 +376,33 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseT
for(iter = sortedColl.begin(); iter != sortedColl.end(); ++iter)
{
const PhraseAlignmentCollection &group = **iter;
- outputPhrasePair( group, totalSource, phrasePairGroup.GetSize(), phraseTableFile );
-
+ outputPhrasePair( group, totalSource, phrasePairGroup.GetSize(), phraseTableFile, isSingleton );
}
}
-PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair )
+const PhraseAlignment &findBestAlignment(const PhraseAlignmentCollection &phrasePair )
{
float bestAlignmentCount = -1;
- PhraseAlignment* bestAlignment;
+ PhraseAlignment* bestAlignment = NULL;
for(size_t i=0; i<phrasePair.size(); i++) {
- if (phrasePair[i]->count > bestAlignmentCount) {
- bestAlignmentCount = phrasePair[i]->count;
- bestAlignment = phrasePair[i];
+ size_t alignInd;
+ if (inverseFlag)
+ { // count backwards, so that alignments for ties will be the same for both normal & inverse scores
+ alignInd = phrasePair.size() - i - 1;
}
- }
-
- return bestAlignment;
+ else {
+ alignInd = i;
+ }
+
+ if (phrasePair[alignInd]->count > bestAlignmentCount) {
+ bestAlignmentCount = phrasePair[alignInd]->count;
+ bestAlignment = phrasePair[alignInd];
+ }
+ }
+
+ return *bestAlignment;
}
@@ -438,11 +493,65 @@ void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t,
}
-void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile )
+bool calcCrossedNonTerm(int sourcePos, int targetPos, const std::vector< std::set<size_t> > &alignedToS)
+{
+ for (int currSource = 0; currSource < alignedToS.size(); ++currSource)
+ {
+ if (currSource == sourcePos)
+ { // skip
+ }
+ else
+ {
+ const std::set<size_t> &targetSet = alignedToS[currSource];
+ std::set<size_t>::const_iterator iter;
+ for (iter = targetSet.begin(); iter != targetSet.end(); ++iter)
+ {
+ size_t currTarget = *iter;
+
+ if ((currSource < sourcePos && currTarget > targetPos)
+ || (currSource > sourcePos && currTarget < targetPos)
+ )
+ {
+ return true;
+ }
+ }
+
+ }
+ }
+
+ return false;
+}
+
+int calcCrossedNonTerm(const PHRASE &phraseS, const PhraseAlignment &bestAlignment)
+{
+ const std::vector< std::set<size_t> > &alignedToS = bestAlignment.alignedToS;
+
+ for (int sourcePos = 0; sourcePos < alignedToS.size(); ++sourcePos)
+ {
+ const std::set<size_t> &targetSet = alignedToS[sourcePos];
+
+ WORD_ID wordId = phraseS[sourcePos];
+ const WORD &word = vcbS.getWord(wordId);
+ bool isNonTerm = isNonTerminal(word);
+
+ if (isNonTerm)
+ {
+ assert(targetSet.size() == 1);
+ int targetPos = *targetSet.begin();
+ bool ret = calcCrossedNonTerm(sourcePos, targetPos, alignedToS);
+ if (ret)
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile, bool isSingleton )
{
if (phrasePair.size() == 0) return;
- PhraseAlignment *bestAlignment = findBestAlignment( phrasePair );
+ const PhraseAlignment &bestAlignment = findBestAlignment( phrasePair );
// compute count
float count = 0;
@@ -450,6 +559,18 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
count += phrasePair[i]->count;
}
+ // compute domain counts
+ map< string, float > domainCount;
+ if (domainFlag) {
+ for(size_t i=0; i<phrasePair.size(); i++) {
+ string d = domain->getDomainOfSentence( phrasePair[i]->sentenceId );
+ if (domainCount.find( d ) == domainCount.end())
+ domainCount[ d ] = phrasePair[i]->count;
+ else
+ domainCount[ d ] += phrasePair[i]->count;
+ }
+ }
+
// collect count of count statistics
if (goodTuringFlag || kneserNeyFlag) {
totalDistinct++;
@@ -459,7 +580,7 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
}
// compute PCFG score
- float pcfgScore;
+ float pcfgScore = 0;
if (pcfgFlag && !inverseFlag) {
float pcfgSum = 0;
for(size_t i=0; i<phrasePair.size(); ++i) {
@@ -482,41 +603,109 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
// source phrase (unless inverse)
if (! inverseFlag) {
- printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
+ printSourcePhrase(phraseS, phraseT, bestAlignment, phraseTableFile);
phraseTableFile << " ||| ";
}
// target phrase
- printTargetPhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
+ printTargetPhrase(phraseS, phraseT, bestAlignment, phraseTableFile);
phraseTableFile << " ||| ";
// source phrase (if inverse)
if (inverseFlag) {
- printSourcePhrase(phraseS, phraseT, *bestAlignment, phraseTableFile);
+ printSourcePhrase(phraseS, phraseT, bestAlignment, phraseTableFile);
phraseTableFile << " ||| ";
}
// lexical translation probability
if (lexFlag) {
double lexScore = computeLexicalTranslation( phraseS, phraseT, bestAlignment);
- phraseTableFile << ( logProbFlag ? negLogProb*log(lexScore) : lexScore );
+ phraseTableFile << maybeLogProb( lexScore );
}
// unaligned word penalty
if (unalignedFlag) {
double penalty = computeUnalignedPenalty( phraseS, phraseT, bestAlignment);
- phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
+ phraseTableFile << " " << maybeLogProb( penalty );
}
// unaligned function word penalty
if (unalignedFWFlag) {
double penalty = computeUnalignedFWPenalty( phraseS, phraseT, bestAlignment);
- phraseTableFile << " " << ( logProbFlag ? negLogProb*log(penalty) : penalty );
+ phraseTableFile << " " << maybeLogProb( penalty );
}
+ if (singletonFeature) {
+ phraseTableFile << " " << (isSingleton ? 1 : 0);
+ }
+
+ if (crossedNonTerm && !inverseFlag) {
+ phraseTableFile << " " << calcCrossedNonTerm(phraseS, bestAlignment);
+ }
+
// target-side PCFG score
if (pcfgFlag && !inverseFlag) {
- phraseTableFile << " " << pcfgScore;
+ phraseTableFile << " " << maybeLogProb( pcfgScore );
+ }
+
+ // domain count features
+ if (domainFlag) {
+ if (domainSparseFlag) {
+ // sparse, subset
+ if (domainSubsetFlag) {
+ typedef vector< string >::const_iterator I;
+ phraseTableFile << " doms";
+ for (I i = domain->list.begin(); i != domain->list.end(); i++ ) {
+ if (domainCount.find( *i ) != domainCount.end() ) {
+ phraseTableFile << "_" << *i;
+ }
+ }
+ phraseTableFile << " 1";
+ }
+ // sparse, indicator or ratio
+ else {
+ typedef map< string, float >::const_iterator I;
+ for (I i=domainCount.begin(); i != domainCount.end(); i++) {
+ if (domainRatioFlag) {
+ phraseTableFile << " domr_" << i->first << " " << (i->second / count);
+ }
+ else {
+ phraseTableFile << " dom_" << i->first << " 1";
+ }
+ }
+ }
+ }
+ // core, subset
+ else if (domainSubsetFlag) {
+ if (domain->list.size() > 6) {
+ cerr << "ERROR: too many domains for core domain subset features\n";
+ exit(1);
+ }
+ size_t bitmap = 0;
+ for(size_t bit = 0; bit < domain->list.size(); bit++) {
+ if (domainCount.find( domain->list[ bit ] ) != domainCount.end()) {
+ bitmap += 1 << bit;
+ }
+ }
+ for(size_t i = 1; i < (1 << domain->list.size()); i++) {
+ phraseTableFile << " " << maybeLogProb( (bitmap == i) ? 2.718 : 1 );
+ }
+ }
+ // core, indicator or ratio
+ else {
+ typedef vector< string >::const_iterator I;
+ for (I i = domain->list.begin(); i != domain->list.end(); i++ ) {
+ if (domainCount.find( *i ) == domainCount.end() ) {
+ phraseTableFile << " " << maybeLogProb( 1 );
+ }
+ else if (domainRatioFlag) {
+ phraseTableFile << " " << maybeLogProb( exp( domainCount[ *i ] / count ) );
+ }
+ else {
+ phraseTableFile << " " << maybeLogProb( 2.718 );
+ }
+ }
+ }
}
phraseTableFile << " ||| ";
@@ -526,41 +715,40 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
if (hierarchicalFlag) {
// always output alignment if hiero style, but only for non-terms
// (eh: output all alignments, needed for some feature functions)
- assert(phraseT.size() == bestAlignment->alignedToT.size() + 1);
+ assert(phraseT.size() == bestAlignment.alignedToT.size() + 1);
std::vector<std::string> alignment;
for(size_t j = 0; j < phraseT.size() - 1; j++) {
if (isNonTerminal(vcbT.getWord( phraseT[j] ))) {
- if (bestAlignment->alignedToT[ j ].size() != 1) {
+ if (bestAlignment.alignedToT[ j ].size() != 1) {
cerr << "Error: unequal numbers of non-terminals. Make sure the text does not contain words in square brackets (like [xxx])." << endl;
phraseTableFile.flush();
- assert(bestAlignment->alignedToT[ j ].size() == 1);
+ assert(bestAlignment.alignedToT[ j ].size() == 1);
}
- int sourcePos = *(bestAlignment->alignedToT[ j ].begin());
+ int sourcePos = *(bestAlignment.alignedToT[ j ].begin());
//phraseTableFile << sourcePos << "-" << j << " ";
- std::stringstream point;
- point << sourcePos << "-" << j;
- alignment.push_back(point.str());
- }
- else {
+ std::stringstream point;
+ point << sourcePos << "-" << j;
+ alignment.push_back(point.str());
+ } else {
set<size_t>::iterator setIter;
- for(setIter = (bestAlignment->alignedToT[j]).begin(); setIter != (bestAlignment->alignedToT[j]).end(); setIter++) {
+ for(setIter = (bestAlignment.alignedToT[j]).begin(); setIter != (bestAlignment.alignedToT[j]).end(); setIter++) {
int sourcePos = *setIter;
//phraseTableFile << sourcePos << "-" << j << " ";
- std::stringstream point;
- point << sourcePos << "-" << j;
- alignment.push_back(point.str());
- }
- }
- }
- // now print all alignments, sorted by source index
- sort(alignment.begin(), alignment.end());
- for (size_t i = 0; i < alignment.size(); ++i) {
- phraseTableFile << alignment[i] << " ";
- }
- } else if (wordAlignmentFlag) {
+ std::stringstream point;
+ point << sourcePos << "-" << j;
+ alignment.push_back(point.str());
+ }
+ }
+ }
+ // now print all alignments, sorted by source index
+ sort(alignment.begin(), alignment.end());
+ for (size_t i = 0; i < alignment.size(); ++i) {
+ phraseTableFile << alignment[i] << " ";
+ }
+ } else if (wordAlignmentFlag) {
// alignment info in pb model
- for(size_t j=0; j<bestAlignment->alignedToT.size(); j++) {
- const set< size_t > &aligned = bestAlignment->alignedToT[j];
+ for(size_t j=0; j<bestAlignment.alignedToT.size(); j++) {
+ const set< size_t > &aligned = bestAlignment.alignedToT[j];
for (set< size_t >::const_iterator p(aligned.begin()); p != aligned.end(); ++p) {
phraseTableFile << *p << "-" << j << " ";
}
@@ -568,6 +756,7 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
}
}
+
// counts
phraseTableFile << " ||| " << totalCount << " " << count;
@@ -594,13 +783,13 @@ void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCo
phraseTableFile << endl;
}
-double computeUnalignedPenalty( const PHRASE &phraseS, const PHRASE &phraseT, PhraseAlignment *alignment )
+double computeUnalignedPenalty( const PHRASE &phraseS, const PHRASE &phraseT, const PhraseAlignment &alignment )
{
// unaligned word counter
double unaligned = 1.0;
// only checking target words - source words are caught when computing inverse
- for(size_t ti=0; ti<alignment->alignedToT.size(); ti++) {
- const set< size_t > & srcIndices = alignment->alignedToT[ ti ];
+ for(size_t ti=0; ti<alignment.alignedToT.size(); ti++) {
+ const set< size_t > & srcIndices = alignment.alignedToT[ ti ];
if (srcIndices.empty()) {
unaligned *= 2.718;
}
@@ -608,13 +797,13 @@ double computeUnalignedPenalty( const PHRASE &phraseS, const PHRASE &phraseT, Ph
return unaligned;
}
-double computeUnalignedFWPenalty( const PHRASE &phraseS, const PHRASE &phraseT, PhraseAlignment *alignment )
+double computeUnalignedFWPenalty( const PHRASE &phraseS, const PHRASE &phraseT, const PhraseAlignment &alignment )
{
// unaligned word counter
double unaligned = 1.0;
// only checking target words - source words are caught when computing inverse
- for(size_t ti=0; ti<alignment->alignedToT.size(); ti++) {
- const set< size_t > & srcIndices = alignment->alignedToT[ ti ];
+ for(size_t ti=0; ti<alignment.alignedToT.size(); ti++) {
+ const set< size_t > & srcIndices = alignment.alignedToT[ ti ];
if (srcIndices.empty() && functionWordList.find( vcbT.getWord( phraseT[ ti ] ) ) != functionWordList.end()) {
unaligned *= 2.718;
}
@@ -622,11 +811,11 @@ double computeUnalignedFWPenalty( const PHRASE &phraseS, const PHRASE &phraseT,
return unaligned;
}
-void loadFunctionWords( const char *fileName )
+void loadFunctionWords( const string &fileName )
{
cerr << "Loading function word list from " << fileName;
ifstream inFile;
- inFile.open(fileName);
+ inFile.open(fileName.c_str());
if (inFile.fail()) {
cerr << " - ERROR: could not open file\n";
exit(1);
@@ -647,14 +836,14 @@ void loadFunctionWords( const char *fileName )
inFile.close();
}
-double computeLexicalTranslation( const PHRASE &phraseS, const PHRASE &phraseT, PhraseAlignment *alignment )
+double computeLexicalTranslation( const PHRASE &phraseS, const PHRASE &phraseT, const PhraseAlignment &alignment )
{
// lexical translation probability
double lexScore = 1.0;
int null = vcbS.getWordID("NULL");
// all target words have to be explained
- for(size_t ti=0; ti<alignment->alignedToT.size(); ti++) {
- const set< size_t > & srcIndices = alignment->alignedToT[ ti ];
+ for(size_t ti=0; ti<alignment.alignedToT.size(); ti++) {
+ const set< size_t > & srcIndices = alignment.alignedToT[ ti ];
if (srcIndices.empty()) {
// explain unaligned word by NULL
lexScore *= lexTable.permissiveLookup( null, phraseT[ ti ] );
@@ -670,11 +859,11 @@ double computeLexicalTranslation( const PHRASE &phraseS, const PHRASE &phraseT,
return lexScore;
}
-void LexicalTable::load( char *fileName )
+void LexicalTable::load( const string &fileName )
{
cerr << "Loading lexical translation table from " << fileName;
ifstream inFile;
- inFile.open(fileName);
+ inFile.open(fileName.c_str());
if (inFile.fail()) {
cerr << " - ERROR: could not open file\n";
exit(1);
diff --git a/scripts/training/phrase-extract/score.h b/phrase-extract/score.h
index 9faa144c5..f720a32d2 100644
--- a/scripts/training/phrase-extract/score.h
+++ b/phrase-extract/score.h
@@ -10,6 +10,9 @@
#include <string>
#include <vector>
+namespace MosesTraining
+{
+
class PhraseAlignment;
typedef std::vector<PhraseAlignment*> PhraseAlignmentCollection;
@@ -58,8 +61,26 @@ private:
};
+class LexicalTable
+{
+public:
+ std::map< WORD_ID, std::map< WORD_ID, double > > ltable;
+ void load( const std::string &filePath );
+ double permissiveLookup( WORD_ID wordS, WORD_ID wordT ) {
+ // cout << endl << vcbS.getWord( wordS ) << "-" << vcbT.getWord( wordT ) << ":";
+ if (ltable.find( wordS ) == ltable.end()) return 1.0;
+ if (ltable[ wordS ].find( wordT ) == ltable[ wordS ].end()) return 1.0;
+ // cout << ltable[ wordS ][ wordT ];
+ return ltable[ wordS ][ wordT ];
+ }
+};
+
// other functions *********************************************
inline bool isNonTerminal( const std::string &word )
{
return (word.length()>=3 && word[0] == '[' && word[word.length()-1] == ']');
}
+
+
+}
+
diff --git a/scripts/training/phrase-extract/score.vcxproj b/phrase-extract/score.vcxproj
index e5dc1df9a..e5dc1df9a 100644
--- a/scripts/training/phrase-extract/score.vcxproj
+++ b/phrase-extract/score.vcxproj
diff --git a/scripts/training/phrase-extract/statistics.cpp b/phrase-extract/statistics.cpp
index a39e98a5d..67373ec93 100644
--- a/scripts/training/phrase-extract/statistics.cpp
+++ b/phrase-extract/statistics.cpp
@@ -17,9 +17,13 @@
#include "InputFileStream.h"
using namespace std;
+using namespace MosesTraining;
#define LINE_MAX_LENGTH 10000
+namespace MosesTraining
+{
+
class PhraseAlignment
{
public:
@@ -36,9 +40,11 @@ class LexicalTable
{
public:
map< WORD_ID, map< WORD_ID, double > > ltable;
- void load( char[] );
+ void load( const string &);
};
+}
+
void processPhrasePairs( vector< PhraseAlignment > & );
ofstream phraseTableFile;
@@ -304,11 +310,11 @@ bool PhraseAlignment::equals( const PhraseAlignment& other )
return true;
}
-void LexicalTable::load( char *fileName )
+void LexicalTable::load( const string &filePath )
{
- cerr << "Loading lexical translation table from " << fileName;
+ cerr << "Loading lexical translation table from " << filePath;
ifstream inFile;
- inFile.open(fileName);
+ inFile.open(filePath.c_str());
if (inFile.fail()) {
cerr << " - ERROR: could not open file\n";
exit(1);
@@ -326,7 +332,7 @@ void LexicalTable::load( char *fileName )
vector<string> token = tokenize( line );
if (token.size() != 3) {
- cerr << "line " << i << " in " << fileName << " has wrong number of tokens, skipping:\n" <<
+ cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" <<
token.size() << " " << token[0] << " " << line << endl;
continue;
}
diff --git a/scripts/training/phrase-extract/tables-core.cpp b/phrase-extract/tables-core.cpp
index 93ad8b6a1..6b35f371b 100644
--- a/scripts/training/phrase-extract/tables-core.cpp
+++ b/phrase-extract/tables-core.cpp
@@ -30,6 +30,9 @@ vector<string> tokenize( const char* input )
return token;
}
+namespace MosesTraining
+{
+
bool isNonTerminal( const WORD &symbol ) {
return symbol.substr(0, 1) == "[" && symbol.substr(symbol.size()-1, 1) == "]";
}
@@ -122,3 +125,5 @@ double DTable::get( int distortion )
return dtable[ distortion ];
}
+}
+
diff --git a/scripts/training/phrase-extract/tables-core.h b/phrase-extract/tables-core.h
index 1899b4d77..e239e5900 100644
--- a/scripts/training/phrase-extract/tables-core.h
+++ b/phrase-extract/tables-core.h
@@ -14,6 +14,9 @@
extern std::vector<std::string> tokenize( const char*);
+namespace MosesTraining
+{
+
typedef std::string WORD;
typedef unsigned int WORD_ID;
@@ -63,4 +66,6 @@ public:
double get( int );
};
+}
+
#endif
diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile
index 6afe1cf97..61157eaf3 100644
--- a/regression-testing/Jamfile
+++ b/regression-testing/Jamfile
@@ -1,9 +1,18 @@
-import option ;
+import option path ;
with-regtest = [ option.get "with-regtest" ] ;
+
if $(with-regtest) {
- path-constant TESTS : $(with-regtest)/tests ;
-
+ with-regtest = [ path.root $(with-regtest) [ path.pwd ] ] ;
+} else if [ option.get "with-regtest" : : "yes" ] {
+ shell_or_fail "git submodule init" ;
+ shell_or_fail "git submodule update" ;
+ with-regtest = $(TOP)/regression-testing/tests ;
+}
+
+if $(with-regtest) {
+ test-dir = $(with-regtest)/tests ;
+
rule reg_test ( name : tests * : program : action ) {
alias $(name) : $(tests:D=).passed ;
for test in $(tests) {
@@ -11,22 +20,34 @@ if $(with-regtest) {
alias $(test) : $(test:D=).passed ;
}
}
-
+
actions reg_test_decode {
- $(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(TESTS) && touch $(<)
+ $(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
- reg_test phrase : [ glob $(TESTS)/phrase.* ] : ../moses-cmd/src//moses : @reg_test_decode ;
- reg_test chart : [ glob $(TESTS)/chart.* : $(TESTS)/chart.hierarchical-withkenlm ] : ../moses-chart-cmd/src//moses_chart : @reg_test_decode ;
+ reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd/src//moses : @reg_test_decode ;
+ reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-chart-cmd/src//moses_chart : @reg_test_decode ;
actions reg_test_score {
- $(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(TESTS) && touch $(<)
+ $(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
+ }
+ reg_test score : [ glob $(test-dir)/score.* : $(test-dir)/score.phrase-based-with-alignment-inv $(test-dir)/score.phrase-based-inv $(test-dir)/score.phrase-based-with-alignment $(test-dir)/score.phrase-based ] : ../phrase-extract//score : @reg_test_score ;
+
+ actions reg_test_extract {
+ $(TOP)/regression-testing/run-test-extract.perl --extractor=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
- reg_test score : [ glob $(TESTS)/score.* ] : ../scripts/training/phrase-extract//score : @reg_test_score ;
+ reg_test extract : [ glob $(test-dir)/extract.* ] : ../phrase-extract//extract : @reg_test_extract ;
+
+
+ actions reg_test_extractrules {
+ $(TOP)/regression-testing/run-test-extract.perl --extractor=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
+ }
+ reg_test extractrules : [ glob $(test-dir)/extract-rules.* : $(with-regtest)/extract-rules.hierarchical ] : ../phrase-extract//extract-rules : @reg_test_extractrules ;
+
actions reg_test_mert {
- $(TOP)/regression-testing/run-test-mert.perl --mert-dir=$(TOP)/mert --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(TESTS) && touch $(<)
+ $(TOP)/regression-testing/run-test-mert.perl --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)
}
- reg_test mert : [ glob $(TESTS)/mert.* ] : ../mert//legacy : @reg_test_mert ;
+ reg_test mert : [ glob $(test-dir)/mert.* ] : ../mert//mert : @reg_test_mert ;
- alias all : phrase chart mert score ;
+ alias all : phrase chart mert score extract extractrules ;
}
diff --git a/regression-testing/MosesRegressionTesting.pm b/regression-testing/MosesRegressionTesting.pm
index f4b3ce099..ff4bebcb4 100644
--- a/regression-testing/MosesRegressionTesting.pm
+++ b/regression-testing/MosesRegressionTesting.pm
@@ -5,16 +5,14 @@ use strict;
# if your tests need a new version of the test data, increment this
# and make sure that a moses-regression-tests-vX.Y is available for
# download from statmt.org (redpony AT umd dot edu for more info)
-use constant TESTING_DATA_VERSION => '10';
# find the data directory in a few likely locations and make sure
# that it is the correct version
sub find_data_directory
{
my ($test_script_root, $data_dir) = @_;
- my $data_version = TESTING_DATA_VERSION;
my @ds = ();
- my $mrtp = "moses-reg-test-data-$data_version";
+ my $mrtp = "moses-reg-test-data";
push @ds, $data_dir if defined $data_dir;
push @ds, "$test_script_root/$mrtp";
push @ds, "/export/ws06osmt/regression-testing/$mrtp";
@@ -41,9 +39,7 @@ standard locations: $test_script_root, /tmp, or /var/tmp with these
commands:
cd <DESIRED_INSTALLATION_DIRECTORY>
- wget http://www.statmt.org/moses/reg-testing/moses-reg-test-data-$data_version.tgz
- tar xzf moses-reg-test-data-$data_version.tgz
- rm moses-reg-test-data-$data_version.tgz
+ git clone https://github.com/hieuhoang/moses-reg-test-data.git
EOT
exit 1;
diff --git a/regression-testing/run-test-extract.perl b/regression-testing/run-test-extract.perl
index c53e6247d..d13ef62d6 100755
--- a/regression-testing/run-test-extract.perl
+++ b/regression-testing/run-test-extract.perl
@@ -53,7 +53,7 @@ my $truthPath = "$test_dir/$test_name/truth/";
if (-e $outPath)
{
- my $cmd = "diff --exclude=.DS_Store $outPath/ $truthPath/ | wc -l";
+ my $cmd = "diff --exclude=.DS_Store --exclude=._* $outPath/ $truthPath/ | wc -l";
my $numDiff = `$cmd`;
if ($numDiff == 0)
diff --git a/regression-testing/run-test-mert.perl b/regression-testing/run-test-mert.perl
index a9e0b290d..e22d152df 100755
--- a/regression-testing/run-test-mert.perl
+++ b/regression-testing/run-test-mert.perl
@@ -10,14 +10,14 @@ my @SIGS = qw ( SIGHUP SIGINT SIGQUIT SIGILL SIGTRAP SIGABRT SIGIOT SIGBUS SIGFP
my ($decoder, $test_name);
my $test_dir = "$script_dir/tests";
-my $mert_dir = "$script_dir/../mert";
+my $bin_dir = "$script_dir/../bin";
my $data_dir;
my $BIN_TEST = $script_dir;
my $results_dir;
GetOptions("test=s" => \$test_name,
"data-dir=s"=> \$data_dir,
- "mert-dir=s"=> \$mert_dir,
+ "bin-dir=s"=> \$bin_dir,
"test-dir=s"=> \$test_dir,
"results-dir=s"=> \$results_dir,
) or exit 1;
@@ -26,7 +26,7 @@ die "Please specify a test to run with --test\n" unless $test_name;
die "Please specify the location of the data directory with --data-dir\n" unless $data_dir;
-die "Please specify the location of the mert directory with --mert-dir\n" unless $mert_dir;
+die "Please specify the location of the mert directory with --mert-dir\n" unless $bin_dir;
die "Cannot locate test dir at $test_dir" unless (-d $test_dir);
@@ -92,7 +92,7 @@ exit 0;
sub exec_test {
my ($test_dir,$results) = @_;
my $start_time = time;
- my ($o, $ec, $sig) = run_command("sh $test_dir/command $mert_dir $test_dir 1> $results/run.stdout 2> $results/run.stderr");
+ my ($o, $ec, $sig) = run_command("sh $test_dir/command $bin_dir $test_dir 1> $results/run.stdout 2> $results/run.stderr");
my $elapsed = 0;
$elapsed = time - $start_time;
return ($o, $elapsed, $ec, $sig);
diff --git a/regression-testing/run-test-suite.perl b/regression-testing/run-test-suite.perl
deleted file mode 100755
index 08ff458bf..000000000
--- a/regression-testing/run-test-suite.perl
+++ /dev/null
@@ -1,180 +0,0 @@
-#!/usr/bin/perl -w
-
-# $Id$
-
-use strict;
-use FindBin qw($Bin);
-
-my $script_dir; BEGIN { use Cwd qw/ abs_path /; use File::Basename; $script_dir = dirname(abs_path($0)); push @INC, $script_dir; }
-use Getopt::Long;
-
-############################################################
-my @tests = qw (
- extract.phrase-based
- extract-rules.hierarchical
- score.phrase-based
- score.phrase-based-inv
- score.phrase-based-with-alignment
- score.phrase-based-with-alignment-inv
- score.hierarchical
- score.hierarchical-inv
- mert.basic
- mert.pro
- mert.extractor-txt
- mert.extractor-bin
- chart.target-syntax
- chart.target-syntax.ondisk
- chart.hierarchical
- chart.hierarchical-withsrilm
- chart.hierarchical.ondisk
- phrase.basic-surface-only
- phrase.basic-surface-only-withirstlm
- phrase.basic-surface-only-withirstlm-binlm
- phrase.basic-lm-oov
- phrase.ptable-filtering
- phrase.multi-factor
- phrase.multi-factor-drop
- phrase.confusionNet-surface-only
- phrase.confusionNet-multi-factor
- phrase.basic-surface-binptable
- phrase.multi-factor-binptable
- phrase.nbest-multi-factor
- phrase.lattice-surface
- phrase.lattice-distortion
- phrase.lexicalized-reordering
- phrase.lexicalized-reordering-bin
- phrase.lexicalized-reordering-cn
- phrase.consensus-decoding-surface
- phrase.continue-partial-translation
- phrase.show-weights.lex-reorder
- phrase.show-weights
- phrase.xml-markup
-);
- #phrase.basic-lm-oov-withkenlm
- #phrase.basic-surface-only-withkenlm
- #phrase.basic-surface-only-withkenlm.bin
- #chart.hierarchical-withkenlm
-
-############################################################
-use MosesRegressionTesting;
-use File::Temp qw ( tempfile );
-use POSIX qw ( strftime );
-
-my $decoderPhrase = "$Bin/../moses-cmd/src/moses";
-my $decoderChart = "$Bin/../moses-chart-cmd/src/moses_chart";
-my $scoreExe = "$Bin/../scripts/training/phrase-extract/score";
-my $extractorExe = "$Bin/../scripts/training/phrase-extract/extract";
-my $extractorRulesExe = "$Bin/../scripts/training/phrase-extract/extract-rules";
-my $kenlmBinarizer = "$Bin/../kenlm/build_binary";
-my $test_dir;
-my $BIN_TEST = $script_dir;
-my $data_dir;
-
-GetOptions( "decoder-phrase=s" => \$decoderPhrase,
- "decoder-chart=s" => \$decoderChart,
- "data-dir=s" => \$data_dir,
- ) or exit 1;
-
-
-$data_dir = MosesRegressionTesting::find_data_directory($BIN_TEST, $data_dir);
-
-my $test_run = "$BIN_TEST/run-single-test.pl --data-dir=$data_dir";
-$test_dir = "$data_dir/tests";
-$test_run .= " --test-dir=$test_dir" if $test_dir;
-
-print "Data directory: $data_dir\n";
-
-die "Please specify the phrase-based decoder & the chart decoder to test with --decoder-phrase=[path] --decoder-chart=[path] \n" unless ($decoderPhrase and $decoderChart);
-
-die "Cannot locate executable called $decoderPhrase\n" unless (-x $decoderPhrase);
-
-print "Running tests: @tests\n\n";
-
-print "TEST NAME STATUS PATH TO RESULTS\n";
-my $lb = "---------------------------------------------------------------------------------------------------------\n";
-print $lb;
-
-my $fail = 0;
-my @failed;
-foreach my $test (@tests)
-{
- my $cmd;
- my @tokens = split('\.', $test);
- my $model_type = $tokens[0];
-
- if ($model_type eq 'phrase')
- {
- $cmd .= "$BIN_TEST/run-single-test.perl $test_run --decoder=$decoderPhrase";
- }
- elsif ($model_type eq 'chart')
- {
- $cmd .= "$BIN_TEST/run-single-test.perl $test_run --decoder=$decoderChart";
- }
- elsif ($model_type eq 'score')
- {
- $cmd .= "$BIN_TEST/run-test-scorer.perl $test_run --scorer=$scoreExe";
- }
- elsif ($model_type eq 'extract')
- {
- $cmd .= "$BIN_TEST/run-test-extract.perl $test_run --extractor=$extractorExe";
- }
- elsif ($model_type eq 'extract-rules')
- {
- $cmd .= "$BIN_TEST/run-test-extract.perl $test_run --extractor=$extractorRulesExe";
- }
- elsif ($model_type eq "mert")
- {
- $cmd .= "$BIN_TEST/run-test-mert.perl $test_run";
- }
- elsif ($model_type eq "kenlmbin")
- {
- $cmd .= "$BIN_TEST/run-kenlm-binarizer.perl --binarizer=$kenlmBinarizer";
- }
- else
- {
- print "FAIL";
- }
-
- $cmd .= " --test=$test";
-
-print STDERR "cmd = $cmd\n";
-
- my ($res, $output, $results_path) = do_test($cmd);
- format STDOUT =
-@<<<<<<<<<<<<<<<<<<<<<< @<<<<<<<<< @<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
-$test, $res, $results_path
-.
- write;
- if ($res eq 'FAIL') {
- print "$lb$output$lb";
- $fail++;
- push @failed, $test;
- } else {
-# TOTAL_WALLTIME result=BASELINE=11, TEST=12 DELTA=1 PCT CHANGE=9.09
- if ($output =~ /TOTAL_WALLTIME\s+result\s*=\s*([^\n]+)/o) {
- print "\t\tTiming statistics: $1\n";
- }
- }
-}
-
-my $total = scalar @tests;
-my $fail_percentage = int(100 * $fail / $total);
-my $pass_percentage = int(100 * ($total-$fail) / $total);
-print "\n$pass_percentage% of the tests passed.\n";
-print "$fail_percentage% of the tests failed.\n";
-if ($fail_percentage>0) { print "\nPLEASE INVESTIGATE THESE FAILED TESTS: @failed\n"; }
-exit 2 if $fail > 0;
-
-sub do_test {
- my ($test) = @_;
- my $o = `$test 2>&1`;
- my $res = 'PASS';
- $res = 'FAIL' if ($? > 0);
- my $od = '';
- if ($o =~ /RESULTS AVAILABLE IN: (.*)$/m) {
- $od = $1;
- $o =~ s/^RESULTS AVAIL.*$//mo;
- }
- return ($res, $o, $od);
-}
-
diff --git a/regression-testing/tests b/regression-testing/tests
new file mode 160000
+Subproject f0bb48dad05db4a12b41f5c8def9c2d097b2e3b
diff --git a/scripts/Jamfile b/scripts/Jamfile
index b9eefcffe..b76152d08 100644
--- a/scripts/Jamfile
+++ b/scripts/Jamfile
@@ -1,61 +1,21 @@
#See ../Jamroot for options.
import option path ;
-build-project ems/biconcor ;
build-project training ;
-with-giza = [ option.get "with-giza" ] ;
-if $(with-giza) {
- rule check-for-bin ( name ) {
- if ! [ FILE_OPEN $(with-giza)/$(name) : "r" ] {
- echo "Did not find "$(with-giza)/$(name)"." ;
- echo "The with-giza directory should contain GIZA++, snt2cooc.out, and mkcls." ;
- echo "These are available from http://code.google.com/p/giza-pp/ :" ;
- echo " wget https://giza-pp.googlecode.com/files/giza-pp.tgz" ;
- echo " tar xzf giza-pp.tgz" ;
- echo " cd giza-pp" ;
- echo " make" ;
- echo " cp GIZA++-v2/{GIZA++,plain2snt.out,snt2cooc.out,snt2plain.out,trainGIZA++.sh} mkcls-v2/mkcls ." ;
- exit "Then run bjam --with-giza=/path/to/giza-pp" : 1 ;
- }
- }
-
- constant WITH-GIZA : $(with-giza) ;
-
- check-for-bin GIZA++ ;
- check-for-bin snt2cooc.out ;
- check-for-bin mkcls ;
+prefix = [ option.get "prefix" ] ;
+if $(prefix) {
+ prefix = [ path.root $(prefix) [ path.pwd ] ] ;
+ location = [ option.get "install-scripts" : : $(prefix)$(GITTAG)/scripts ] ;
} else {
- if $(CLEANING) = no {
- echo "If you want scripts/training/train-model.perl, pass --with-giza=/path/to/giza-pp" ;
- }
- constant WITH-GIZA : "no" ;
+ location = [ option.get "install-scripts" ] ;
}
-location = [ option.get "install-scripts" ] ;
if $(location) {
location = [ path.root $(location) [ path.pwd ] ] ;
- location = $(location)$(GITTAG) ;
-
- #These two used to live in a tools directory.
- install ghkm : training/phrase-extract/extract-ghkm//extract-ghkm : <location>$(location)/training/phrase-extract/extract-ghkm/tools ;
- install compactify : training/compact-rule-table//compactify : <location>$(location)/training/compact-rule-table/tools ;
-
- install phrase-extract : training/phrase-extract//programs : <location>$(location)/training/phrase-extract ;
- install pcfg-extract : training/phrase-extract/pcfg-extract//pcfg-extract : <location>$(location)/training/phrase-extract/pcfg-extract ;
- install pcfg-score : training/phrase-extract/pcfg-score//pcfg-score : <location>$(location)/training/phrase-extract/pcfg-score ;
- install lexical-reordering : training/lexical-reordering//score : <location>$(location)/training/lexical-reordering ;
- install symal : training/symal//symal : <location>$(location)/training/symal ;
-
- install biconcor : ems/biconcor//biconcor : <location>$(location)/ems/biconcor ;
-
- if $(WITH-GIZA) != no {
- install train-model : training//train-model.perl : <location>$(location)/training ;
- }
-
install scripts :
- [ glob-tree README *.js *.pl *.perl *.pm *.py *.sh *.php : tests regression-testing other bin train_model.perl ]
- [ glob tokenizer/nonbreaking_prefixes/* ems/example/*.* ems/example/data/* ems/web/* analysis/smtgui/* : ems/web/javascripts ]
+ [ glob-tree README *.js *.pl *.perl *.pm *.py *.sh *.php : tests regression-testing other bin ]
+ [ glob share/nonbreaking_prefixes/* ems/example/*.* ems/example/data/* ems/web/* analysis/smtgui/* : ems/web/javascripts ]
generic/fsa-sample.fsa
ems/experiment.machines
ems/experiment.meta
diff --git a/scripts/analysis/smtgui/newsmtgui.cgi b/scripts/analysis/smtgui/newsmtgui.cgi
index 552f89924..552f89924 100644..100755
--- a/scripts/analysis/smtgui/newsmtgui.cgi
+++ b/scripts/analysis/smtgui/newsmtgui.cgi
diff --git a/scripts/analysis/weight-scan.pl b/scripts/analysis/weight-scan.pl
index 0ed4dbe09..6789c4d6d 100755
--- a/scripts/analysis/weight-scan.pl
+++ b/scripts/analysis/weight-scan.pl
@@ -8,10 +8,10 @@
use strict;
use warnings;
use Getopt::Long;
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use File::Basename;
use File::Path;
-my $SCRIPTS_ROOTDIR = $Bin;
+my $SCRIPTS_ROOTDIR = $RealBin;
$SCRIPTS_ROOTDIR =~ s/\/training$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
diff --git a/scripts/ems/example/config.basic b/scripts/ems/example/config.basic
index 939e13aad..7e5ec4502 100644
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@@ -19,11 +19,14 @@ pair-extension = fr-en
moses-src-dir = /home/pkoehn/moses
#
# moses binaries
-moses-bin-dir = $moses-src-dir/dist/bin
+moses-bin-dir = $moses-src-dir/bin
#
# moses scripts
moses-script-dir = $moses-src-dir/scripts
#
+# directory where GIZA++/MGIZA programs resides
+external-bin-dir = /Users/hieuhoang/workspace/bin/training-tools
+#
# srilm
srilm-dir = $moses-src-dir/srilm/bin/i686
#
@@ -138,7 +141,7 @@ settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
-#settings = ""
+#settings = "-s improved-kneser-ney"
# order of the language model
order = 5
diff --git a/scripts/ems/example/config.factored b/scripts/ems/example/config.factored
index df9f28f33..4da6c3612 100644
--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@@ -19,11 +19,14 @@ pair-extension = fr-en
moses-src-dir = /home/pkoehn/moses
#
# moses binaries
-moses-bin-dir = $moses-src-dir/dist/bin
+moses-bin-dir = $moses-src-dir/bin
#
# moses scripts
moses-script-dir = $moses-src-dir/scripts
#
+# directory where GIZA++/MGIZA programs resides
+external-bin-dir = /Users/hieuhoang/workspace/bin/training-tools
+#
# srilm
srilm-dir = $moses-src-dir/srilm/bin/i686
#
@@ -138,7 +141,7 @@ settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
-#settings = ""
+#settings = "-s improved-kneser-ney"
# order of the language model
order = 5
diff --git a/scripts/ems/example/config.hierarchical b/scripts/ems/example/config.hierarchical
index 6161f6ac4..a1d85c89c 100644
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@@ -19,11 +19,14 @@ pair-extension = fr-en
moses-src-dir = /home/pkoehn/moses
#
# moses binaries
-moses-bin-dir = $moses-src-dir/dist/bin
+moses-bin-dir = $moses-src-dir/bin
#
# moses scripts
moses-script-dir = $moses-src-dir/scripts
#
+# directory where GIZA++/MGIZA programs resides
+external-bin-dir = /Users/hieuhoang/workspace/bin/training-tools
+#
# srilm
srilm-dir = $moses-src-dir/srilm/bin/i686
#
@@ -138,7 +141,7 @@ settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
-#settings = ""
+#settings = "-s improved-kneser-ney"
# order of the language model
order = 5
diff --git a/scripts/ems/example/config.syntax b/scripts/ems/example/config.syntax
index 635585844..a39acf6e4 100644
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@@ -19,11 +19,14 @@ pair-extension = fr-en
moses-src-dir = /home/pkoehn/moses
#
# moses binaries
-moses-bin-dir = $moses-src-dir/dist/bin
+moses-bin-dir = $moses-src-dir/bin
#
# moses scripts
moses-script-dir = $moses-src-dir/scripts
#
+# directory where GIZA++/MGIZA programs resides
+external-bin-dir = /Users/hieuhoang/workspace/bin/training-tools
+#
# srilm
srilm-dir = $moses-src-dir/srilm/bin/i686
#
@@ -142,7 +145,7 @@ settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
-#settings = ""
+#settings = "-s improved-kneser-ney"
# order of the language model
order = 5
diff --git a/scripts/ems/example/config.toy b/scripts/ems/example/config.toy
index 7b8c95faa..629c96336 100644
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@@ -19,11 +19,14 @@ pair-extension = fr-en
moses-src-dir = /home/pkoehn/moses
#
# moses binaries
-moses-bin-dir = $moses-src-dir/dist/bin
+moses-bin-dir = $moses-src-dir/bin
#
# moses scripts
moses-script-dir = $moses-src-dir/scripts
#
+# directory where GIZA++/MGIZA programs resides
+external-bin-dir = /Users/hieuhoang/workspace/bin/training-tools
+#
# srilm
srilm-dir = $moses-src-dir/srilm/bin/i686
#
@@ -132,7 +135,7 @@ settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
-#settings = ""
+#settings = "-s improved-kneser-ney"
# order of the language model
order = 5
diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta
index ab873e3b6..1f173e8d5 100644
--- a/scripts/ems/experiment.meta
+++ b/scripts/ems/experiment.meta
@@ -663,7 +663,7 @@ filter
filter-devtest
in: input-devtest TRAINING:phrase-translation-table TRAINING:reordering-table
out: filtered-dir-devtest
- default-name: tuning/filtered.devtest.ini
+ default-name: tuning/filtered.devtest
rerun-on-change: filter-settings
pass-if: TRAINING:binarize-all
ignore-unless: use-mira
diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl
index 2ab9e1e3a..0d8b2dc88 100755
--- a/scripts/ems/experiment.perl
+++ b/scripts/ems/experiment.perl
@@ -4,14 +4,22 @@
use strict;
use Getopt::Long "GetOptions";
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
+
+sub trim($)
+{
+ my $string = shift;
+ $string =~ s/^\s+//;
+ $string =~ s/\s+$//;
+ return $string;
+}
my $host = `hostname`; chop($host);
print STDERR "STARTING UP AS PROCESS $$ ON $host AT ".`date`;
my ($CONFIG_FILE,$EXECUTE,$NO_GRAPH,$CONTINUE,$VERBOSE,$IGNORE_TIME);
my $SLEEP = 2;
-my $META = "$Bin/experiment.meta";
+my $META = "$RealBin/experiment.meta";
# check if it is run on a multi-core machine
# set number of maximal concurrently active processes
@@ -157,7 +165,7 @@ sub detect_machine {
sub detect_if_cluster {
my $hostname = `hostname`; chop($hostname);
- foreach my $line (`cat $Bin/experiment.machines`) {
+ foreach my $line (`cat $RealBin/experiment.machines`) {
next unless $line =~ /^cluster: (.+)$/;
if (&detect_machine($hostname,$1)) {
$CLUSTER = 1;
@@ -168,7 +176,7 @@ sub detect_if_cluster {
sub detect_if_multicore {
my $hostname = `hostname`; chop($hostname);
- foreach my $line (`cat $Bin/experiment.machines`) {
+ foreach my $line (`cat $RealBin/experiment.machines`) {
next unless $line =~ /^multicore-(\d+): (.+)$/;
my ($cores,$list) = ($1,$2);
if (&detect_machine($hostname,$list)) {
@@ -920,6 +928,10 @@ sub define_step {
elsif ($DO_STEP[$i] eq 'TRAINING:build-biconcor') {
&define_training_build_biconcor($i);
}
+ elsif ($DO_STEP[$i] eq 'TRAINING:build-suffix-array') {
+ &define_training_build_suffix_array($i);
+ }
+
elsif ($DO_STEP[$i] eq 'TRAINING:build-lex-trans') {
&define_training_build_lex_trans($i);
}
@@ -953,10 +965,10 @@ sub define_step {
&define_tuningevaluation_factorize($i);
}
elsif ($DO_STEP[$i] eq 'TUNING:filter') {
- &define_tuningevaluation_filter(undef,$i,"dev");
+ &define_tuningevaluation_filter(undef,$i);
}
elsif ($DO_STEP[$i] eq 'TUNING:filter-devtest') {
- &define_tuningevaluation_filter(undef,$i,"devtest");
+ &define_tuningevaluation_filter(undef,$i);
}
elsif ($DO_STEP[$i] eq 'TUNING:tune') {
&define_tuning_tune($i);
@@ -1188,7 +1200,7 @@ sub check_info {
return 0;
}
print "\tcheck '$VALUE{$parameter}' eq '$INFO{$parameter}' -> " if $VERBOSE;
- if (defined($ONLY_EXISTENCE_MATTERS{"$module:$step"}{$parameter})) {
+ if (defined($ONLY_EXISTENCE_MATTERS{"$module:$step"}{$parameter})) {
print "existence ok\n" if $VERBOSE;
}
elsif (&match_info_strings($VALUE{$parameter},$INFO{$parameter})) {
@@ -1304,7 +1316,7 @@ sub check_if_crashed {
'no such file or directory','unknown option',
'died at','exit code','permission denied',
'segmentation fault','abort',
- 'can\'t locate') {
+ 'can\'t locate', 'unrecognized option') {
if (/$pattern/i) {
my $not_error = 0;
if (defined($NOT_ERROR{&defined_step_id($i)})) {
@@ -1559,21 +1571,21 @@ sub define_tuning_tune {
my ($step_id) = @_;
my $dir = &check_and_get("GENERAL:working-dir");
my $tuning_script = &check_and_get("TUNING:tuning-script");
+ my $use_mira = &backoff_and_get("TUNING:use-mira", 0);
# the last 3 variables are only used for mira tuning
my ($tuned_config,$config,$input,$reference,$config_devtest,$input_devtest,$reference_devtest) = &get_output_and_input($step_id);
- my $addTags = &backoff_and_get("TUNING:add-tags");
- my $use_jackknife = &backoff_and_get("TUNING:use-jackknife");
- if ($addTags && !$use_jackknife) {
- my $input_with_tags = $input.".".$VERSION.".tags";
- `$addTags < $input > $input_with_tags`;
- $input = $input_with_tags;
- }
-
- my $use_mira = &backoff_and_get("TUNING:use-mira");
my $cmd = "";
- if ($use_mira && $use_mira eq "true") {
+ if ($use_mira) {
+ my $addTags = &backoff_and_get("TUNING:add-tags");
+ my $use_jackknife = &backoff_and_get("TUNING:use-jackknife");
+ if ($addTags && !$use_jackknife) {
+ my $input_with_tags = $input.".".$VERSION.".tags";
+ `$addTags < $input > $input_with_tags`;
+ $input = $input_with_tags;
+ }
+
my $addTagsDevtest = &backoff_and_get("TUNING:add-tags-devtest");
if ($addTagsDevtest) {
my $input_devtest_with_tags = $input_devtest.".".$VERSION.".tags";
@@ -1603,7 +1615,6 @@ sub define_tuning_tune {
$cmd .= "\n$script_filename >& $script_filename_log";
}
else {
-
my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
my $nbest_size = &check_and_get("TUNING:nbest");
my $lambda = &backoff_and_get("TUNING:lambda");
@@ -1646,8 +1657,14 @@ sub write_mira_config {
my $tuning_decoder_settings = &check_and_get("TUNING:decoder-settings");
my $start_weights = &backoff_and_get("TUNING:start-weight-config");
my $tuning_settings = &check_and_get("TUNING:tuning-settings");
- my @settings = split(/ /, $tuning_settings);
- my $mira_tuning_settings = &check_and_get("TUNING:mira-tuning-settings");
+ my $jobs = 10; # this overwrites the default in training-expt.perl
+ if ($tuning_settings =~ /^(.*)--jobs (\d+)(.*)$/) {
+ $jobs = $2;
+ $tuning_settings = $1.$3;
+ $tuning_settings =~ s/ +/ /;
+ $tuning_settings =~ s/^ //;
+ $tuning_settings =~ s/ $//;
+ }
my $use_jackknife = &backoff_and_get("TUNING:use-jackknife");
# are we tuning a meta feature?
@@ -1660,8 +1677,8 @@ sub write_mira_config {
$tune_filtered_ini_start = $expt_dir."/".$tune_filtered_ini_start.".start";
if ($start_weights) {
# apply start weights to filtered ini file, and pass the new ini to mira
- print "DEBUG: $Bin/support/reuse-weights.perl $start_weights < $tune_filtered_ini > $tune_filtered_ini_start \n";
- system("$Bin/support/reuse-weights.perl $start_weights < $tune_filtered_ini > $tune_filtered_ini_start");
+ print "DEBUG: $RealBin/support/reuse-weights.perl $start_weights < $tune_filtered_ini > $tune_filtered_ini_start \n";
+ system("$RealBin/support/reuse-weights.perl $start_weights < $tune_filtered_ini > $tune_filtered_ini_start");
}
}
@@ -1693,7 +1710,7 @@ sub write_mira_config {
print CFG "wait-for-bleu=1 \n\n";
#print CFG "decoder-settings=".$tuning_decoder_settings."\n\n";
print CFG "[train] \n";
- print CFG "trainer=\${moses-home}/dist/bin/mira \n";
+ print CFG "trainer=\${moses-home}/bin/mira \n";
if ($use_jackknife) {
print CFG "input-files-folds=";
for my $i (0..9) {
@@ -1736,16 +1753,14 @@ sub write_mira_config {
}
print CFG "decoder-settings=".$tuning_decoder_settings." -text-type \"dev\"\n";
print CFG "hours=48 \n";
- foreach my $setting (@settings) {
- print CFG $setting."\n";
- }
- print CFG "extra-args=".$mira_tuning_settings."\n\n";
+ print CFG "jobs=$jobs \n";
+ print CFG "extra-args=".$tuning_settings."\n\n";
print CFG "[devtest] \n";
if (&get("TRAINING:hierarchical-rule-set")) {
- print CFG "moses=\${moses-home}/dist/bin/moses_chart \n";
+ print CFG "moses=\${moses-home}/bin/moses_chart \n";
}
else {
- print CFG "moses=\${moses-home}/dist/bin/moses \n";
+ print CFG "moses=\${moses-home}/bin/moses \n";
}
# use multi-bleu to select the best set of weights
print CFG "bleu=\${moses-home}/scripts/generic/multi-bleu.perl \n";
@@ -1840,6 +1855,24 @@ sub define_training_symmetrize_giza {
&create_step($step_id,$cmd);
}
+sub define_training_build_suffix_array {
+ my ($step_id) = @_;
+
+ my $scripts = &check_and_get("GENERAL:moses-script-dir");
+
+ my ($model, $aligned,$corpus) = &get_output_and_input($step_id);
+ my $sa_exec_dir = &check_and_get("TRAINING:suffix-array");
+ my $input_extension = &check_backoff_and_get("TRAINING:input-extension");
+ my $output_extension = &check_backoff_and_get("TRAINING:output-extension");
+ my $method = &check_and_get("TRAINING:alignment-symmetrization-method");
+
+ my $glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""));
+
+ my $cmd = "$scripts/training/wrappers/adam-suffix-array/suffix-array-create.sh $sa_exec_dir $corpus.$input_extension $corpus.$output_extension $aligned.$method $model $glue_grammar_file";
+
+ &create_step($step_id,$cmd);
+}
+
sub define_training_build_biconcor {
my ($step_id) = @_;
@@ -1875,7 +1908,6 @@ sub define_training_extract_phrases {
$cmd .= "-alignment-stem ".&versionize(&long_file_name("aligned","model",""))." ";
$cmd .= "-extract-file $extract ";
$cmd .= "-corpus $corpus ";
-
if (&get("TRAINING:hierarchical-rule-set")) {
my $glue_grammar_file = &get("TRAINING:glue-grammar");
@@ -1894,6 +1926,7 @@ sub define_training_extract_phrases {
}
my $extract_settings = &get("TRAINING:extract-settings");
+ $extract_settings .= " --IncludeSentenceId " if &get("TRAINING:domain-features");
$cmd .= "-extract-options '".$extract_settings."' " if defined($extract_settings);
&create_step($step_id,$cmd);
@@ -1902,7 +1935,7 @@ sub define_training_extract_phrases {
sub define_training_build_ttable {
my ($step_id) = @_;
- my ($phrase_table, $extract,$lex) = &get_output_and_input($step_id);
+ my ($phrase_table, $extract,$lex,$domains) = &get_output_and_input($step_id);
my $word_report = &backoff_and_get("EVALUATION:report-precision-by-coverage");
my $word_alignment = &backoff_and_get("TRAINING:include-word-alignment-in-rules");
@@ -1916,9 +1949,26 @@ sub define_training_build_ttable {
$cmd .= "-phrase-word-alignment ";
}
+ $cmd .= &define_domain_feature_score_option($domains) if $domains;
+
&create_step($step_id,$cmd);
}
+sub define_domain_feature_score_option {
+ my ($domains) = @_;
+ my $spec = &backoff_and_get("TRAINING:domain-features");
+ my $method;
+ $method = "Indicator" if $spec =~ /indicator/;
+ $method = "Ratio" if $spec =~ /ratio/;
+ $method = "Subset" if $spec =~ /subset/;
+ die("ERROR: faulty TRAINING:domain-features spec (no method): $spec\n") unless defined($method);
+ if ($spec =~ /sparse/) {
+ return "-sparse-translation-table -score-options '--SparseDomain$method $domains' -additional-ini '<br>[report-sparse-features]<br>stm<br><br>' ";
+ }
+ else {
+ return "-score-options '--Domain$method $domains' ";
+ }
+}
sub define_training_build_reordering {
my ($step_id) = @_;
@@ -1956,18 +2006,36 @@ sub define_training_build_custom_generation {
sub define_training_create_config {
my ($step_id) = @_;
- my ($config,
- $reordering_table,$phrase_translation_table,$generation_table,@LM)
- = &get_output_and_input($step_id);
+ my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$sparse_phrase_table,@LM)
+ = &get_output_and_input($step_id);
my $cmd = &get_training_setting(9);
+ # get model, and whether suffix array is used. Determines the pt implementation.
+ my $hierarchical = &get("TRAINING:hierarchical-rule-set");
+ my $sa_exec_dir = &get("TRAINING:suffix-array");
+
+ my ($ptImpl, $numFF);
+ if ($hierarchical) {
+ if ($sa_exec_dir) {
+ $ptImpl = 10; # suffix array
+ $numFF = 7;
+ }
+ else {
+ $ptImpl = 6; # in-mem SCFG
+ }
+ }
+ else {
+ $ptImpl = 0; # phrase-based
+ }
+
# additional settings for factored models
- $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table",$phrase_translation_table);
- $cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table)
- if $reordering_table;
- $cmd .= &get_table_name_settings("generation-factors","generation-table",$generation_table)
- if $generation_table;
+ my $ptCmd = $phrase_translation_table;
+ $ptCmd .= ":$ptImpl" if $ptImpl>0;
+ $ptCmd .= ":$numFF" if defined($numFF);
+ $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table", $ptCmd);
+ $cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table) if $reordering_table;
+ $cmd .= &get_table_name_settings("generation-factors","generation-table",$generation_table) if $generation_table;
$cmd .= "-config $config ";
my $decoding_graph_backoff = &get("TRAINING:decoding-graph-backoff");
@@ -2061,8 +2129,17 @@ sub define_training_create_config {
}
my $additional_ini = &get("TRAINING:additional-ini");
+ if (&get("TRAINING:score-settings") &&
+ &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) {
+ $additional_ini .= "<br>[report-sparse-features]<br>stm<br><br>";
+ $cmd .= "-sparse-translation-table ";
+ }
$cmd .= "-additional-ini '$additional_ini' " if defined($additional_ini);
+ # sparse lexical features provide additional content for config file
+ $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features;
+ $cmd .= &define_domain_feature_score_option($domains) if $domains;
+
&create_step($step_id,$cmd);
}
@@ -2202,6 +2279,7 @@ sub get_training_setting {
my ($step) = @_;
my $dir = &check_and_get("GENERAL:working-dir");
my $training_script = &check_and_get("TRAINING:script");
+ my $external_bin_dir = &check_backoff_and_get("TRAINING:external-bin-dir");
my $scripts = &check_backoff_and_get("TUNING:moses-script-dir");
my $reordering = &get("TRAINING:lexicalized-reordering");
my $input_extension = &check_backoff_and_get("TRAINING:input-extension");
@@ -2219,19 +2297,12 @@ sub get_training_setting {
my $xml = $source_syntax || $target_syntax;
- my $external_bin_dir = &check_and_get("GENERAL:external-bin-dir");
-
my $cmd = "$training_script ";
$cmd .= "$options " if defined($options);
$cmd .= "-dont-zip ";
$cmd .= "-first-step $step " if $step>1;
$cmd .= "-last-step $step " if $step<9;
- if ($external_bin_dir) {
- $cmd .= "-external-bin-dir $external_bin_dir ";
- }
- else {
- $cmd .= "-scripts-root-dir $scripts ";
- }
+ $cmd .= "-external-bin-dir $external_bin_dir " if defined($external_bin_dir);
$cmd .= "-f $input_extension -e $output_extension ";
$cmd .= "-alignment $alignment ";
$cmd .= "-max-phrase-length $phrase_length " if $phrase_length;
@@ -2348,13 +2419,12 @@ sub encode_factor_list {
}
sub define_tuningevaluation_filter {
- my ($set,$step_id, $type) = @_;
+ my ($set,$step_id) = @_;
my $scripts = &check_and_get("GENERAL:moses-script-dir");
my $dir = &check_and_get("GENERAL:working-dir");
my $tuning_flag = !defined($set);
- my ($filter_dir,
- $input,$phrase_translation_table,$reordering_table) = &get_output_and_input($step_id);
+ my ($filter_dir,$input,$phrase_translation_table,$reordering_table,$domains) = &get_output_and_input($step_id);
my $binarizer = &get("GENERAL:ttable-binarizer");
my $hierarchical = &get("TRAINING:hierarchical-rule-set");
@@ -2369,12 +2439,6 @@ sub define_tuningevaluation_filter {
#print "filter: $input_filter \n";
$input_filter = $input unless $input_filter;
- # additional settings
- if ($tuning_flag && $type) {
- $filter_dir .= ".$type";
- print "filter-dir: ".$filter_dir."\n";
- }
-
my $settings = &backoff_and_get("EVALUATION:$set:filter-settings") unless $tuning_flag;
$settings = &get("TUNING:filter-settings") if $tuning_flag;
$settings = "" unless $settings;
@@ -2384,34 +2448,82 @@ sub define_tuningevaluation_filter {
$settings .= " -Binarizer \"$binarizer\"" if $binarizer;
$settings .= " --Hierarchical" if &get("TRAINING:hierarchical-rule-set");
+ # get model, and whether suffix array is used. Determines the pt implementation.
+ my $sa_exec_dir = &get("TRAINING:suffix-array");
+
+ my ($ptImpl, $numFF);
+ if ($hierarchical) {
+ if ($sa_exec_dir) {
+ $ptImpl = 10; # suffix array
+ $numFF = 7;
+ }
+ else {
+ $ptImpl = 6; # in-mem SCFG
+ }
+ }
+ else {
+ $ptImpl = 0; # phrase-based
+ }
+
+ # config file specified?
+ my ($config,$cmd,$delete_config);
+ if (&get("TUNING:config-with-reused-weights")) {
+ $config = &get("TUNING:config-with-reused-weights");
+ }
+ elsif (&get("TRAINING:config")) {
+ $config = &get("TRAINING:config");
+ }
# create pseudo-config file
- my $config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION";
- my $cmd = &get_training_setting(9);
- $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table",$phrase_translation_table);
- $cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table)
- if $reordering_table;
- # additional settings for hierarchical models
- if (&get("TRAINING:hierarchical-rule-set")) {
- my $extract_version = $VERSION;
- $extract_version = $RE_USE[$STEP_LOOKUP{"TRAINING:extract-phrases"}]
- if defined($STEP_LOOKUP{"TRAINING:extract-phrases"});
- my $glue_grammar_file = &get("TRAINING:glue-grammar");
- $glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""),$extract_version)
- unless $glue_grammar_file;
- $cmd .= "-glue-grammar-file $glue_grammar_file ";
+ else {
+ $config = $tuning_flag ? "$dir/tuning/moses.table.ini.$VERSION" : "$dir/evaluation/$set.moses.table.ini.$VERSION";
+ $delete_config = 1;
+ $cmd = &get_training_setting(9);
+ $cmd .= &define_domain_feature_score_option($domains) if $domains;
+
+ my $ptCmd = $phrase_translation_table;
+ $ptCmd .= ":$ptImpl" if $ptImpl>0;
+ $ptCmd .= ":$numFF" if defined($numFF);
+ $cmd .= &get_table_name_settings("translation-factors","phrase-translation-table", $ptCmd);
+ $cmd .= &get_table_name_settings("reordering-factors","reordering-table",$reordering_table)
+ if $reordering_table;
+ # additional settings for hierarchical models
+ if (&get("TRAINING:hierarchical-rule-set")) {
+ my $extract_version = $VERSION;
+ $extract_version = $RE_USE[$STEP_LOOKUP{"TRAINING:extract-phrases"}]
+ if defined($STEP_LOOKUP{"TRAINING:extract-phrases"});
+ my $glue_grammar_file = &get("TRAINING:glue-grammar");
+ $glue_grammar_file = &versionize(&long_file_name("glue-grammar","model",""),$extract_version)
+ unless $glue_grammar_file;
+ $cmd .= "-glue-grammar-file $glue_grammar_file ";
+ }
+ if (&get("TRAINING:score-settings") &&
+ &get("TRAINING:score-settings") =~ /SparseCountBinFeature/) {
+ $cmd .= "-sparse-translation-table ";
+ }
+ $cmd .= "-lm 0:3:$dir "; # dummy
+ $cmd .= "-config $config\n";
}
- $cmd .= "-lm 0:3:$dir "; # dummy
- $cmd .= "-config $config\n";
# filter command
- $cmd .= "$scripts/training/filter-model-given-input.pl";
- $cmd .= " $filter_dir $config $input_filter $settings\n";
-
+ if ($sa_exec_dir) {
+ # suffix array
+ $cmd .= "$scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $sa_exec_dir $phrase_translation_table $input_filter $filter_dir \n";
+
+ my $escaped_filter_dir = $filter_dir;
+ $escaped_filter_dir =~ s/\//\\\\\//g;
+ $cmd .= "cat $config | sed s/10\\ 0\\ 0\\ 7.*/10\\ 0\\ 0\\ 7\\ $escaped_filter_dir/g > $filter_dir/moses.ini \n";
+ }
+ else {
+ # normal phrase table
+ $cmd .= "$scripts/training/filter-model-given-input.pl";
+ $cmd .= " $filter_dir $config $input_filter $settings\n";
+ }
+
# clean-up
- $cmd .= "rm $config";
+ $cmd .= "rm $config" if $delete_config;
# copy moses.ini into specified file location
- #$cmd .= "\ncp $filter_dir/moses.ini $filter_config\n";
+ $cmd .= "\ncp $filter_dir/moses.ini $config\n";
&create_step($step_id,$cmd);
}
@@ -2441,7 +2553,7 @@ sub define_evaluation_decode {
$report_segmentation = "yes";
}
if (defined($analyze_search_graph) && $analyze_search_graph eq "yes") {
- $settings .= " -unpruned-search-graph -osg $system_output.graph";
+ $settings .= " -unpruned-search-graph -include-lhs-in-search-graph -osg $system_output.graph";
}
if (defined($report_segmentation) && $report_segmentation eq "yes") {
if ($hierarchical) {
@@ -2461,7 +2573,9 @@ sub define_evaluation_decode {
# create command
my $cmd;
- $nbest =~ s/[^\d]//g if $nbest;
+ my $nbest_size;
+ $nbest_size = $nbest if $nbest;
+ $nbest_size =~ s/[^\d]//g if $nbest;
if ($jobs && $CLUSTER) {
$cmd .= "mkdir -p $dir/evaluation/tmp.$set.$VERSION\n";
$cmd .= "cd $dir/evaluation/tmp.$set.$VERSION\n";
@@ -2477,11 +2591,11 @@ sub define_evaluation_decode {
$cmd .= " -input-file $input";
$cmd .= " --jobs $jobs";
$cmd .= " -decoder-parameters \"$settings\" > $system_output";
- $cmd .= " -n-best-file $system_output.best$nbest -n-best-size $nbest" if $nbest;
+ $cmd .= " -n-best-file $system_output.best$nbest_size -n-best-size $nbest" if $nbest;
}
else {
$cmd = "$decoder $settings -v 0 -f $config < $input > $system_output";
- $cmd .= " -n-best-list $system_output.best$nbest $nbest" if $nbest;
+ $cmd .= " -n-best-list $system_output.best$nbest_size $nbest" if $nbest;
}
$cmd .= " -text-type \"test\"";
@@ -2734,14 +2848,31 @@ sub define_template {
$new_cmd .= $single_cmd."\n";
}
elsif ($single_cmd =~ /^.+$/) {
- $single_cmd =~ /(IN\S*)/
- || die("ERROR: could not find IN in $single_cmd");
- my $in = $1;
+ # find IN and OUT files
+ my $in;
+ if ($single_cmd =~ /(IN)$/ ||
+ $single_cmd =~ /(IN) / ||
+ $single_cmd =~ /(IN[^\d]\S*)/) {
+ $in = $1;
+ }
+ else {
+ die("ERROR: could not find IN in $single_cmd");
+ }
$single_cmd =~ /(OUT\S*)/
|| die("ERROR: could not find OUT in $single_cmd");
my $out = $1;
- $single_cmd =~ s/IN\S*/\%s/;
+ # replace IN* and OUT* with %s
+ if ($single_cmd =~ /IN$/) {
+ $single_cmd =~ s/IN$/\%s/;
+ }
+ elsif ($single_cmd =~ /IN /) {
+ $single_cmd =~ s/IN /\%s /;
+ }
+ else {
+ $single_cmd =~ s/IN[^\d]\S*/\%s/;
+ }
$single_cmd =~ s/OUT\S*/\%s/;
+ # build tmp
my $tmp_dir = $module;
$tmp_dir =~ tr/A-Z/a-z/;
$tmp_dir .= "/tmp.$set.$stepname.$VERSION-".($i++);
diff --git a/scripts/ems/support/analysis.perl b/scripts/ems/support/analysis.perl
index 2948d4164..29962ca71 100755
--- a/scripts/ems/support/analysis.perl
+++ b/scripts/ems/support/analysis.perl
@@ -5,7 +5,7 @@ use Getopt::Long "GetOptions";
my $MAX_LENGTH = 4;
-my ($system,$system_alignment,$segmentation,$reference,$dir,$input,$corpus,$ttable,@FACTORED_TTABLE,$score_options,$hierarchical,$output_corpus,$alignment,$biconcor,$input_factors,$input_factor_names,$output_factor_names,$precision_by_coverage,$precision_by_coverage_factor,$coverage_dir);
+my ($system,$system_alignment,$segmentation,$reference,$dir,$input,$corpus,$ttable,@FACTORED_TTABLE,$score_options,$hierarchical,$output_corpus,$alignment,$biconcor,$input_factors,$input_factor_names,$output_factor_names,$precision_by_coverage,$precision_by_coverage_factor,$coverage_dir,$search_graph);
if (!&GetOptions('system=s' => \$system, # raw output from decoder
'system-alignment=s' => \$system_alignment, # word alignment of system output
'reference=s' => \$reference, # tokenized reference
@@ -25,6 +25,7 @@ if (!&GetOptions('system=s' => \$system, # raw output from decoder
'alignment-file=s' => \$alignment, # alignment of parallel corpus
'coverage=s' => \$coverage_dir, # already computed coverage, stored in this dir
'biconcor=s' => \$biconcor, # binary for bilingual concordancer
+ 'search-graph=s' => \$search_graph, # visualization of search graph
'hierarchical' => \$hierarchical) || # hierarchical model?
!defined($dir)) {
die("ERROR: syntax: analysis.perl -system FILE -reference FILE -dir DIR [-input FILE] [-input-corpus FILE] [-ttable FILE] [-score-options SETTINGS] [-segmentation FILE] [-output-corpus FILE] [-alignment-file FILE] [-biconcor BIN]");
@@ -132,6 +133,11 @@ if (defined($corpus) && defined($output_corpus) && defined($alignment) && define
`$biconcor -s $dir/biconcor -c $corpus -t $output_corpus -a $alignment`;
}
+# process search graph for visualization
+if (defined($search_graph)) {
+ &process_search_graph($search_graph);
+}
+
sub best_matches {
my ($CORRECT,$TOTAL,$out) = @_;
my $type = ($out =~ /precision/) ? "precision" : "recall";
@@ -338,7 +344,7 @@ sub ttable_coverage {
# handling hierarchical
$in =~ s/ \[[^ \]]+\]$//; # remove lhs nt
next if $in =~ /\[[^ \]]+\]\[[^ \]]+\]/; # only consider flat rules
- $in = &get_factor_phrase($factor,$in) unless !defined($factor) || $factor eq "0";
+ $in = &get_factor_phrase($factor,$in) if defined($factor) && $factor eq "0";
$scores = $COLUMN[4] if defined($hierarchical); #scalar @COLUMN == 5;
my @IN = split(/ /,$in);
$size = scalar @IN;
@@ -968,11 +974,16 @@ sub hs_rule_type {
# compute depth of each node
sub hs_compute_depth {
my ($start,$end,$depth,$CHART) = @_;
+ if (!defined($$CHART{$start}{$end})) {
+ print STDERR "warning: illegal span ($start,$end)\n";
+ return;
+ }
my $RULE = $$CHART{$start}{$end};
+
$$RULE{'depth'} = $depth;
for(my $i=0;$i<scalar @{$$RULE{'rule_rhs'}};$i++) {
- # non-terminals
+ # non-terminals
if (defined($$RULE{'alignment'}{$i})) {
my $SUBSPAN = $$RULE{'spans'}[$$RULE{'alignment'}{$i}];
&hs_compute_depth($$SUBSPAN{'from'},$$SUBSPAN{'to'},$depth+1,$CHART);
@@ -983,6 +994,10 @@ sub hs_compute_depth {
# re-assign depth to as deep as possible
sub hs_recompute_depth {
my ($start,$end,$CHART,$max_depth) = @_;
+ if (!defined($$CHART{$start}{$end})) {
+ print STDERR "warning: illegal span ($start,$end)\n";
+ return 0;
+ }
my $RULE = $$CHART{$start}{$end};
my $min_sub_depth = $max_depth+1;
@@ -1001,6 +1016,10 @@ sub hs_recompute_depth {
# get child dependencies for a sentence
sub hs_get_children {
my ($start,$end,$CHART) = @_;
+ if (!defined($$CHART{$start}{$end})) {
+ print STDERR "warning: illegal span ($start,$end)\n";
+ return -1;
+ }
my $RULE = $$CHART{$start}{$end};
my @CHILDREN = ();
@@ -1011,7 +1030,7 @@ sub hs_get_children {
if (defined($$RULE{'alignment'}{$i})) {
my $SUBSPAN = $$RULE{'spans'}[$$RULE{'alignment'}{$i}];
my $child = &hs_get_children($$SUBSPAN{'from'},$$SUBSPAN{'to'},$CHART);
- push @CHILDREN, $child;
+ push @CHILDREN, $child unless $child == -1;
}
}
return $$RULE{'id'};
@@ -1020,6 +1039,10 @@ sub hs_get_children {
# create the span annotation for an output sentence
sub hs_create_out_span {
my ($start,$end,$CHART,$MATRIX) = @_;
+ if (!defined($$CHART{$start}{$end})) {
+ print STDERR "warning: illegal span ($start,$end)\n";
+ return;
+ }
my $RULE = $$CHART{$start}{$end};
my %SPAN;
@@ -1064,6 +1087,10 @@ sub hs_create_out_span {
# create the span annotation for an input sentence
sub hs_create_in_span {
my ($start,$end,$CHART,$MATRIX) = @_;
+ if (!defined($$CHART{$start}{$end})) {
+ print STDERR "warning: illegal span ($start,$end)\n";
+ return;
+ }
my $RULE = $$CHART{$start}{$end};
my %SPAN;
@@ -1103,3 +1130,28 @@ sub hs_create_in_span {
$$RULE{'end_div_in'} = $#{$MATRIX};
$$THIS_SPAN{'closing'}{$$RULE{'depth'}} = 1;
}
+
+sub process_search_graph {
+ my ($search_graph_file) = @_;
+ open(OSG,$search_graph) || die("ERROR: could not open search graph file '$search_graph_file'");
+ `mkdir -p $dir/search-graph`;
+ my $last_sentence = -1;
+ while(<OSG>) {
+ /^(\d+) (\d+)\-?\>?(\S*) (\S+) =\> (.+) :(.*): pC=([\de\-\.]+), c=([\de\-\.]+) \[(\d+)\.\.(\d+)\] (.*)\[total=([\d\-\.]+)\] \<\</ || die("ERROR: buggy search graph line: $_");
+ my ($sentence,$id,$recomb,$lhs,$output,$alignment,$rule_score,$heuristic_rule_score,$from,$to,$children,$hyp_score)
+ = ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12);
+ chop($alignment) if $alignment;
+ chop($children) if $children;
+ $recomb = 0 unless $recomb;
+ $children = "" unless defined $children;
+ $alignment = "" unless defined $alignment;
+ if ($last_sentence != $sentence) {
+ close(SENTENCE) if $sentence;
+ open(SENTENCE,">$dir/search-graph/graph.$sentence");
+ $last_sentence = $sentence;
+ }
+ print SENTENCE "$id\t$recomb\t$from\t$to\t$output\t$alignment\t$children\t$rule_score\t$heuristic_rule_score\t$hyp_score\t$lhs\n";
+ }
+ close(OSG);
+ close(SENTENCE);
+}
diff --git a/scripts/ems/support/build-domain-file-from-subcorpora.perl b/scripts/ems/support/build-domain-file-from-subcorpora.perl
new file mode 100755
index 000000000..e85b6ad84
--- /dev/null
+++ b/scripts/ems/support/build-domain-file-from-subcorpora.perl
@@ -0,0 +1,38 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+# Create domain file from corpora
+# (helper for domain adatpation)
+
+# Creates a file with domain names and end line numbers for different domains
+# within the cleaned training corpus. This file is used by various domain
+# adaptation methods.
+
+my ($extension,@SUBCORPORA) = @ARGV;
+
+my $line_count = 0;
+my %UNIQUE_NAME;
+my $number = 1;
+foreach (@SUBCORPORA) {
+ # get number of lines
+ if (!-e "$_.$extension" && -e "$_.$extension.gz") {
+ $line_count += `zcat $_.$extension.gz | wc -l`;
+ }
+ elsif (-e "$_.$extension") {
+ $line_count += `wc -l < $_.$extension`;
+ }
+ else {
+ die("ERROR: could not open sub corpus file $_.$extension\n");
+ }
+
+ # construct name
+ my $name = $number++; # default: cardinal number
+ while(defined($UNIQUE_NAME{$name})) { $name = $number++; } # slightly paranoid
+ if (/\/([^\.\/]+)\.[^\/]+$/ && !defined($UNIQUE_NAME{$1})) { # reconstruct corpus name
+ $name = $1;
+ $UNIQUE_NAME{$1}++;
+ }
+ print "$line_count $name\n";
+}
+
diff --git a/scripts/ems/support/build-sparse-lexical-features.perl b/scripts/ems/support/build-sparse-lexical-features.perl
new file mode 100755
index 000000000..ad24ebb11
--- /dev/null
+++ b/scripts/ems/support/build-sparse-lexical-features.perl
@@ -0,0 +1,100 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+# Build necessary files for sparse lexical features
+# * target word insertion
+# * source word deletion
+# * word translation
+# * phrase length
+
+my ($corpus,$input_extension,$output_extension,$outfile_prefix,$specification) = @ARGV;
+my $ini = "";
+my $report = "";
+my %ALREADY;
+
+foreach my $feature_spec (split(/,\s*/,$specification)) {
+ my @SPEC = split(/\s+/,$feature_spec);
+ if ($SPEC[0] eq 'target-word-insertion') {
+ if ($SPEC[1] eq 'top' && $SPEC[2] =~ /^\d+$/) {
+ my $file = &create_top_words($output_extension, $SPEC[2]);
+ $ini .= "[target-word-insertion-feature]\n0 $file\n\n";
+ $report .= "twi\n";
+ }
+ else {
+ die("ERROR: Unknown parameter specification in '$feature_spec'\n");
+ }
+ }
+ elsif ($SPEC[0] eq 'source-word-deletion') {
+ if ($SPEC[1] eq 'top' && $SPEC[2] =~ /^\d+$/) {
+ my $file = &create_top_words($input_extension, $SPEC[2]);
+ $ini .= "[source-word-deletion-feature]\n0 $file\n\n";
+ $report .= "swd\n";
+ }
+ else {
+ die("ERROR: Unknown parameter specification in '$feature_spec'\n");
+ }
+ }
+ elsif ($SPEC[0] eq 'word-translation') {
+ if ($SPEC[1] eq 'top' && $SPEC[2] =~ /^\d+$/ && $SPEC[3] =~ /^\d+$/) {
+ my $file_in = &create_top_words($input_extension, $SPEC[2]);
+ my $file_out = &create_top_words($output_extension, $SPEC[3]);
+ $ini .= "[word-translation-feature]\n0 0 $file_in $file_out\n\n";
+ $report .= "wt\n";
+ }
+ else {
+ die("ERROR: Unknown parameter specification in '$feature_spec'\n");
+ }
+ }
+ elsif ($SPEC[0] eq 'phrase-length') {
+ $ini .= "[phrase-length-feature]\ntrue\n\n";
+ $report .= "pl\n";
+ }
+ else {
+ die("ERROR: Unknown feature type '$SPEC[0]' in specification '$feature_spec'\nfull spec: '$specification'\n");
+ }
+}
+
+open(INI,">$outfile_prefix.ini");
+print INI $ini;
+print INI "\n[report-sparse-features]\n$report\n";
+print INI "\n[use-alignment-info]\ntrue\n\n";
+close(INI);
+
+sub create_top_words {
+ my ($extension, $count) = @_;
+ my $file = "$outfile_prefix.$extension.top$count";
+ return $file if defined($ALREADY{"$extension,$count"});
+ $ALREADY{"$extension,$count"}++;
+
+ # get counts
+ my %COUNT;
+ open(CORPUS,"$corpus.$extension");
+ while(<CORPUS>) {
+ chop;
+ foreach (split) {
+ $_ =~ s/\|.+//; # only surface factor at this point
+ $COUNT{$_}++ unless $_ eq "";
+ }
+ }
+ close(CORPUS);
+
+ # sort
+ my @COUNT_WORD;
+ foreach (keys %COUNT) {
+ next if $COUNT{$_} <= 3; # avoid large tail
+ next if $_ =~ /:/; # avoid colon bug
+ push @COUNT_WORD,sprintf("%09d %s",$COUNT{$_},$_);
+ }
+ my @SORTED = reverse sort @COUNT_WORD;
+
+ # write top n to file
+ open(TOP,">$file");
+ for(my $i=0; $i<$count && $i<scalar(@SORTED); $i++) {
+ $SORTED[$i] =~ /^\d+ (.+)$/;
+ print TOP "$1\n";
+ }
+ close(TOP);
+
+ return $file;
+}
diff --git a/scripts/ems/support/run-command-on-multiple-refsets.perl b/scripts/ems/support/run-command-on-multiple-refsets.perl
index 7590e72a1..972f5602d 100755
--- a/scripts/ems/support/run-command-on-multiple-refsets.perl
+++ b/scripts/ems/support/run-command-on-multiple-refsets.perl
@@ -7,13 +7,18 @@ die("ERROR: syntax: run-command-on-multiple-refsets.perl cmd in out")
my ($cmd,$in,$out) = @ARGV;
die("ERROR: attempt to run on multiple references, but there is only one")
- if -e $in && ! -e "$in.ref0";
-die("ERROR: did not find reference '$in.ref0'")
- unless -e "$in.ref0";
+ if -e $in && (! -e "$in.ref0" || -e $in."0");
+die("ERROR: did not find reference '$in.ref0' or '${in}0'")
+ unless (-e "$in.ref0" || -e $in."0");
-for(my $i=0;-e "$in.ref$i";$i++) {
+for(my $i=0;-e "$in.ref$i" || -e $in.$i;$i++) {
my $single_cmd = $cmd;
- $single_cmd =~ s/mref-input-file/$in.ref$i/g;
+ if (! -e "$in.ref$i") {
+ $single_cmd =~ s/mref-input-file/$in$i/g;
+ }
+ else {
+ $single_cmd =~ s/mref-input-file/$in.ref$i/g;
+ }
$single_cmd =~ s/mref-output-file/$out.ref$i/g;
system($single_cmd);
}
diff --git a/scripts/ems/support/split-sentences.perl b/scripts/ems/support/split-sentences.perl
index eb03d25a8..b366d3d7e 100755
--- a/scripts/ems/support/split-sentences.perl
+++ b/scripts/ems/support/split-sentences.perl
@@ -6,10 +6,10 @@ binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use strict;
-my $mydir = "$Bin/nonbreaking_prefixes";
+my $mydir = "$RealBin/../../share/nonbreaking_prefixes";
my %NONBREAKING_PREFIX = ();
my $language = "en";
diff --git a/scripts/ems/support/train-irstlm.perl b/scripts/ems/support/train-irstlm.perl
new file mode 100644
index 000000000..5d2c05ce2
--- /dev/null
+++ b/scripts/ems/support/train-irstlm.perl
@@ -0,0 +1,22 @@
+#!/usr/bin/perl -w
+
+use strict;
+
+# wrapper for irstlm training
+
+my $IRSTLM = shift @ARGV;
+
+my $settings = join(" ",@ARGV);
+$settings =~ s/\-order/\-n/;
+$settings =~ s/\-text/\-i/;
+$settings =~ s/\-lm/\-o/;
+
+if ($settings !~ /\-o +(\S+)/) {
+ die("ERROR: no output file specified");
+}
+my $lm = $1;
+$settings =~ s/(\-o +\S+)/$1.iarpa.gz/;
+
+my $cmd = "IRSTLM=$IRSTLM $IRSTLM/scripts/build-lm.sh $settings ; ~/moses/irstlm/bin/compile-lm --text yes $lm.iarpa.gz $lm";
+print STDERR $cmd."\n";
+print `$cmd`;
diff --git a/scripts/fuzzy-match/create_xml.perl b/scripts/fuzzy-match/create_xml.perl
new file mode 100755
index 000000000..de3d61f71
--- /dev/null
+++ b/scripts/fuzzy-match/create_xml.perl
@@ -0,0 +1,309 @@
+#!/usr/bin/perl -w
+
+binmode(STDIN, ":utf8");
+binmode(STDOUT, ":utf8");
+
+use strict;
+use FindBin qw($RealBin);
+use File::Basename;
+
+sub trim($);
+
+############################################
+# START
+
+my $inPath = $ARGV[0];
+open(IN,"<".$inPath);
+
+open(RULE,">$inPath.extract");
+open(RULE_INV,">$inPath.extract.inv");
+
+my ($sentenceInd, $score, $source, $input, $target, $align, $path, $count);
+
+# MAIN LOOP
+while ($sentenceInd = <IN>) {
+ $score = <IN>;
+ $source = <IN>;
+ $input = <IN>;
+ $target = <IN>;
+ $align = <IN>;
+ $path = <IN>;
+ $count = <IN>;
+ chomp($sentenceInd);
+ chomp($score);
+ chomp($source);
+ chomp($input);
+ chomp($target);
+ chomp($align);
+ chomp($path);
+ chomp($count);
+ $source = trim($sentenceInd);
+ $source = trim($score);
+ $source = trim($source);
+ $input = trim($input);
+ $target = trim($target);
+ $align = trim($align);
+ $path = trim($path);
+ $count = trim($count);
+
+ my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) = &create_xml($source, $input, $target, $align, $path);
+
+ #print STDOUT $frame."\n";
+ print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $count\n";
+ print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $count\n";
+ #print STDOUT "$sentenceInd ||| $score ||| $count\n";
+
+}
+
+close(IN);
+close(RULE);
+close(RULE_INV);
+
+`LC_ALL=C sort $inPath.extract | gzip -c > $inPath.extract.sorted.gz`;
+`LC_ALL=C sort $inPath.extract.inv | gzip -c > $inPath.extract.inv.sorted.gz`;
+
+my $lex_file = "-";
+
+my $cmd;
+$cmd = "$RealBin/../../scripts/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $inPath.extract -lexical-file $lex_file -score-options \"--NoLex\" -phrase-translation-table $inPath.pt";
+print STDERR "Executing: $cmd \n";
+`$cmd`;
+
+#######################################################
+sub create_xml {
+ my ($source,$input,$target,$alignment,$path) = @_;
+
+ my @INPUT = split(/ /,$input);
+ my @SOURCE = split(/ /,$source);
+ my @TARGET = split(/ /,$target);
+ my %ALIGN = &create_alignment($alignment);
+
+ my %FRAME_INPUT;
+ my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
+ foreach (@TARGET) { push @TARGET_BITMAP,1 }
+
+ ### STEP 1: FIND MISMATCHES
+
+ my ($s,$i) = (0,0);
+ my $currently_matching = 0;
+ my ($start_s,$start_i) = (0,0);
+
+ $path .= "X"; # indicate end
+ print STDERR "$input\n$source\n$target\n$path\n";
+ for(my $p=0;$p<length($path);$p++) {
+ my $action = substr($path,$p,1);
+
+ # beginning of a mismatch
+ if ($currently_matching && $action ne "M" && $action ne "X") {
+ $start_i = $i;
+ $start_s = $s;
+ $currently_matching = 0;
+ }
+
+ # end of a mismatch
+ elsif (!$currently_matching &&
+ ($action eq "M" || $action eq "X")) {
+
+ # remove use of affected target words
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $TARGET_BITMAP[$tt] = 0;
+ }
+
+ # also remove enclosed unaligned words?
+ }
+
+ # are there input words that need to be inserted ?
+ print STDERR "($start_i<$i)?\n";
+ if ($start_i<$i) {
+
+ # take note of input words to be inserted
+ my $insertion = "";
+ for(my $ii = $start_i; $ii<$i; $ii++) {
+ $insertion .= $INPUT[$ii]." ";
+ }
+
+ # find position for inserted input words
+
+ # find first removed target word
+ my $start_t = 1000;
+ for(my $ss = $start_s; $ss<$s; $ss++) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt < $start_t;
+ }
+ }
+
+ # end of sentence? add to end
+ if ($start_t == 1000 && $i > $#INPUT) {
+ $start_t = $#TARGET;
+ }
+
+ # backtrack to previous words if unaligned
+ if ($start_t == 1000) {
+ $start_t = -1;
+ for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
+ $start_t = $tt if $tt > $start_t;
+ }
+ }
+ }
+ $FRAME_INPUT{$start_t} .= $insertion;
+ my %NT = ("start_t" => $start_t,
+ "start_i" => $start_i );
+ push @NT,\%NT;
+ }
+ $currently_matching = 1;
+ }
+
+ print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
+ if ($action ne "I") {
+ print STDERR " ->";
+ foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
+ print STDERR " ".$tt;
+ }
+ }
+ print STDERR "\n";
+ $s++ unless $action eq "I";
+ $i++ unless $action eq "D";
+ $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
+ push @INPUT_BITMAP, 1 if $action eq "M";
+ push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
+ }
+
+
+ print STDERR $target."\n";
+ foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
+ foreach (sort keys %FRAME_INPUT) {
+ print STDERR "$_: $FRAME_INPUT{$_}\n";
+ }
+
+ ### STEP 2: BUILD RULE AND FRAME
+
+ # hierarchical rule
+ my $rule_s = "";
+ my $rule_pos_s = 0;
+ my %RULE_ALIGNMENT_S;
+ for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
+ if ($INPUT_BITMAP[$i]) {
+ $rule_s .= $INPUT[$i]." ";
+ $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
+ }
+ foreach my $NT (@NT) {
+ if ($i == $$NT{"start_i"}) {
+ $rule_s .= "[X][X] ";
+ $$NT{"rule_pos_s"} = $rule_pos_s++;
+ }
+ }
+ }
+
+ my $rule_t = "";
+ my $rule_pos_t = 0;
+ my %RULE_ALIGNMENT_T;
+ for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
+ if ($t>=0 && $TARGET_BITMAP[$t]) {
+ $rule_t .= $TARGET[$t]." ";
+ $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
+ }
+ foreach my $NT (@NT) {
+ if ($t == $$NT{"start_t"}) {
+ $rule_t .= "[X][X] ";
+ $$NT{"rule_pos_t"} = $rule_pos_t++;
+ }
+ }
+ }
+
+ my $rule_alignment = "";
+ foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
+ foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
+ next unless defined($RULE_ALIGNMENT_T{$t});
+ $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
+ }
+ }
+ foreach my $NT (@NT) {
+ $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
+ }
+
+ chop($rule_s);
+ chop($rule_t);
+ chop($rule_alignment);
+
+ my $rule_alignment_inv = "";
+ foreach (split(/ /,$rule_alignment)) {
+ /^(\d+)\-(\d+)$/;
+ $rule_alignment_inv .= "$2-$1 ";
+ }
+ chop($rule_alignment_inv);
+
+ # frame
+ my $frame = "";
+ $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};
+
+ my $currently_included = 0;
+ my $start_t = -1;
+ push @TARGET_BITMAP,0; # indicate end
+
+ for(my $t=0;$t<=scalar(@TARGET);$t++) {
+ # beginning of tm target inclusion
+ if (!$currently_included && $TARGET_BITMAP[$t]) {
+ $start_t = $t;
+ $currently_included = 1;
+ }
+
+ # end of tm target inclusion (not included word or inserted input)
+ elsif ($currently_included &&
+ (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
+ # add xml (unless change is at the beginning of the sentence
+ if ($start_t >= 0) {
+ my $target = "";
+ print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
+ for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
+ $target .= $TARGET[$tt] . " ";
+ }
+ chop($target);
+ $frame .= "<xml translation=\"$target\"> x </xml> ";
+ }
+ $currently_included = 0;
+ }
+
+ $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
+ print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
+ }
+
+ print STDERR $frame."\n-------------------------------------\n";
+ return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
+}
+
+sub create_alignment {
+ my ($line) = @_;
+ my (@ALIGNED_TO_S,@ALIGNED_TO_T);
+ foreach my $point (split(/ /,$line)) {
+ my ($s,$t) = split(/\-/,$point);
+ $ALIGNED_TO_S[$s]{$t}++;
+ $ALIGNED_TO_T[$t]{$s}++;
+ }
+ my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
+ return %ALIGNMENT;
+}
+
+# Perl trim function to remove whitespace from the start and end of the string
+sub trim($)
+{
+ my $string = shift;
+ $string =~ s/^\s+//;
+ $string =~ s/\s+$//;
+ return $string;
+}
+# Left trim function to remove leading whitespace
+sub ltrim($)
+{
+ my $string = shift;
+ $string =~ s/^\s+//;
+ return $string;
+}
+# Right trim function to remove trailing whitespace
+sub rtrim($)
+{
+ my $string = shift;
+ $string =~ s/\s+$//;
+ return $string;
+} \ No newline at end of file
diff --git a/scripts/generic/compound-splitter.perl b/scripts/generic/compound-splitter.perl
index 9948c648e..8f82ab8d9 100755
--- a/scripts/generic/compound-splitter.perl
+++ b/scripts/generic/compound-splitter.perl
@@ -256,7 +256,7 @@ sub apply {
}
if ($best_split !~ / /) {
print join(" ",@BUFFER)." " if scalar(@BUFFER); @BUFFER = (); # clear buffer
- print $word; # do not change case for unsplit words
+ print $factored_word; # do not change case for unsplit words
next;
}
if (!$SYNTAX) {
diff --git a/scripts/generic/extract-parallel.perl b/scripts/generic/extract-parallel.perl
index b810d9672..ac9743570 100755
--- a/scripts/generic/extract-parallel.perl
+++ b/scripts/generic/extract-parallel.perl
@@ -24,14 +24,18 @@ my $source = $ARGV[5]; # 2nd arg of extract argument
my $align = $ARGV[6]; # 3rd arg of extract argument
my $extract = $ARGV[7]; # 4th arg of extract argument
+my $makeTTable = 1; # whether to build the ttable extract files
my $otherExtractArgs= "";
for (my $i = 8; $i < $#ARGV + 1; ++$i)
{
+ $makeTTable = 0 if $ARGV[$i] eq "--NoTTable";
$otherExtractArgs .= $ARGV[$i] ." ";
}
+my $cmd;
my $TMPDIR=dirname($extract) ."/tmp.$$";
-mkdir $TMPDIR;
+$cmd = "mkdir -p $TMPDIR";
+`$cmd`;
my $totalLines = int(`cat $align | wc -l`);
my $linesPerSplit = int($totalLines / $numParallel) + 1;
@@ -40,7 +44,6 @@ print "total=$totalLines line-per-split=$linesPerSplit \n";
my @children;
my $pid;
-my $cmd;
if ($numParallel > 1)
{
@@ -88,7 +91,7 @@ for (my $i = 0; $i < $numParallel; ++$i)
if ($pid == 0)
{ # child
my $numStr = NumStr($i);
- my $cmd = "$extractCmd $TMPDIR/target.$numStr $TMPDIR/source.$numStr $TMPDIR/align.$numStr $TMPDIR/extract.$numStr $otherExtractArgs \n";
+ my $cmd = "$extractCmd $TMPDIR/target.$numStr $TMPDIR/source.$numStr $TMPDIR/align.$numStr $TMPDIR/extract.$numStr $otherExtractArgs --SentenceOffset ".($i*$linesPerSplit)." 2>> /dev/stderr \n";
print STDERR $cmd;
`$cmd`;
@@ -106,9 +109,10 @@ foreach (@children) {
}
# merge
-my $catCmd = "zcat ";
-my $catInvCmd = "zcat ";
-my $catOCmd = "zcat ";
+my $is_osx = ($^O eq "darwin");
+my $catCmd = $is_osx?"gunzip -c ":"zcat ";
+my $catInvCmd = $catCmd;
+my $catOCmd = $catCmd;
for (my $i = 0; $i < $numParallel; ++$i)
{
my $numStr = NumStr($i);
@@ -123,11 +127,14 @@ $catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.o.sorted.gz \n
@children = ();
-$pid = RunFork($catCmd);
-push(@children, $pid);
+if ($makeTTable)
+{
+ $pid = RunFork($catCmd);
+ push(@children, $pid);
-$pid = RunFork($catInvCmd);
-push(@children, $pid);
+ $pid = RunFork($catInvCmd);
+ push(@children, $pid);
+}
my $numStr = NumStr(0);
if (-e "$TMPDIR/extract.$numStr.o.gz")
diff --git a/scripts/generic/multi-bleu.perl b/scripts/generic/multi-bleu.perl
index 06f01acff..137117647 100755
--- a/scripts/generic/multi-bleu.perl
+++ b/scripts/generic/multi-bleu.perl
@@ -49,7 +49,7 @@ while(<STDIN>) {
foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
- my @WORD = split(/ /,$reference);
+ my @WORD = split(' ',$reference);
my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length);
if ($diff < $closest_diff) {
diff --git a/scripts/generic/score-parallel.perl b/scripts/generic/score-parallel.perl
index b399a83ba..0081f3055 100755
--- a/scripts/generic/score-parallel.perl
+++ b/scripts/generic/score-parallel.perl
@@ -13,7 +13,7 @@ sub GetSourcePhrase($);
sub NumStr($);
#my $EXTRACT_SPLIT_LINES = 5000000;
-my $EXTRACT_SPLIT_LINES = 1000000;
+my $EXTRACT_SPLIT_LINES = 50000000;
print "Started ".localtime() ."\n";
@@ -121,7 +121,8 @@ for (my $i = 0; $i < $fileCount; ++$i)
my $fileInd = $i % $numParallel;
my $fh = $runFiles[$fileInd];
- my $cmd = "$scoreCmd $TMPDIR/extract.$i.gz $lexFile $TMPDIR/phrase-table.half.$numStr.gz $otherExtractArgs\n";
+ my $cmd = "$scoreCmd $TMPDIR/extract.$i.gz $lexFile $TMPDIR/phrase-table.half.$numStr.gz $otherExtractArgs 2>> /dev/stderr \n";
+ print STDERR $cmd;
print $fh $cmd;
}
@@ -156,7 +157,7 @@ if ($fileCount == 1 && !$doSort)
}
else
{
- $cmd = "zcat $TMPDIR/phrase-table.half.*.gz";
+ $cmd = "gunzip -c $TMPDIR/phrase-table.half.*.gz 2>> /dev/stderr";
if ($doSort) {
$cmd .= "| LC_ALL=C $sortCmd -T $TMPDIR ";
diff --git a/scripts/generic/trainlm-irst.perl b/scripts/generic/trainlm-irst.perl
index 71f6e08cf..d8c6ce2a5 100755
--- a/scripts/generic/trainlm-irst.perl
+++ b/scripts/generic/trainlm-irst.perl
@@ -12,7 +12,7 @@
# And make sure the $settings variable is empty. This script doesn't understand some of the sri args like -unk and will complain.
use strict;
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use Getopt::Long;
my $order = 3;
@@ -21,6 +21,8 @@ my $lmPath;
my $cores = 2;
my $irstPath;
my $tempPath = "tmp";
+my $p = 1;
+my $s;
my $temp;
GetOptions("order=s" => \$order,
@@ -29,6 +31,8 @@ GetOptions("order=s" => \$order,
"cores=s" => \$cores,
"irst-dir=s" => \$irstPath,
"temp-dir=s" => \$tempPath,
+ "p=i" => \$p, # irstlm parameter: delete singletons
+ "s=s" => \$s, # irstlm parameter: smoothing method
"interpolate!" => \$temp, #ignore
"kndiscount!" => \$temp #ignore
) or exit 1;
@@ -56,7 +60,9 @@ else
print STDERR "EXECUTING $cmd\n";
`$cmd`;
-$cmd = "IRSTLM=$irstPath/.. $irstPath/build-lm.sh -t $tempPath/stat4 -i \"gunzip -c $tempPath/monolingual.setagged.gz\" -n $order -p -o $tempPath/iarpa.gz -k $cores";
+$cmd = "IRSTLM=$irstPath/.. $irstPath/build-lm.sh -t $tempPath/stat4 -i \"gunzip -c $tempPath/monolingual.setagged.gz\" -n $order -o $tempPath/iarpa.gz -k $cores";
+$cmd .= " -p" if $p;
+$cmd .= " -s $s" if defined($s);
print STDERR "EXECUTING $cmd\n";
`$cmd`;
diff --git a/scripts/recaser/recase.perl b/scripts/recaser/recase.perl
index eb85e89dd..c83c30daa 100755
--- a/scripts/recaser/recase.perl
+++ b/scripts/recaser/recase.perl
@@ -45,7 +45,7 @@ binmode(STDOUT, ":utf8");
my $sentence = 0;
my $infile = $INFILE;
$infile =~ s/[\.\/]/_/g;
-open(MODEL,"$MOSES -v 0 -f $RECASE_MODEL -i $INFILE -dl 1|");
+open(MODEL,"$MOSES -v 0 -f $RECASE_MODEL -i $INFILE -dl 0|");
binmode(MODEL, ":utf8");
while(<MODEL>) {
chomp;
diff --git a/scripts/recaser/train-recaser.perl b/scripts/recaser/train-recaser.perl
index a5a707554..7b0d47320 100755
--- a/scripts/recaser/train-recaser.perl
+++ b/scripts/recaser/train-recaser.perl
@@ -2,6 +2,7 @@
# $Id$
use strict;
+use FindBin qw($Bin);
use Getopt::Long "GetOptions";
binmode(STDIN, ":utf8");
@@ -59,7 +60,7 @@ if ($HELP || $ERROR) {
--ngram-count=file ... path to ngram-count.sh if not in \$PATH (used only with --lm=SRILM).
= Steps this script will perform =
- (1) Truecasing (disabled);
+ (1) Truecasing;
(2) Language Model Training;
(3) Data Preparation
(4-10) Recaser Model Training;
@@ -78,16 +79,29 @@ if ($HELP || $ERROR) {
# main loop
`mkdir -p $DIR`;
-&truecase() if 0 && $FIRST_STEP == 1;
+&truecase() if $FIRST_STEP == 1;
+$CORPUS = "$DIR/aligned.truecased" if (-e "$DIR/aligned.truecased");
&train_lm() if $FIRST_STEP <= 2;
&prepare_data() if $FIRST_STEP <= 3 && $LAST_STEP >= 3;
&train_recase_model() if $FIRST_STEP <= 10 && $LAST_STEP >= 3;
&cleanup() if $LAST_STEP == 11;
+exit(0);
+
### subs ###
sub truecase {
- # to do
+ print STDERR "(1) Truecase data @ ".`date`;
+ print STDERR "(1) To build model without truecasing, use --first-step 2, and make sure $DIR/aligned.truecased does not exist\n";
+
+ my $cmd = "$Bin/train-truecaser.perl --model $DIR/truecaser_model --corpus $CORPUS";
+ print STDERR $cmd."\n";
+ system($cmd) == 0 || die("Training truecaser died with error " . ($? >> 8) . "\n");
+
+ $cmd = "$Bin/truecase.perl --model $DIR/truecaser_model < $CORPUS > $DIR/aligned.truecased";
+ print STDERR $cmd."\n";
+ system($cmd) == 0 || die("Applying truecaser died with error " . ($? >> 8) . "\n");
+
}
sub train_lm {
@@ -149,7 +163,6 @@ sub train_recase_model {
else {
$cmd .= " --lm 0:3:$DIR/cased.srilm.gz:0";
}
- $cmd .= " -scripts-root-dir $SCRIPTS_ROOT_DIR" if $SCRIPTS_ROOT_DIR;
$cmd .= " -config $CONFIG" if $CONFIG;
print STDERR $cmd."\n";
system($cmd) == 0 || die("Recaser model training failed with error " . ($? >> 8) . "\n");
@@ -163,7 +176,9 @@ sub cleanup {
my $clean_2 = $?;
`rm -f $DIR/lex*`;
my $clean_3 = $?;
- if ($clean_1 + $clean_2 + $clean_3 != 0) {
+ `rm -f $DIR/truecaser_model`;
+ my $clean_4 = $?;
+ if ($clean_1 + $clean_2 + $clean_3 + $clean_4 != 0) {
print STDERR "Training successful but some files could not be cleaned.\n";
}
}
diff --git a/scripts/regression-testing/moses-virtual b/scripts/regression-testing/moses-virtual
index 8a92051ee..8a92051ee 100644..100755
--- a/scripts/regression-testing/moses-virtual
+++ b/scripts/regression-testing/moses-virtual
diff --git a/scripts/regression-testing/tests/mert-moses-new-aggregate/command b/scripts/regression-testing/tests/mert-moses-new-aggregate/command
index e1b34986a..e1b34986a 100644..100755
--- a/scripts/regression-testing/tests/mert-moses-new-aggregate/command
+++ b/scripts/regression-testing/tests/mert-moses-new-aggregate/command
diff --git a/scripts/regression-testing/tests/mert-moses-new-continue/command b/scripts/regression-testing/tests/mert-moses-new-continue/command
index 3f346febd..3f346febd 100644..100755
--- a/scripts/regression-testing/tests/mert-moses-new-continue/command
+++ b/scripts/regression-testing/tests/mert-moses-new-continue/command
diff --git a/scripts/regression-testing/tests/mert-moses-new-nocase/command b/scripts/regression-testing/tests/mert-moses-new-nocase/command
index ed47d0aa1..ed47d0aa1 100644..100755
--- a/scripts/regression-testing/tests/mert-moses-new-nocase/command
+++ b/scripts/regression-testing/tests/mert-moses-new-nocase/command
diff --git a/scripts/regression-testing/tests/mert-moses-new/command b/scripts/regression-testing/tests/mert-moses-new/command
index a1e0ec1bf..a1e0ec1bf 100644..100755
--- a/scripts/regression-testing/tests/mert-moses-new/command
+++ b/scripts/regression-testing/tests/mert-moses-new/command
diff --git a/scripts/tokenizer/nonbreaking_prefixes/README.txt b/scripts/share/nonbreaking_prefixes/README.txt
index 02cdfccb9..02cdfccb9 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/README.txt
+++ b/scripts/share/nonbreaking_prefixes/README.txt
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ca b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ca
index 2f4fdfc67..2f4fdfc67 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ca
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ca
diff --git a/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.cs b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.cs
new file mode 100644
index 000000000..dce6167ae
--- /dev/null
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.cs
@@ -0,0 +1,390 @@
+Bc
+BcA
+Ing
+Ing.arch
+MUDr
+MVDr
+MgA
+Mgr
+JUDr
+PhDr
+RNDr
+PharmDr
+ThLic
+ThDr
+Ph.D
+Th.D
+prof
+doc
+CSc
+DrSc
+dr. h. c
+PaedDr
+Dr
+PhMr
+DiS
+abt
+ad
+a.i
+aj
+angl
+anon
+apod
+atd
+atp
+aut
+bd
+biogr
+b.m
+b.p
+b.r
+cca
+cit
+cizojaz
+c.k
+col
+Äes
+Äín
+Äj
+ed
+facs
+fasc
+fol
+fot
+franc
+h.c
+hist
+hl
+hrsg
+ibid
+il
+ind
+inv.Ä
+jap
+jhdt
+jv
+koed
+kol
+korej
+kl
+krit
+lat
+lit
+m.a
+maÄ
+mj
+mp
+násl
+napÅ™
+nepubl
+něm
+no
+nr
+n.s
+okr
+odd
+odp
+obr
+opr
+orig
+phil
+pl
+pokraÄ
+pol
+port
+pozn
+pÅ™.kr
+pÅ™.n.l
+přel
+přeprac
+příl
+pseud
+pt
+red
+repr
+resp
+revid
+rkp
+roÄ
+roz
+rozš
+samost
+sect
+sest
+seš
+sign
+sl
+srv
+stol
+sv
+Å¡k
+Å¡k.ro
+Å¡pan
+tab
+t.Ä
+tis
+tj
+tÅ™
+tzv
+univ
+uspoÅ™
+vol
+vl.jm
+vs
+vyd
+vyobr
+zal
+zejm
+zkr
+zprac
+zvl
+n.p
+napÅ™
+než
+MUDr
+abl
+absol
+adj
+adv
+ak
+ak. sl
+akt
+alch
+amer
+anat
+angl
+anglosas
+arab
+arch
+archit
+arg
+astr
+astrol
+att
+bás
+belg
+bibl
+biol
+boh
+bot
+bulh
+círk
+csl
+Äas
+Äes
+dat
+děj
+dep
+dět
+dial
+dór
+dopr
+dosl
+ekon
+epic
+etnonym
+eufem
+f
+fam
+fem
+fil
+film
+form
+fot
+fr
+fut
+fyz
+gen
+geogr
+geol
+geom
+germ
+gram
+hebr
+herald
+hist
+hl
+hovor
+hud
+hut
+chcsl
+chem
+ie
+imp
+impf
+ind
+indoevr
+inf
+instr
+interj
+ión
+iron
+it
+kanad
+katalán
+klas
+kniž
+komp
+konj
+
+konkr
+kÅ™
+kuch
+lat
+lék
+les
+lid
+lit
+liturg
+lok
+log
+m
+mat
+meteor
+metr
+mod
+ms
+mysl
+n
+náb
+námoř
+neklas
+něm
+nesklon
+nom
+ob
+obch
+obyÄ
+ojed
+opt
+part
+pas
+pejor
+pers
+pf
+pl
+plpf
+
+práv
+prep
+předl
+přivl
+r
+rcsl
+refl
+reg
+rkp
+Å™
+řec
+s
+samohl
+sg
+sl
+souhl
+spec
+srov
+stfr
+střv
+stsl
+subj
+subst
+superl
+sv
+sz
+táz
+tech
+telev
+teol
+trans
+typogr
+var
+vedl
+verb
+vl. jm
+voj
+vok
+vůb
+vulg
+výtv
+vztaž
+zahr
+zájm
+zast
+zejm
+
+zeměd
+zkr
+zÅ™
+mj
+dl
+atp
+sport
+Mgr
+horn
+MVDr
+JUDr
+RSDr
+Bc
+PhDr
+ThDr
+Ing
+aj
+apod
+PharmDr
+pomn
+ev
+slang
+nprap
+odp
+dop
+pol
+st
+stol
+p. n. l
+před n. l
+n. l
+pÅ™. Kr
+po Kr
+pÅ™. n. l
+odd
+RNDr
+tzv
+atd
+tzn
+resp
+tj
+p
+br
+Ä. j
+Äj
+Ä. p
+Äp
+a. s
+s. r. o
+spol. s r. o
+p. o
+s. p
+v. o. s
+k. s
+o. p. s
+o. s
+v. r
+v z
+ml
+vÄ
+kr
+mld
+hod
+popÅ™
+ap
+event
+rus
+slov
+rum
+švýc
+P. T
+zvl
+hor
+dol
+S.O.S \ No newline at end of file
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.de b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.de
index 35fdf5eee..35fdf5eee 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.de
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.de
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.el b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.el
index 0470f9192..0470f9192 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.el
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.el
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.en b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.en
index e1a3733b5..e1a3733b5 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.en
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.en
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.es b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.es
index d8b275518..d8b275518 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.es
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.es
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.fr b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.fr
index 28126fa57..28126fa57 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.fr
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.fr
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.is b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.is
index 5b8a71086..5b8a71086 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.is
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.is
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.it b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.it
index 992b9ecd4..992b9ecd4 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.it
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.it
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.nl b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.nl
index c80c41772..c80c41772 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.nl
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.nl
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pl b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl
index 6b7c106e6..6b7c106e6 100755
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pl
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pl
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pt b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pt
index 5d65bf25a..5d65bf25a 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.pt
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.pt
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ro b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ro
index d489f4654..d489f4654 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ro
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ro
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ru b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ru
index 444465b35..444465b35 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.ru
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.ru
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sk b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sk
index 1198d4829..1198d4829 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sk
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sk
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sl b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sl
index 230062c69..230062c69 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sl
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sl
diff --git a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sv b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sv
index df5ef2959..df5ef2959 100644
--- a/scripts/tokenizer/nonbreaking_prefixes/nonbreaking_prefix.sv
+++ b/scripts/share/nonbreaking_prefixes/nonbreaking_prefix.sv
diff --git a/scripts/tests/full-train-mert-decode.test b/scripts/tests/full-train-mert-decode.test
index 359732656..359732656 100644..100755
--- a/scripts/tests/full-train-mert-decode.test
+++ b/scripts/tests/full-train-mert-decode.test
diff --git a/scripts/tests/train-factored-test-step9.test b/scripts/tests/train-factored-test-step9.test
index 96e9ba504..96e9ba504 100644..100755
--- a/scripts/tests/train-factored-test-step9.test
+++ b/scripts/tests/train-factored-test-step9.test
diff --git a/scripts/tokenizer/deescape-special-chars.perl b/scripts/tokenizer/deescape-special-chars.perl
index 345555990..7dc6bc539 100755
--- a/scripts/tokenizer/deescape-special-chars.perl
+++ b/scripts/tokenizer/deescape-special-chars.perl
@@ -3,7 +3,8 @@
use strict;
while(<STDIN>) {
- s/\&bar;/\|/g; # factor separator
+ s/\&bar;/\|/g; # factor separator (legacy)
+ s/\&#124;/\|/g; # factor separator
s/\&lt;/\</g; # xml
s/\&gt;/\>/g; # xml
s/\&bra;/\[/g; # syntax non-terminal (legacy)
diff --git a/scripts/tokenizer/detokenizer.perl b/scripts/tokenizer/detokenizer.perl
index 8233b419c..488ff7b5a 100755
--- a/scripts/tokenizer/detokenizer.perl
+++ b/scripts/tokenizer/detokenizer.perl
@@ -66,7 +66,8 @@ sub detokenize {
$text = " $text ";
$text =~ s/ \@\-\@ /-/g;
# de-escape special chars
- $text =~ s/\&bar;/\|/g; # factor separator
+ $text =~ s/\&bar;/\|/g; # factor separator (legacy)
+ $text =~ s/\&#124;/\|/g; # factor separator
$text =~ s/\&lt;/\</g; # xml
$text =~ s/\&gt;/\>/g; # xml
$text =~ s/\&bra;/\[/g; # syntax non-terminal (legacy)
diff --git a/scripts/tokenizer/escape-special-chars.perl b/scripts/tokenizer/escape-special-chars.perl
index 5d9690c04..89afdb0e3 100755
--- a/scripts/tokenizer/escape-special-chars.perl
+++ b/scripts/tokenizer/escape-special-chars.perl
@@ -13,7 +13,7 @@ while(<STDIN>) {
# special characters in moses
s/\&/\&amp;/g; # escape escape
- s/\|/\&bar;/g; # factor separator
+ s/\|/\&#124;/g; # factor separator
s/\</\&lt;/g; # xml
s/\>/\&gt;/g; # xml
s/\'/\&apos;/g; # xml
@@ -22,6 +22,6 @@ while(<STDIN>) {
s/\]/\&#93;/g; # syntax non-terminal
# restore xml instructions
- s/\&lt;(\S+) translation="([^\"]+)"&gt; (.+?) &lt;\/(\S+)&gt;/\<$1 translation=\"$2\"> $3 <\/$4>/g;
+ s/\&lt;(\S+) translation=&quot;(.+?)&quot;&gt; (.+?) &lt;\/(\S+)&gt;/\<$1 translation=\"$2\"> $3 <\/$4>/g;
print $_."\n";
}
diff --git a/scripts/tokenizer/tokenizer.perl b/scripts/tokenizer/tokenizer.perl
index 0cb713740..f59cd5f86 100755
--- a/scripts/tokenizer/tokenizer.perl
+++ b/scripts/tokenizer/tokenizer.perl
@@ -1,17 +1,25 @@
#!/usr/bin/perl -w
-# $Id: tokenizer.perl 915 2009-08-10 08:15:49Z philipp $
# Sample Tokenizer
+### Version 1.1
+# written by Pidong Wang, based on the code written by Josh Schroeder and Philipp Koehn
+# Version 1.1 updates:
+# (1) add multithreading option "-threads NUM_THREADS" (default is 1);
+# (2) add a timing option "-time" to calculate the average speed of this tokenizer;
+# (3) add an option "-lines NUM_SENTENCES_PER_THREAD" to set the number of lines for each thread (default is 2000), and this option controls the memory amount needed: the larger this number is, the larger memory is required (the higher tokenization speed);
+### Version 1.0
+# $Id: tokenizer.perl 915 2009-08-10 08:15:49Z philipp $
# written by Josh Schroeder, based on code by Philipp Koehn
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use strict;
-#use Time::HiRes;
+use Time::HiRes;
+use Thread;
-my $mydir = "$Bin/nonbreaking_prefixes";
+my $mydir = "$RealBin/../share/nonbreaking_prefixes";
my %NONBREAKING_PREFIX = ();
my $language = "en";
@@ -19,10 +27,12 @@ my $QUIET = 0;
my $HELP = 0;
my $AGGRESSIVE = 0;
my $SKIP_XML = 0;
+my $TIMING = 0;
+my $NUM_THREADS = 1;
+my $NUM_SENTENCES_PER_THREAD = 2000;
-#my $start = [ Time::HiRes::gettimeofday( ) ];
-
-while (@ARGV) {
+while (@ARGV)
+{
$_ = shift;
/^-b$/ && ($| = 1, next);
/^-l$/ && ($language = shift, next);
@@ -30,167 +40,309 @@ while (@ARGV) {
/^-h$/ && ($HELP = 1, next);
/^-x$/ && ($SKIP_XML = 1, next);
/^-a$/ && ($AGGRESSIVE = 1, next);
+ /^-time$/ && ($TIMING = 1, next);
+ /^-threads$/ && ($NUM_THREADS = int(shift), next);
+ /^-lines$/ && ($NUM_SENTENCES_PER_THREAD = int(shift), next);
+}
+
+# for time calculation
+my $start_time;
+if ($TIMING)
+{
+ $start_time = [ Time::HiRes::gettimeofday( ) ];
}
-if ($HELP) {
- print "Usage ./tokenizer.perl (-l [en|de|...]) < textfile > tokenizedfile\n";
+# print help message
+if ($HELP)
+{
+ print "Usage ./tokenizer.perl (-l [en|de|...]) (-threads 4) < textfile > tokenizedfile\n";
print "Options:\n";
- print " -q ... quiet.\n";
- print " -a ... aggressive hyphen splitting.\n";
- print " -b ... disable Perl buffering.\n";
+ print " -q ... quiet.\n";
+ print " -a ... aggressive hyphen splitting.\n";
+ print " -b ... disable Perl buffering.\n";
+ print " -time ... enable processing time calculation.\n";
exit;
}
-if (!$QUIET) {
- print STDERR "Tokenizer Version 1.0\n";
+
+if (!$QUIET)
+{
+ print STDERR "Tokenizer Version 1.1\n";
print STDERR "Language: $language\n";
+ print STDERR "Number of threads: $NUM_THREADS\n";
}
+# load the language-specific non-breaking prefix info from files in the directory nonbreaking_prefixes
load_prefixes($language,\%NONBREAKING_PREFIX);
-if (scalar(%NONBREAKING_PREFIX) eq 0){
+if (scalar(%NONBREAKING_PREFIX) eq 0)
+{
print STDERR "Warning: No known abbreviations for language '$language'\n";
}
-while(<STDIN>) {
- if (($SKIP_XML && /^<.+>$/) || /^\s*$/) {
- #don't try to tokenize XML/HTML tag lines
- print $_;
- }
- else {
- print &tokenize($_);
- }
-}
+my @batch_sentences = ();
+my @thread_list = ();
+my $count_sentences = 0;
-#my $duration = Time::HiRes::tv_interval( $start );
-#print STDERR ("EXECUTION TIME: ".$duration."\n");
+if ($NUM_THREADS > 1)
+{# multi-threading tokenization
+ while(<STDIN>)
+ {
+ $count_sentences = $count_sentences + 1;
+ push(@batch_sentences, $_);
+ if (scalar(@batch_sentences)>=($NUM_SENTENCES_PER_THREAD*$NUM_THREADS))
+ {
+ # assign each thread work
+ for (my $i=0; $i<$NUM_THREADS; $i++)
+ {
+ my $start_index = $i*$NUM_SENTENCES_PER_THREAD;
+ my $end_index = $start_index+$NUM_SENTENCES_PER_THREAD-1;
+ my @subbatch_sentences = @batch_sentences[$start_index..$end_index];
+ my $new_thread = new Thread \&tokenize_batch, @subbatch_sentences;
+ push(@thread_list, $new_thread);
+ }
+ foreach (@thread_list)
+ {
+ my $tokenized_list = $_->join;
+ foreach (@$tokenized_list)
+ {
+ print $_;
+ }
+ }
+ # reset for the new run
+ @thread_list = ();
+ @batch_sentences = ();
+ }
+ }
+ # the last batch
+ if (scalar(@batch_sentences)>0)
+ {
+ # assign each thread work
+ for (my $i=0; $i<$NUM_THREADS; $i++)
+ {
+ my $start_index = $i*$NUM_SENTENCES_PER_THREAD;
+ if ($start_index >= scalar(@batch_sentences))
+ {
+ last;
+ }
+ my $end_index = $start_index+$NUM_SENTENCES_PER_THREAD-1;
+ if ($end_index >= scalar(@batch_sentences))
+ {
+ $end_index = scalar(@batch_sentences)-1;
+ }
+ my @subbatch_sentences = @batch_sentences[$start_index..$end_index];
+ my $new_thread = new Thread \&tokenize_batch, @subbatch_sentences;
+ push(@thread_list, $new_thread);
+ }
+ foreach (@thread_list)
+ {
+ my $tokenized_list = $_->join;
+ foreach (@$tokenized_list)
+ {
+ print $_;
+ }
+ }
+ }
+}
+else
+{# single thread only
+ while(<STDIN>)
+ {
+ if (($SKIP_XML && /^<.+>$/) || /^\s*$/)
+ {
+ #don't try to tokenize XML/HTML tag lines
+ print $_;
+ }
+ else
+ {
+ print &tokenize($_);
+ }
+ }
+}
+if ($TIMING)
+{
+ my $duration = Time::HiRes::tv_interval( $start_time );
+ print STDERR ("TOTAL EXECUTION TIME: ".$duration."\n");
+ print STDERR ("TOKENIZATION SPEED: ".($duration/$count_sentences*1000)." milliseconds/line\n");
+}
-sub tokenize {
- my($text) = @_;
- chomp($text);
- $text = " $text ";
-
- # remove ASCII junk
- $text =~ s/\s+/ /g;
- $text =~ s/[\000-\037]//g;
+#####################################################################################
+# subroutines afterward
- # seperate out all "other" special characters
- $text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
-
- # aggressive hyphen splitting
- if ($AGGRESSIVE) {
- $text =~ s/([\p{IsAlnum}])\-([\p{IsAlnum}])/$1 \@-\@ $2/g;
+# tokenize a batch of texts saved in an array
+# input: an array containing a batch of texts
+# return: another array cotaining a batch of tokenized texts for the input array
+sub tokenize_batch
+{
+ my(@text_list) = @_;
+ my(@tokenized_list) = ();
+ foreach (@text_list)
+ {
+ if (($SKIP_XML && /^<.+>$/) || /^\s*$/)
+ {
+ #don't try to tokenize XML/HTML tag lines
+ push(@tokenized_list, $_);
+ }
+ else
+ {
+ push(@tokenized_list, &tokenize($_));
}
+ }
+ return \@tokenized_list;
+}
+
+# the actual tokenize function which tokenizes one input string
+# input: one string
+# return: the tokenized string for the input string
+sub tokenize
+{
+ my($text) = @_;
+ chomp($text);
+ $text = " $text ";
+
+ # remove ASCII junk
+ $text =~ s/\s+/ /g;
+ $text =~ s/[\000-\037]//g;
- #multi-dots stay together
- $text =~ s/\.([\.]+)/ DOTMULTI$1/g;
- while($text =~ /DOTMULTI\./) {
- $text =~ s/DOTMULTI\.([^\.])/DOTDOTMULTI $1/g;
- $text =~ s/DOTMULTI\./DOTDOTMULTI/g;
- }
-
- # seperate out "," except if within numbers (5,300)
- $text =~ s/([^\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
- # separate , pre and post number
- $text =~ s/([\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
- $text =~ s/([^\p{IsN}])[,]([\p{IsN}])/$1 , $2/g;
+ # seperate out all "other" special characters
+ $text =~ s/([^\p{IsAlnum}\s\.\'\`\,\-])/ $1 /g;
+
+ # aggressive hyphen splitting
+ if ($AGGRESSIVE)
+ {
+ $text =~ s/([\p{IsAlnum}])\-([\p{IsAlnum}])/$1 \@-\@ $2/g;
+ }
+
+ #multi-dots stay together
+ $text =~ s/\.([\.]+)/ DOTMULTI$1/g;
+ while($text =~ /DOTMULTI\./)
+ {
+ $text =~ s/DOTMULTI\.([^\.])/DOTDOTMULTI $1/g;
+ $text =~ s/DOTMULTI\./DOTDOTMULTI/g;
+ }
+
+ # seperate out "," except if within numbers (5,300)
+ $text =~ s/([^\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
+ # separate , pre and post number
+ $text =~ s/([\p{IsN}])[,]([^\p{IsN}])/$1 , $2/g;
+ $text =~ s/([^\p{IsN}])[,]([\p{IsN}])/$1 , $2/g;
- # turn `into '
- $text =~ s/\`/\'/g;
+ # turn `into '
+ $text =~ s/\`/\'/g;
- #turn '' into "
- $text =~ s/\'\'/ \" /g;
-
- if ($language eq "en") {
- #split contractions right
- $text =~ s/([^\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
- $text =~ s/([^\p{IsAlpha}\p{IsN}])[']([\p{IsAlpha}])/$1 ' $2/g;
- $text =~ s/([\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
- $text =~ s/([\p{IsAlpha}])[']([\p{IsAlpha}])/$1 '$2/g;
- #special case for "1990's"
- $text =~ s/([\p{IsN}])[']([s])/$1 '$2/g;
- } elsif (($language eq "fr") or ($language eq "it")) {
- #split contractions left
- $text =~ s/([^\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
- $text =~ s/([^\p{IsAlpha}])[']([\p{IsAlpha}])/$1 ' $2/g;
- $text =~ s/([\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
- $text =~ s/([\p{IsAlpha}])[']([\p{IsAlpha}])/$1' $2/g;
- } else {
- $text =~ s/\'/ \' /g;
- }
+ #turn '' into "
+ $text =~ s/\'\'/ \" /g;
+
+ if ($language eq "en")
+ {
+ #split contractions right
+ $text =~ s/([^\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
+ $text =~ s/([^\p{IsAlpha}\p{IsN}])[']([\p{IsAlpha}])/$1 ' $2/g;
+ $text =~ s/([\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
+ $text =~ s/([\p{IsAlpha}])[']([\p{IsAlpha}])/$1 '$2/g;
+ #special case for "1990's"
+ $text =~ s/([\p{IsN}])[']([s])/$1 '$2/g;
+ }
+ elsif (($language eq "fr") or ($language eq "it"))
+ {
+ #split contractions left
+ $text =~ s/([^\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
+ $text =~ s/([^\p{IsAlpha}])[']([\p{IsAlpha}])/$1 ' $2/g;
+ $text =~ s/([\p{IsAlpha}])[']([^\p{IsAlpha}])/$1 ' $2/g;
+ $text =~ s/([\p{IsAlpha}])[']([\p{IsAlpha}])/$1' $2/g;
+ }
+ else
+ {
+ $text =~ s/\'/ \' /g;
+ }
- #word token method
- my @words = split(/\s/,$text);
- $text = "";
- for (my $i=0;$i<(scalar(@words));$i++) {
- my $word = $words[$i];
- if ( $word =~ /^(\S+)\.$/) {
- my $pre = $1;
- if (($pre =~ /\./ && $pre =~ /\p{IsAlpha}/) || ($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==1) || ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[\p{IsLower}]/))) {
- #no change
- } elsif (($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==2) && ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[0-9]+/))) {
- #no change
- } else {
- $word = $pre." .";
- }
- }
- $text .= $word." ";
- }
-
- # clean up extraneous spaces
- $text =~ s/ +/ /g;
- $text =~ s/^ //g;
- $text =~ s/ $//g;
-
- #restore multi-dots
- while($text =~ /DOTDOTMULTI/) {
- $text =~ s/DOTDOTMULTI/DOTMULTI./g;
- }
- $text =~ s/DOTMULTI/./g;
-
- #escape special chars
- $text =~ s/\&/\&amp;/g; # escape escape
- $text =~ s/\|/\&bar;/g; # factor separator
- $text =~ s/\</\&lt;/g; # xml
- $text =~ s/\>/\&gt;/g; # xml
- $text =~ s/\'/\&apos;/g; # xml
- $text =~ s/\"/\&quot;/g; # xml
- $text =~ s/\[/\&#91;/g; # syntax non-terminal
- $text =~ s/\]/\&#93;/g; # syntax non-terminal
-
- #ensure final line break
- $text .= "\n" unless $text =~ /\n$/;
-
- return $text;
+ #word token method
+ my @words = split(/\s/,$text);
+ $text = "";
+ for (my $i=0;$i<(scalar(@words));$i++)
+ {
+ my $word = $words[$i];
+ if ( $word =~ /^(\S+)\.$/)
+ {
+ my $pre = $1;
+ if (($pre =~ /\./ && $pre =~ /\p{IsAlpha}/) || ($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==1) || ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[\p{IsLower}]/)))
+ {
+ #no change
+ }
+ elsif (($NONBREAKING_PREFIX{$pre} && $NONBREAKING_PREFIX{$pre}==2) && ($i<scalar(@words)-1 && ($words[$i+1] =~ /^[0-9]+/)))
+ {
+ #no change
+ }
+ else
+ {
+ $word = $pre." .";
+ }
+ }
+ $text .= $word." ";
+ }
+
+ # clean up extraneous spaces
+ $text =~ s/ +/ /g;
+ $text =~ s/^ //g;
+ $text =~ s/ $//g;
+
+ #restore multi-dots
+ while($text =~ /DOTDOTMULTI/)
+ {
+ $text =~ s/DOTDOTMULTI/DOTMULTI./g;
+ }
+ $text =~ s/DOTMULTI/./g;
+
+ #escape special chars
+ $text =~ s/\&/\&amp;/g; # escape escape
+ $text =~ s/\|/\&#124;/g; # factor separator
+ $text =~ s/\</\&lt;/g; # xml
+ $text =~ s/\>/\&gt;/g; # xml
+ $text =~ s/\'/\&apos;/g; # xml
+ $text =~ s/\"/\&quot;/g; # xml
+ $text =~ s/\[/\&#91;/g; # syntax non-terminal
+ $text =~ s/\]/\&#93;/g; # syntax non-terminal
+
+ #ensure final line break
+ $text .= "\n" unless $text =~ /\n$/;
+
+ return $text;
}
-sub load_prefixes {
- my ($language, $PREFIX_REF) = @_;
+sub load_prefixes
+{
+ my ($language, $PREFIX_REF) = @_;
- my $prefixfile = "$mydir/nonbreaking_prefix.$language";
+ my $prefixfile = "$mydir/nonbreaking_prefix.$language";
- #default back to English if we don't have a language-specific prefix file
- if (!(-e $prefixfile)) {
- $prefixfile = "$mydir/nonbreaking_prefix.en";
- print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n";
- die ("ERROR: No abbreviations files found in $mydir\n") unless (-e $prefixfile);
- }
-
- if (-e "$prefixfile") {
- open(PREFIX, "<:utf8", "$prefixfile");
- while (<PREFIX>) {
- my $item = $_;
- chomp($item);
- if (($item) && (substr($item,0,1) ne "#")) {
- if ($item =~ /(.*)[\s]+(\#NUMERIC_ONLY\#)/) {
- $PREFIX_REF->{$1} = 2;
- } else {
- $PREFIX_REF->{$item} = 1;
- }
- }
- }
- close(PREFIX);
- }
+ #default back to English if we don't have a language-specific prefix file
+ if (!(-e $prefixfile))
+ {
+ $prefixfile = "$mydir/nonbreaking_prefix.en";
+ print STDERR "WARNING: No known abbreviations for language '$language', attempting fall-back to English version...\n";
+ die ("ERROR: No abbreviations files found in $mydir\n") unless (-e $prefixfile);
+ }
+ if (-e "$prefixfile")
+ {
+ open(PREFIX, "<:utf8", "$prefixfile");
+ while (<PREFIX>)
+ {
+ my $item = $_;
+ chomp($item);
+ if (($item) && (substr($item,0,1) ne "#"))
+ {
+ if ($item =~ /(.*)[\s]+(\#NUMERIC_ONLY\#)/)
+ {
+ $PREFIX_REF->{$1} = 2;
+ }
+ else
+ {
+ $PREFIX_REF->{$item} = 1;
+ }
+ }
+ }
+ close(PREFIX);
+ }
}
diff --git a/scripts/training/Jamfile b/scripts/training/Jamfile
index 8ca408d07..e69de29bb 100644
--- a/scripts/training/Jamfile
+++ b/scripts/training/Jamfile
@@ -1,14 +0,0 @@
-build-project compact-rule-table ;
-build-project phrase-extract ;
-build-project lexical-reordering ;
-build-project symal ;
-
-if $(WITH-GIZA) != no || $(CLEANING) != no {
- make train-model.perl : train-model.perl.missing_bin_dir : @missing_bin_dir ;
- actions missing_bin_dir {
- sed 's#^my \$BINDIR\s*=.*#my\ \$BINDIR=\"$(WITH-GIZA)\";#' $(>) >$(<)
- chmod +x $(<)
- }
-
- install legacy : train-model.perl : <location>. ;
-}
diff --git a/scripts/training/LexicalTranslationModel.pm b/scripts/training/LexicalTranslationModel.pm
new file mode 100644
index 000000000..c0570df5c
--- /dev/null
+++ b/scripts/training/LexicalTranslationModel.pm
@@ -0,0 +1,132 @@
+package LexicalTranslationModel;
+
+use strict;
+use warnings;
+
+BEGIN {
+ require Exporter;
+
+ our $VERSION = 1.0;
+ our @ISA = qw(Exporter);
+ our @EXPORT = qw(get_lexical);
+ our @EXPORT_OK = qw();
+
+}
+
+sub open_compressed {
+
+ # utilities
+ my $ZCAT = "gzip -cd";
+ my $BZCAT = "bzcat";
+
+ my ($file) = @_;
+ print "FILE: $file\n";
+
+ # add extensions, if necessary
+ $file = $file.".bz2" if ! -e $file && -e $file.".bz2";
+ $file = $file.".gz" if ! -e $file && -e $file.".gz";
+
+ # pipe zipped, if necessary
+ return "$BZCAT $file|" if $file =~ /\.bz2$/;
+ return "$ZCAT $file|" if $file =~ /\.gz$/;
+ return $file;
+}
+
+sub fix_spaces {
+ my ($in) = @_;
+ $$in =~ s/[ \t]+/ /g; $$in =~ s/[ \t]$//; $$in =~ s/^[ \t]//;
+}
+
+sub get_lexical {
+ my ($alignment_file_f,$alignment_file_e,$alignment_file_a,$lexical_file,$write_counts) = @_;
+ print STDERR "($alignment_file_f,$alignment_file_e,$lexical_file)\n";
+# my $alignment_file_a = $___ALIGNMENT_FILE.".".$___ALIGNMENT;
+
+ my (%WORD_TRANSLATION,%TOTAL_FOREIGN,%TOTAL_ENGLISH);
+
+ if (-e "$lexical_file.f2e" && -e "$lexical_file.e2f" && (!$write_counts || -e "$lexical_file.counts.f2e" && -e "$lexical_file.counts.e2f")) {
+ print STDERR " reusing: $lexical_file.f2e and $lexical_file.e2f\n";
+ return;
+ }
+
+ open(E,&open_compressed($alignment_file_e)) or die "ERROR: Can't read $alignment_file_e";
+ open(F,&open_compressed($alignment_file_f)) or die "ERROR: Can't read $alignment_file_f";
+ open(A,&open_compressed($alignment_file_a)) or die "ERROR: Can't read $alignment_file_a";
+
+ my $alignment_id = 0;
+ while(my $e = <E>) {
+ if (($alignment_id++ % 1000) == 0) { print STDERR "!"; }
+ chomp($e); fix_spaces(\$e);
+ my @ENGLISH = split(/ /,$e);
+ my $f = <F>; chomp($f); fix_spaces(\$f);
+ my @FOREIGN = split(/ /,$f);
+ my $a = <A>; chomp($a); fix_spaces(\$a);
+
+ my (%FOREIGN_ALIGNED,%ENGLISH_ALIGNED);
+ foreach (split(/ /,$a)) {
+ my ($fi,$ei) = split(/\-/);
+ if ($fi >= scalar(@FOREIGN) || $ei >= scalar(@ENGLISH)) {
+ print STDERR "alignment point ($fi,$ei) out of range (0-$#FOREIGN,0-$#ENGLISH) in line $alignment_id, ignoring\n";
+ }
+ else {
+ # local counts
+ $FOREIGN_ALIGNED{$fi}++;
+ $ENGLISH_ALIGNED{$ei}++;
+
+ # global counts
+ $WORD_TRANSLATION{$FOREIGN[$fi]}{$ENGLISH[$ei]}++;
+ $TOTAL_FOREIGN{$FOREIGN[$fi]}++;
+ $TOTAL_ENGLISH{$ENGLISH[$ei]}++;
+ }
+ }
+
+ # unaligned words
+ for(my $ei=0;$ei<scalar(@ENGLISH);$ei++) {
+ next if defined($ENGLISH_ALIGNED{$ei});
+ $WORD_TRANSLATION{"NULL"}{$ENGLISH[$ei]}++;
+ $TOTAL_ENGLISH{$ENGLISH[$ei]}++;
+ $TOTAL_FOREIGN{"NULL"}++;
+ }
+ for(my $fi=0;$fi<scalar(@FOREIGN);$fi++) {
+ next if defined($FOREIGN_ALIGNED{$fi});
+ $WORD_TRANSLATION{$FOREIGN[$fi]}{"NULL"}++;
+ $TOTAL_FOREIGN{$FOREIGN[$fi]}++;
+ $TOTAL_ENGLISH{"NULL"}++;
+ }
+ }
+ print STDERR "\n";
+ close(A);
+ close(F);
+ close(E);
+
+ open(F2E,">$lexical_file.f2e") or die "ERROR: Can't write $lexical_file.f2e";
+ open(E2F,">$lexical_file.e2f") or die "ERROR: Can't write $lexical_file.e2f";
+ if ($write_counts) {
+ open(F2E2,">$lexical_file.counts.f2e") or die "ERROR: Can't write $lexical_file.counts.f2e";
+ open(E2F2,">$lexical_file.counts.e2f") or die "ERROR: Can't write $lexical_file.counts.e2f";
+ }
+
+ foreach my $f (keys %WORD_TRANSLATION) {
+ foreach my $e (keys %{$WORD_TRANSLATION{$f}}) {
+ printf F2E "%s %s %.7f\n",$e,$f,$WORD_TRANSLATION{$f}{$e}/$TOTAL_FOREIGN{$f};
+ printf E2F "%s %s %.7f\n",$f,$e,$WORD_TRANSLATION{$f}{$e}/$TOTAL_ENGLISH{$e};
+ if ($write_counts) {
+ printf F2E2 "%s %s %i %i\n",$e,$f,$WORD_TRANSLATION{$f}{$e},$TOTAL_FOREIGN{$f};
+ printf E2F2 "%s %s %i %i\n",$f,$e,$WORD_TRANSLATION{$f}{$e},$TOTAL_ENGLISH{$e};
+ }
+ }
+ }
+ close(E2F);
+ close(F2E);
+ if ($write_counts) {
+ close(E2F2);
+ close(F2E2);
+ }
+ print STDERR "Saved: $lexical_file.f2e and $lexical_file.e2f\n";
+}
+
+
+END {
+}
+
+1;
diff --git a/scripts/training/absolutize_moses_model.pl b/scripts/training/absolutize_moses_model.pl
index 42f3d8aa3..1b485a01f 100755
--- a/scripts/training/absolutize_moses_model.pl
+++ b/scripts/training/absolutize_moses_model.pl
@@ -42,6 +42,12 @@ while (<$inih>) {
$_ = "$type $b $c $d $abs_src $abs_tgt $abs_align\n";
}
+ elsif ( $type eq '12' ) {
+ $abs = ensure_absolute($fn, $ini);
+ die "File not found or empty: $fn (searched for $abs.minphr)"
+ if ! -s $abs.".minphr"; # accept compact binarized ttables
+ $_ = "$type $b $c $d $abs\n";
+ }
else {
$abs = ensure_absolute($fn, $ini);
die "File not found or empty: $fn (searched for $abs or $abs.binphr.idx)"
@@ -61,8 +67,8 @@ while (<$inih>) {
chomp;
my ($a, $b, $c, $fn) = split / /;
$abs = ensure_absolute($fn, $ini);
- die "File not found or empty: $fn (searched for $abs or $abs.binlexr.idx)"
- if ! -s $abs && ! -s $abs.".binlexr.idx"; # accept binarized lexro models
+ die "File not found or empty: $fn (searched for $abs or $abs.binlexr.idx or $abs.minlexr)"
+ if ! -s $abs && ! -s $abs.".binlexr.idx" && ! -s $abs.".minlexr"; # accept binarized and compact lexro models
$_ = "$a $b $c $abs\n";
}
}
diff --git a/scripts/training/compact-rule-table/Compactify.cpp b/scripts/training/compact-rule-table/Compactify.cpp
deleted file mode 100644
index ceb7eb090..000000000
--- a/scripts/training/compact-rule-table/Compactify.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-#include "Compactify.h"
-
-#include "NumberedSet.h"
-#include "Options.h"
-#include "RuleTableParser.h"
-
-#include <boost/algorithm/string.hpp>
-#include <boost/program_options.hpp>
-#include <boost/unordered_map.hpp>
-
-#include <cstdlib>
-#include <fstream>
-#include <iostream>
-#include <set>
-#include <sstream>
-
-namespace moses {
-
-int Compactify::main(int argc, char *argv[]) {
- // Process the command-line arguments.
- Options options;
- processOptions(argc, argv, options);
-
- // Open the input stream.
- std::istream *inputPtr;
- std::ifstream inputFileStream;
- if (options.inputFile.empty() || options.inputFile == "-") {
- inputPtr = &(std::cin);
- } else {
- inputFileStream.open(options.inputFile.c_str());
- if (!inputFileStream) {
- std::ostringstream msg;
- msg << "failed to open input file: " << options.inputFile;
- error(msg.str());
- }
- inputPtr = &inputFileStream;
- }
- std::istream &input = *inputPtr;
-
- // Open the output stream.
- std::ostream *outputPtr;
- std::ofstream outputFileStream;
- if (options.outputFile.empty()) {
- outputPtr = &(std::cout);
- } else {
- outputFileStream.open(options.outputFile.c_str());
- if (!outputFileStream) {
- std::ostringstream msg;
- msg << "failed to open output file: " << options.outputFile;
- error(msg.str());
- }
- outputPtr = &outputFileStream;
- }
- std::ostream &output = *outputPtr;
-
- // Open a temporary file: the rule section must appear last in the output
- // file, but we don't want to store the full set of rules in memory during
- // processing, so instead they're written to a temporary file then copied to
- // the output file as a final step.
- std::fstream tempFileStream;
- {
- char fileNameTemplate[] = "/tmp/compact_XXXXXX";
- int fd = mkstemp(fileNameTemplate);
- if (fd == -1) {
- std::ostringstream msg;
- msg << "failed to open temporary file with pattern " << fileNameTemplate;
- error(msg.str());
- }
- tempFileStream.open(fileNameTemplate);
- if (!tempFileStream) {
- std::ostringstream msg;
- msg << "failed to open existing temporary file: " << fileNameTemplate;
- error(msg.str());
- }
- // Close the original file descriptor.
- close(fd);
- // Unlink the file. Its contents will be safe until tempFileStream is
- // closed.
- unlink(fileNameTemplate);
- }
-
- // Write the version number
- output << "1" << '\n';
-
- SymbolSet symbolSet;
- PhraseSet sourcePhraseSet;
- PhraseSet targetPhraseSet;
- AlignmentSetSet alignmentSetSet;
-
- SymbolPhrase symbolPhrase;
-
- size_t ruleCount = 0;
- RuleTableParser end;
- try {
- for (RuleTableParser parser(input); parser != end; ++parser) {
- const RuleTableParser::Entry &entry = *parser;
- ++ruleCount;
-
- // Report progress in the same format as extract-rules.
- if (ruleCount % 100000 == 0) {
- std::cerr << "." << std::flush;
- }
- if (ruleCount % 1000000 == 0) {
- std::cerr << " " << ruleCount << std::endl;
- }
-
- // Encode the source LHS + RHS as a vector of symbol IDs and insert into
- // sourcePhraseSet.
- encodePhrase(entry.sourceLhs, entry.sourceRhs, symbolSet, symbolPhrase);
- SymbolIDType sourceId = sourcePhraseSet.insert(symbolPhrase);
-
- // Encode the target LHS + RHS as a vector of symbol IDs and insert into
- // targetPhraseSet.
- encodePhrase(entry.targetLhs, entry.targetRhs, symbolSet, symbolPhrase);
- SymbolIDType targetId = targetPhraseSet.insert(symbolPhrase);
-
- // Insert the alignments into alignmentSetSet.
- AlignmentSetIDType alignmentSetId = alignmentSetSet.insert(
- entry.alignments);
-
- // Write this rule to the temporary file.
- tempFileStream << sourceId << " " << targetId << " " << alignmentSetId;
- for (std::vector<std::string>::const_iterator p = entry.scores.begin();
- p != entry.scores.end(); ++p) {
- tempFileStream << " " << *p;
- }
- tempFileStream << " :";
- for (std::vector<std::string>::const_iterator p = entry.counts.begin();
- p != entry.counts.end(); ++p) {
- tempFileStream << " " << *p;
- }
- tempFileStream << '\n';
- }
- } catch (Exception &e) {
- std::ostringstream msg;
- msg << "error processing line " << ruleCount+1 << ": " << e.getMsg();
- error(msg.str());
- }
-
- // Report the counts.
-
- if (ruleCount % 1000000 != 0) {
- std::cerr << std::endl;
- }
- std::cerr << "Rule count: " << ruleCount << std::endl;
- std::cerr << "Symbol count: " << symbolSet.size() << std::endl;
- std::cerr << "Source phrase count: " << sourcePhraseSet.size() << std::endl;
- std::cerr << "Target phrase count: " << targetPhraseSet.size() << std::endl;
- std::cerr << "Alignment set count: " << alignmentSetSet.size() << std::endl;
-
- // Write the symbol vocabulary.
-
- output << symbolSet.size() << '\n';
- for (SymbolSet::const_iterator p = symbolSet.begin();
- p != symbolSet.end(); ++p) {
- const std::string &str = **p;
- output << str << '\n';
- }
-
- // Write the source phrases.
-
- output << sourcePhraseSet.size() << '\n';
- for (PhraseSet::const_iterator p = sourcePhraseSet.begin();
- p != sourcePhraseSet.end(); ++p) {
- const SymbolPhrase &sourcePhrase = **p;
- for (SymbolPhrase::const_iterator q = sourcePhrase.begin();
- q != sourcePhrase.end(); ++q) {
- if (q != sourcePhrase.begin()) {
- output << " ";
- }
- output << *q;
- }
- output << '\n';
- }
-
- // Write the target phrases.
-
- output << targetPhraseSet.size() << '\n';
- for (PhraseSet::const_iterator p = targetPhraseSet.begin();
- p != targetPhraseSet.end(); ++p) {
- const SymbolPhrase &targetPhrase = **p;
- for (SymbolPhrase::const_iterator q = targetPhrase.begin();
- q != targetPhrase.end(); ++q) {
- if (q != targetPhrase.begin()) {
- output << " ";
- }
- output << *q;
- }
- output << '\n';
- }
-
- // Write the alignment sets.
-
- output << alignmentSetSet.size() << '\n';
- for (AlignmentSetSet::const_iterator p = alignmentSetSet.begin();
- p != alignmentSetSet.end(); ++p) {
- const AlignmentSet &alignmentSet = **p;
- for (AlignmentSet::const_iterator q = alignmentSet.begin();
- q != alignmentSet.end(); ++q) {
- if (q != alignmentSet.begin()) {
- output << " ";
- }
- output << q->first << "-" << q->second;
- }
- output << '\n';
- }
-
- // Write the rule count.
- output << ruleCount << '\n';
-
- // Copy the rules from the temporary file.
- tempFileStream.seekg(0);
- std::string line;
- while (std::getline(tempFileStream, line)) {
- output << line << '\n';
- }
-
- return 0;
-}
-
-void Compactify::processOptions(int argc, char *argv[],
- Options &options) const {
- namespace po = boost::program_options;
-
- std::ostringstream usageMsg;
- usageMsg << "usage: " << getName() << " [OPTION]... [FILE]";
-
- // Declare the command line options that are visible to the user.
- std::string caption = usageMsg.str() + std::string("\n\nAllowed options");
- po::options_description visible(caption);
- visible.add_options()
- ("help", "print help message and exit")
- ("output,o", po::value<std::string>(),
- "write rule table to arg instead of standard output")
- ;
-
- // Declare the command line options that are hidden from the user
- // (these are used as positional options).
- po::options_description hidden("Hidden options");
- hidden.add_options()
- ("input", po::value<std::string>(), "input file")
- ;
-
- // Compose the full set of command-line options.
- po::options_description cmdLineOptions;
- cmdLineOptions.add(visible).add(hidden);
-
- // Register the positional options.
- po::positional_options_description p;
- p.add("input", 1);
-
- // Process the command-line.
- po::variables_map vm;
- try {
- po::store(po::command_line_parser(argc, argv).
- options(cmdLineOptions).positional(p).run(), vm);
- po::notify(vm);
- } catch (const std::exception &e) {
- std::ostringstream msg;
- msg << e.what() << "\n\n" << visible;
- error(msg.str());
- std::exit(1);
- }
-
- if (vm.count("help")) {
- std::cout << visible << std::endl;
- std::exit(0);
- }
-
- // Process positional options.
-
- if (vm.count("input")) {
- options.inputFile = vm["input"].as<std::string>();
- }
-
- // Process remaining options.
-
- if (vm.count("output")) {
- options.outputFile = vm["output"].as<std::string>();
- }
-}
-
-void Compactify::encodePhrase(const std::string &lhs, const StringPhrase &rhs,
- SymbolSet &symbolSet, SymbolPhrase &vec) const {
- vec.clear();
- vec.reserve(rhs.size()+1);
- SymbolIDType id = symbolSet.insert(lhs);
- vec.push_back(id);
- for (std::vector<std::string>::const_iterator p = rhs.begin();
- p != rhs.end(); ++p) {
- SymbolIDType id = symbolSet.insert(*p);
- vec.push_back(id);
- }
-}
-
-} // namespace moses
diff --git a/scripts/training/compact-rule-table/Compactify.h b/scripts/training/compact-rule-table/Compactify.h
deleted file mode 100644
index ef5c1d9ec..000000000
--- a/scripts/training/compact-rule-table/Compactify.h
+++ /dev/null
@@ -1,43 +0,0 @@
-#pragma once
-#ifndef COMPACTIFY_H_
-#define COMPACTIFY_H_
-
-#include "NumberedSet.h"
-#include "Tool.h"
-
-#include <set>
-#include <vector>
-
-namespace moses {
-
-struct Options;
-
-// Tool for converting a rule table into a more compact format.
-class Compactify : public Tool {
- public:
- Compactify() : Tool("compactify") {}
- virtual int main(int, char *[]);
- private:
- typedef unsigned int SymbolIDType;
- typedef unsigned int PhraseIDType;
- typedef unsigned int AlignmentSetIDType;
- typedef std::vector<std::string> StringPhrase;
- typedef std::vector<SymbolIDType> SymbolPhrase;
- typedef std::pair<int, int> AlignmentPair;
- typedef std::set<AlignmentPair> AlignmentSet;
- typedef NumberedSet<std::string, SymbolIDType> SymbolSet;
- typedef NumberedSet<SymbolPhrase, PhraseIDType> PhraseSet;
- typedef NumberedSet<AlignmentSet, AlignmentSetIDType> AlignmentSetSet;
-
- void processOptions(int, char *[], Options &) const;
-
- // Given the string representations of a source or target LHS and RHS, encode
- // the symbols using the given SymbolSet and create a SymbolPhrase object.
- // The LHS index is the first element of the SymbolPhrase.
- void encodePhrase(const std::string &, const StringPhrase &,
- SymbolSet &, SymbolPhrase &) const;
-};
-
-} // namespace moses
-
-#endif
diff --git a/scripts/training/compact-rule-table/Compactify_Main.cpp b/scripts/training/compact-rule-table/Compactify_Main.cpp
deleted file mode 100644
index 5ae8833c1..000000000
--- a/scripts/training/compact-rule-table/Compactify_Main.cpp
+++ /dev/null
@@ -1,6 +0,0 @@
-#include "Compactify.h"
-
-int main(int argc, char *argv[]) {
- moses::Compactify tool;
- return tool.main(argc, argv);
-}
diff --git a/scripts/training/compact-rule-table/Exception.h b/scripts/training/compact-rule-table/Exception.h
deleted file mode 100644
index 3868fcec5..000000000
--- a/scripts/training/compact-rule-table/Exception.h
+++ /dev/null
@@ -1,22 +0,0 @@
-#pragma once
-#ifndef EXCEPTION_H_
-#define EXCEPTION_H_
-
-#include <string>
-
-namespace moses {
-
-class Exception {
- public:
- Exception(const char *msg) : m_msg(msg) {}
- Exception(const std::string &msg) : m_msg(msg) {}
-
- const std::string &getMsg() const { return m_msg; }
-
- private:
- std::string m_msg;
-};
-
-} // namespace moses
-
-#endif
diff --git a/scripts/training/compact-rule-table/Jamfile b/scripts/training/compact-rule-table/Jamfile
deleted file mode 100644
index 09f45d454..000000000
--- a/scripts/training/compact-rule-table/Jamfile
+++ /dev/null
@@ -1,3 +0,0 @@
-exe compactify : [ glob *.cpp ] ../../..//boost_program_options ;
-
-install tools : compactify : <install-type>EXE ;
diff --git a/scripts/training/compact-rule-table/NumberedSet.h b/scripts/training/compact-rule-table/NumberedSet.h
deleted file mode 100644
index 72c01cb30..000000000
--- a/scripts/training/compact-rule-table/NumberedSet.h
+++ /dev/null
@@ -1,79 +0,0 @@
-#pragma once
-#ifndef NUMBEREDSET_H_
-#define NUMBEREDSET_H_
-
-#include "Exception.h"
-
-#include <boost/unordered_map.hpp>
-
-#include <limits>
-#include <sstream>
-#include <vector>
-
-namespace moses {
-
-// Stores a set of elements of type T, each of which is allocated an integral
-// ID of type IdType. IDs are contiguous starting at 0. Elements cannot be
-// removed.
-template<typename T, typename IdType=size_t>
-class NumberedSet {
- private:
- typedef boost::unordered_map<T, IdType> ElementToIdMap;
- typedef std::vector<const T *> IdToElementMap;
-
- public:
- typedef typename IdToElementMap::const_iterator const_iterator;
-
- NumberedSet() {}
-
- const_iterator begin() const { return m_idToElement.begin(); }
- const_iterator end() const { return m_idToElement.end(); }
-
- // Static value
- static IdType nullID() { return std::numeric_limits<IdType>::max(); }
-
- bool empty() const { return m_idToElement.empty(); }
- size_t size() const { return m_idToElement.size(); }
-
- IdType lookup(const T &) const;
- const T &lookup(IdType) const;
-
- // Insert the given object and return its ID.
- IdType insert(const T &);
-
- private:
- ElementToIdMap m_elementToId;
- IdToElementMap m_idToElement;
-};
-
-template<typename T, typename IdType>
-IdType NumberedSet<T, IdType>::lookup(const T &s) const {
- typename ElementToIdMap::const_iterator p = m_elementToId.find(s);
- return (p == m_elementToId.end()) ? nullID() : p->second;
-}
-
-template<typename T, typename IdType>
-const T &NumberedSet<T, IdType>::lookup(IdType id) const {
- if (id < 0 || id >= m_idToElement.size()) {
- std::ostringstream msg;
- msg << "Value not found: " << id;
- throw Exception(msg.str());
- }
- return *(m_idToElement[id]);
-}
-
-template<typename T, typename IdType>
-IdType NumberedSet<T, IdType>::insert(const T &x) {
- std::pair<T, IdType> value(x, m_idToElement.size());
- std::pair<typename ElementToIdMap::iterator, bool> result =
- m_elementToId.insert(value);
- if (result.second) {
- // x is a new element.
- m_idToElement.push_back(&result.first->first);
- }
- return result.first->second;
-}
-
-} // namespace moses
-
-#endif
diff --git a/scripts/training/compact-rule-table/Options.h b/scripts/training/compact-rule-table/Options.h
deleted file mode 100644
index f3fdb9139..000000000
--- a/scripts/training/compact-rule-table/Options.h
+++ /dev/null
@@ -1,18 +0,0 @@
-#pragma once
-#ifndef OPTIONS_H_
-#define OPTIONS_H_
-
-#include <string>
-
-namespace moses {
-
-struct Options {
- public:
- Options() {}
- std::string inputFile;
- std::string outputFile;
-};
-
-} // namespace moses
-
-#endif
diff --git a/scripts/training/compact-rule-table/RuleTableParser.cpp b/scripts/training/compact-rule-table/RuleTableParser.cpp
deleted file mode 100644
index 6272079fd..000000000
--- a/scripts/training/compact-rule-table/RuleTableParser.cpp
+++ /dev/null
@@ -1,168 +0,0 @@
-#include "RuleTableParser.h"
-
-#include "Exception.h"
-
-#include <boost/algorithm/string.hpp>
-#include <boost/lexical_cast.hpp>
-
-#include <iostream>
-
-#include <istream>
-#include <string>
-
-namespace moses {
-
-RuleTableParser::RuleTableParser()
- : m_input(0) {
-}
-
-RuleTableParser::RuleTableParser(std::istream &input)
- : m_input(&input) {
- ++(*this);
-}
-
-RuleTableParser & RuleTableParser::operator++() {
- if (!m_input) {
- return *this;
- }
- if (!std::getline(*m_input, m_line)) {
- m_input = 0;
- return *this;
- }
- parseLine(m_line);
- return *this;
-}
-
-RuleTableParser RuleTableParser::operator++(int) {
- RuleTableParser tmp(*this);
- ++(*this);
- return tmp;
-}
-
-void RuleTableParser::parseLine(const std::string &line) {
- // Source symbols
- size_t pos = line.find("|||");
- if (pos == std::string::npos) {
- throw Exception("missing first delimiter");
- }
- std::string text = line.substr(0, pos);
- boost::trim(text);
- m_value.sourceRhs.clear();
- boost::split(m_value.sourceRhs, text, boost::algorithm::is_space(),
- boost::algorithm::token_compress_on);
- m_value.sourceLhs = m_value.sourceRhs.back();
- m_value.sourceRhs.pop_back();
- std::for_each(m_value.sourceRhs.begin(), m_value.sourceRhs.end(),
- trimPairedSymbolFromRight);
-
- // Target symbols
- size_t begin = pos+3;
- pos = line.find("|||", begin);
- if (pos == std::string::npos) {
- throw Exception("missing second delimiter");
- }
- text = line.substr(begin, pos-begin);
- boost::trim(text);
- m_value.targetRhs.clear();
- boost::split(m_value.targetRhs, text, boost::algorithm::is_space(),
- boost::algorithm::token_compress_on);
- m_value.targetLhs = m_value.targetRhs.back();
- m_value.targetRhs.pop_back();
- std::for_each(m_value.targetRhs.begin(), m_value.targetRhs.end(),
- trimPairedSymbolFromLeft);
-
- // Scores
- begin = pos+3;
- pos = line.find("|||", begin);
- if (pos == std::string::npos) {
- throw Exception("missing third delimiter");
- }
- text = line.substr(begin, pos-begin);
- boost::trim(text);
- m_value.scores.clear();
- boost::split(m_value.scores, text, boost::algorithm::is_space(),
- boost::algorithm::token_compress_on);
-
- // Alignments
- begin = pos+3;
- pos = line.find("|||", begin);
- if (pos == std::string::npos) {
- throw Exception("missing fourth delimiter");
- }
- text = line.substr(begin, pos-begin);
- m_value.alignments.clear();
- boost::trim(text);
- // boost::split behaves differently between versions on empry strings
- if (!text.empty()) {
- tmpStringVec.clear();
- boost::split(tmpStringVec, text, boost::algorithm::is_space(),
- boost::algorithm::token_compress_on);
- for (std::vector<std::string>::const_iterator p = tmpStringVec.begin();
- p != tmpStringVec.end(); ++p) {
- assert(!p->empty());
- std::vector<std::string> tmpVec;
- tmpVec.reserve(2);
- boost::split(tmpVec, *p, boost::algorithm::is_any_of("-"));
- if (tmpVec.size() != 2) {
- throw Exception("bad alignment pair");
- }
- std::pair<int, int> alignmentPair;
- alignmentPair.first = boost::lexical_cast<int>(tmpVec[0]);
- alignmentPair.second = boost::lexical_cast<int>(tmpVec[1]);
- m_value.alignments.insert(alignmentPair);
- }
- }
-
- // Counts + everything else (the 'tail')
- begin = pos+3;
- pos = line.find("|||", begin);
- if (pos == std::string::npos) {
- text = line.substr(begin);
- m_value.tail.clear();
- } else {
- text = line.substr(begin, pos-begin);
- m_value.tail = line.substr(pos+3);
- }
- boost::trim(text);
- m_value.counts.clear();
- boost::split(m_value.counts, text, boost::algorithm::is_space(),
- boost::algorithm::token_compress_on);
-}
-
-void RuleTableParser::trimPairedSymbolFromLeft(std::string &s) {
- size_t len = s.size();
- if (len < 2 || s[0] != '[' || s[len-1] != ']') {
- return;
- }
- size_t pos = s.find('[', 1);
- if (pos == std::string::npos) {
- std::ostringstream msg;
- msg << "malformed non-terminal pair: " << s;
- throw Exception(msg.str());
- }
- s.erase(0, pos);
-}
-
-void RuleTableParser::trimPairedSymbolFromRight(std::string &s) {
- size_t len = s.size();
- if (len < 2 || s[0] != '[' || s[len-1] != ']') {
- return;
- }
- size_t pos = s.find('[', 1);
- if (pos == std::string::npos) {
- std::ostringstream msg;
- msg << "malformed non-terminal pair: " << s;
- throw Exception(msg.str());
- }
- s.resize(pos);
-}
-
-bool operator==(const RuleTableParser &lhs, const RuleTableParser &rhs) {
- return lhs.m_input == rhs.m_input;
-}
-
-bool operator!=(const RuleTableParser &lhs, const RuleTableParser &rhs) {
- return !(lhs == rhs);
-}
-
-} // namespace moses
diff --git a/scripts/training/compact-rule-table/RuleTableParser.h b/scripts/training/compact-rule-table/RuleTableParser.h
deleted file mode 100644
index 5599e63de..000000000
--- a/scripts/training/compact-rule-table/RuleTableParser.h
+++ /dev/null
@@ -1,51 +0,0 @@
-#pragma once
-#ifndef RULETABLEPARSER_H_INCLUDED_
-#define RULETABLEPARSER_H_INCLUDED_
-
-#include <istream>
-#include <set>
-#include <string>
-#include <utility>
-#include <vector>
-
-namespace moses {
-
-class RuleTableParser {
- public:
- struct Entry {
- std::string sourceLhs;
- std::vector<std::string> sourceRhs;
- std::string targetLhs;
- std::vector<std::string> targetRhs;
- std::vector<std::string> scores;
- std::set<std::pair<int, int> > alignments;
- std::vector<std::string> counts;
- std::string tail;
- };
-
- RuleTableParser();
- RuleTableParser(std::istream &);
-
- const Entry &operator*() const { return m_value; }
- const Entry *operator->() const { return &m_value; }
-
- RuleTableParser &operator++();
- RuleTableParser operator++(int);
-
- friend bool operator==(const RuleTableParser &, const RuleTableParser &);
- friend bool operator!=(const RuleTableParser &, const RuleTableParser &);
-
- private:
- Entry m_value;
- std::istream *m_input;
- std::string m_line;
- std::vector<std::string> tmpStringVec;
-
- void parseLine(const std::string &);
- static void trimPairedSymbolFromLeft(std::string &);
- static void trimPairedSymbolFromRight(std::string &);
-};
-
-} // namespace moses
-
-#endif
diff --git a/scripts/training/compact-rule-table/Tool.h b/scripts/training/compact-rule-table/Tool.h
deleted file mode 100644
index 4a7e1e0e1..000000000
--- a/scripts/training/compact-rule-table/Tool.h
+++ /dev/null
@@ -1,34 +0,0 @@
-#pragma once
-#ifndef TOOL_H_
-#define TOOL_H_
-
-#include <cstdlib>
-#include <iostream>
-#include <string>
-
-namespace moses {
-
-class Tool {
- public:
- Tool(const std::string &name) : m_name(name) {}
-
- const std::string &getName() const { return m_name; }
-
- virtual int main(int argc, char *argv[]) = 0;
-
- void warn(const std::string &msg) const {
- std::cerr << m_name << ": warning: " << msg << std::endl;
- }
-
- void error(const std::string &msg) const {
- std::cerr << m_name << ": error: " << msg << std::endl;
- std::exit(1);
- }
-
- private:
- std::string m_name;
-};
-
-} // namespace moses
-
-#endif
diff --git a/scripts/training/filter-model-given-input.pl b/scripts/training/filter-model-given-input.pl
index e022ce530..df9c528e0 100755
--- a/scripts/training/filter-model-given-input.pl
+++ b/scripts/training/filter-model-given-input.pl
@@ -10,14 +10,14 @@
use strict;
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use Getopt::Long;
my $SCRIPTS_ROOTDIR;
if (defined($ENV{"SCRIPTS_ROOTDIR"})) {
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"};
} else {
- $SCRIPTS_ROOTDIR = $Bin;
+ $SCRIPTS_ROOTDIR = $RealBin;
if ($SCRIPTS_ROOTDIR eq '') {
$SCRIPTS_ROOTDIR = dirname(__FILE__);
}
diff --git a/scripts/training/get-lexical.perl b/scripts/training/get-lexical.perl
new file mode 100755
index 000000000..e23c15665
--- /dev/null
+++ b/scripts/training/get-lexical.perl
@@ -0,0 +1,19 @@
+#!/usr/bin/perl
+
+use strict;
+use FindBin qw($RealBin);
+BEGIN { require "$RealBin/LexicalTranslationModel.pm"; "LexicalTranslationModel"->import; }
+
+if (scalar(@ARGV) < 4) {
+
+ print STDERR $0." source target alignments output_prefix"."\n"
+
+} else {
+
+ my ($SOURCE,$TARGET,$ALIGNMENT,$OUT) = @ARGV;
+
+ &get_lexical($SOURCE,$TARGET,$ALIGNMENT,$OUT,0);
+
+}
+
+
diff --git a/scripts/training/symal/giza2bal.pl b/scripts/training/giza2bal.pl
index 553ff2b3e..553ff2b3e 100755
--- a/scripts/training/symal/giza2bal.pl
+++ b/scripts/training/giza2bal.pl
diff --git a/scripts/training/lexical-reordering/Jamfile b/scripts/training/lexical-reordering/Jamfile
deleted file mode 100644
index 2b18f9dde..000000000
--- a/scripts/training/lexical-reordering/Jamfile
+++ /dev/null
@@ -1,3 +0,0 @@
-exe score : InputFileStream.cpp reordering_classes.cpp score.cpp ../../..//z ;
-
-install dist : score : <location>. <install-type>EXE ;
diff --git a/scripts/training/mert-moses.pl b/scripts/training/mert-moses.pl
index 6eb6bcdd3..4f6af126d 100755
--- a/scripts/training/mert-moses.pl
+++ b/scripts/training/mert-moses.pl
@@ -48,13 +48,13 @@
# Original version by Philipp Koehn
use strict;
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use File::Basename;
use File::Path;
use File::Spec;
use Cwd;
-my $SCRIPTS_ROOTDIR = $Bin;
+my $SCRIPTS_ROOTDIR = $RealBin;
$SCRIPTS_ROOTDIR =~ s/\/training$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
@@ -108,6 +108,7 @@ my $___START_WITH_HISTORIC_BESTS = 0; # use best settings from all previous iter
my $___RANDOM_DIRECTIONS = 0; # search in random directions only
my $___NUM_RANDOM_DIRECTIONS = 0; # number of random directions, also works with default optimizer [Cer&al.,2008]
my $___RANDOM_RESTARTS = 20;
+my $___RETURN_BEST_DEV = 0; # return the best weights according to dev, not the last
# Flags related to PRO (Hopkins & May, 2011)
my $___PAIRWISE_RANKED_OPTIMIZER = 0; # flag to enable PRO.
@@ -147,6 +148,12 @@ my $mertdir = undef; # path to new mert directory
my $mertargs = undef; # args to pass through to mert & extractor
my $mertmertargs = undef; # args to pass through to mert only
my $extractorargs = undef; # args to pass through to extractor only
+
+# Args to pass through to batch mira only. This flags is useful to
+# change MIRA's hyperparameters such as regularization parameter C,
+# BLEU decay factor, and the number of iterations of MIRA.
+my $batch_mira_args = undef;
+
my $filtercmd = undef; # path to filter-model-given-input.pl
my $filterfile = undef;
my $qsubwrapper = undef;
@@ -157,6 +164,7 @@ my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of
# if undef work on all features
# (others are fixed to the starting values)
my $___RANGES = undef;
+my $___USE_CONFIG_WEIGHTS_FIRST = 0; # use weights in configuration file for first iteration
my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loading data (default =-1)
# -1 means all previous, i.e. from iteration 1
# 0 means no previous data, i.e. from actual iteration
@@ -202,14 +210,17 @@ GetOptions(
"random-directions" => \$___RANDOM_DIRECTIONS, # search only in random directions
"number-of-random-directions=i" => \$___NUM_RANDOM_DIRECTIONS, # number of random directions
"random-restarts=i" => \$___RANDOM_RESTARTS, # number of random restarts
+ "return-best-dev" => \$___RETURN_BEST_DEV, # return the best weights according to dev, not the last
"activate-features=s" => \$___ACTIVATE_FEATURES, #comma-separated (or blank-separated) list of features to work on (others are fixed to the starting values)
"range=s@" => \$___RANGES,
+ "use-config-weights-for-first-run" => \$___USE_CONFIG_WEIGHTS_FIRST, # use the weights in the configuration file when running the decoder for the first time
"prev-aggregate-nbestlist=i" => \$prev_aggregate_nbl_size, #number of previous step to consider when loading data (default =-1, i.e. all previous)
"maximum-iterations=i" => \$maximum_iterations,
"pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
"pro-starting-point" => \$___PRO_STARTING_POINT,
"historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
"batch-mira" => \$___BATCH_MIRA,
+ "batch-mira-args=s" => \$batch_mira_args,
"threads=i" => \$__THREADS
) or exit(1);
@@ -288,11 +299,17 @@ Options:
N means this and N previous iterations
--maximum-iterations=ITERS ... Maximum number of iterations. Default: $maximum_iterations
+ --return-best-dev ... Return the weights according to dev bleu, instead of returning
+ the last iteration
--random-directions ... search only in random directions
--number-of-random-directions=int ... number of random directions
(also works with regular optimizer, default: 0)
--pairwise-ranked ... Use PRO for optimisation (Hopkins and May, emnlp 2011)
--pro-starting-point ... Use PRO to get a starting point for MERT
+ --batch-mira ... Use Batch MIRA for optimisation (Cherry and Foster, NAACL 2012)
+ --batch-mira-args=STRING ... args to pass through to batch MIRA. This flag is useful to
+ change MIRA's hyperparameters such as regularization parameter C,
+ BLEU decay factor, and the number of iterations of MIRA.
--threads=NUMBER ... Use multi-threaded mert (must be compiled in).
--historic-interpolation ... Interpolate optimized weights with prior iterations' weight
(parameter sets factor [0;1] given to current weights)
@@ -320,7 +337,7 @@ $moses_parallel_cmd = File::Spec->catfile($SCRIPTS_ROOTDIR, "generic", "moses-pa
if !defined $moses_parallel_cmd;
if (!defined $mertdir) {
- $mertdir = File::Spec->catfile(File::Basename::dirname($SCRIPTS_ROOTDIR), "dist", "bin");
+ $mertdir = File::Spec->catfile(File::Basename::dirname($SCRIPTS_ROOTDIR), "bin");
die "mertdir does not exist: $mertdir" if ! -x $mertdir;
print STDERR "Assuming --mertdir=$mertdir\n";
}
@@ -329,11 +346,13 @@ my $mert_extract_cmd = File::Spec->catfile($mertdir, "extractor");
my $mert_mert_cmd = File::Spec->catfile($mertdir, "mert");
my $mert_pro_cmd = File::Spec->catfile($mertdir, "pro");
my $mert_mira_cmd = File::Spec->catfile($mertdir, "kbmira");
+my $mert_eval_cmd = File::Spec->catfile($mertdir, "evaluator");
die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
die "Not executable: $mert_pro_cmd" if ! -x $mert_pro_cmd;
die "Not executable: $mert_mira_cmd" if ! -x $mert_mira_cmd;
+die "Not executable: $mert_eval_cmd" if ! -x $mert_eval_cmd;
my $pro_optimizer = File::Spec->catfile($mertdir, "megam_i686.opt"); # or set to your installation
@@ -639,6 +658,18 @@ while (1) {
# In case something dies later, we might wish to have a copy
create_config($___CONFIG, "./run$run.moses.ini", $featlist, $run, (defined $devbleu ? $devbleu : "--not-estimated--"), $sparse_weights_file);
+ # Save dense weights to simplify best dev recovery
+ {
+ my $densefile = "run$run.dense";
+ my @vals = @{$featlist->{"values"}};
+ my @names = @{$featlist->{"names"}};
+ open my $denseout, '>', $densefile or die "Can't write $densefile (WD now $___WORKING_DIR)";
+ for (my $i = 0; $i < scalar(@{$featlist->{"names"}}); $i++) {
+ print $denseout "$names[$i] $names[$i] $vals[$i]\n";
+ }
+ close $denseout;
+ }
+
# skip running the decoder if the user wanted
if (! $skip_decoder) {
print "($run) run decoder to produce n-best lists\n";
@@ -734,6 +765,10 @@ while (1) {
}
my $mira_settings = "";
+ if ($___BATCH_MIRA && $batch_mira_args) {
+ $mira_settings .= "$batch_mira_args ";
+ }
+
$mira_settings .= " --dense-init run$run.$weights_in_file";
if (-e "run$run.sparse-weights") {
$mira_settings .= " --sparse-init run$run.sparse-weights";
@@ -924,7 +959,6 @@ while (1) {
print "loading data from $prev_score_file\n" if defined($prev_score_file);
print "loading data from $prev_init_file\n" if defined($prev_init_file);
}
-print "Training finished at " . `date`;
if (defined $allsorted) {
safesystem ("\\rm -f $allsorted") or die;
@@ -933,23 +967,50 @@ if (defined $allsorted) {
safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
-create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu, $sparse_weights_file);
+if($___RETURN_BEST_DEV) {
+ my $bestit=1;
+ my $bestbleu=0;
+ my $evalout = "eval.out";
+ for (my $i = 1; $i < $run; $i++) {
+ safesystem("$mert_eval_cmd --reference " . join(",", @references) . " --candidate run$i.out 2> /dev/null 1> $evalout");
+ open my $fh, '<', $evalout or die "Can't read $evalout : $!";
+ my $bleu = <$fh>;
+ chomp $bleu;
+ if($bleu > $bestbleu) {
+ $bestbleu = $bleu;
+ $bestit = $i;
+ }
+ close $fh;
+ }
+ print "copying weights from best iteration ($bestit, bleu=$bestbleu) to moses.ini\n";
+ my $best_sparse_file = undef;
+ if(defined $sparse_weights_file) {
+ $best_sparse_file = "run$bestit.sparse-weights";
+ }
+ create_config($___CONFIG_ORIG, "./moses.ini", get_featlist_from_file("run$bestit.dense"),
+ $bestit, $bestbleu, $best_sparse_file);
+}
+else {
+ create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu, $sparse_weights_file);
+}
# just to be sure that we have the really last finished step marked
&save_finished_step($finished_step_file, $run);
#chdir back to the original directory # useless, just to remind we were not there
chdir($cwd);
-
+print "Training finished at " . `date`;
} # end of local scope
sub get_weights_from_mert {
my ($outfile, $logfile, $weight_count, $sparse_weights) = @_;
my ($bestpoint, $devbleu);
- if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/) || $___BATCH_MIRA) {
+ if ($___PAIRWISE_RANKED_OPTIMIZER || ($___PRO_STARTING_POINT && $logfile =~ /pro/)
+ || $___BATCH_MIRA) {
open my $fh, '<', $outfile or die "Can't open $outfile: $!";
- my (@WEIGHT, $sum);
+ my @WEIGHT;
for (my $i = 0; $i < $weight_count; $i++) { push @WEIGHT, 0; }
+ my $sum = 0.0;
while (<$fh>) {
if (/^F(\d+) ([\-\.\de]+)/) { # regular features
$WEIGHT[$1] = $2;
@@ -958,11 +1019,14 @@ sub get_weights_from_mert {
$$sparse_weights{$1} = $2;
}
}
+ close $fh;
+ die "It seems feature values are invalid or unable to read $outfile." if $sum < 1e-09;
+
$devbleu = "unknown";
foreach (@WEIGHT) { $_ /= $sum; }
foreach (keys %{$sparse_weights}) { $$sparse_weights{$_} /= $sum; }
$bestpoint = join(" ", @WEIGHT);
- close $fh;
+
if($___BATCH_MIRA) {
open my $fh2, '<', $logfile or die "Can't open $logfile: $!";
while(<$fh2>) {
@@ -970,6 +1034,7 @@ sub get_weights_from_mert {
$devbleu = $1;
}
}
+ close $fh2;
}
} else {
open my $fh, '<', $logfile or die "Can't open $logfile: $!";
@@ -1013,7 +1078,8 @@ sub run_decoder {
$model_weights{$name} = "-$name" if !defined $model_weights{$name};
$model_weights{$name} .= sprintf " %.6f", $vals[$i];
}
- my $decoder_config = join(" ", values %model_weights);
+ my $decoder_config = "";
+ $decoder_config = join(" ", values %model_weights) unless $___USE_CONFIG_WEIGHTS_FIRST && $run==1;
$decoder_config .= " -weight-file run$run.sparse-weights" if -e "run$run.sparse-weights";
print STDERR "DECODER_CFG = $decoder_config\n";
print "decoder_config = $decoder_config\n";
@@ -1102,7 +1168,11 @@ sub get_featlist_from_moses {
my $cmd = "$___DECODER $___DECODER_FLAGS -config $configfn -inputtype $___INPUTTYPE -show-weights > $featlistfn";
safesystem($cmd) or die "Failed to run moses with the config $configfn";
}
+ return get_featlist_from_file($featlistfn);
+}
+sub get_featlist_from_file {
+ my $featlistfn = shift;
# read feature list
my @names = ();
my @startvalues = ();
@@ -1116,7 +1186,7 @@ sub get_featlist_from_moses {
my ($longname, $feature, $value) = ($1, $2, $3);
next if $value eq "sparse";
push @errs, "$featlistfn:$nr:Bad initial value of $feature: $value\n"
- if $value !~ /^[+-]?[0-9.e]+$/;
+ if $value !~ /^[+-]?[0-9.\-e]+$/;
push @errs, "$featlistfn:$nr:Unknown feature '$feature', please add it to \@ABBR_FULL_MAP\n"
if !defined $ABBR2FULL{$feature};
push @names, $feature;
@@ -1153,7 +1223,7 @@ sub get_order_of_scores_from_nbestlist {
$sparse = 1;
} elsif ($tok =~ /^([a-z][0-9a-z]*):/i) {
$label = $1;
- } elsif ($tok =~ /^-?[-0-9.e]+$/) {
+ } elsif ($tok =~ /^-?[-0-9.\-e]+$/) {
if (!$sparse) {
# a score found, remember it
die "Found a score but no label before it! Bad nbestlist '$fname_or_source'!"
diff --git a/scripts/training/phrase-extract/extract-ghkm/Jamfile b/scripts/training/phrase-extract/extract-ghkm/Jamfile
deleted file mode 100644
index e5a416f72..000000000
--- a/scripts/training/phrase-extract/extract-ghkm/Jamfile
+++ /dev/null
@@ -1,3 +0,0 @@
-exe extract-ghkm : [ glob *.cpp ] ..//filestreams ..//trees ../../../..//boost_iostreams ../../../..//boost_program_options ../../../..//z ;
-
-install tools : extract-ghkm : <install-type>EXE ;
diff --git a/scripts/training/phrase-extract/pcfg-extract/Jamfile b/scripts/training/phrase-extract/pcfg-extract/Jamfile
deleted file mode 100644
index be91d6d2f..000000000
--- a/scripts/training/phrase-extract/pcfg-extract/Jamfile
+++ /dev/null
@@ -1 +0,0 @@
-exe pcfg-extract : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ;
diff --git a/scripts/training/phrase-extract/pcfg-score/Jamfile b/scripts/training/phrase-extract/pcfg-score/Jamfile
deleted file mode 100644
index 7225381c0..000000000
--- a/scripts/training/phrase-extract/pcfg-score/Jamfile
+++ /dev/null
@@ -1 +0,0 @@
-exe pcfg-score : [ glob *.cc ] ..//pcfg-common ../../../..//boost_program_options ;
diff --git a/scripts/training/reduce-topt-count.pl b/scripts/training/reduce-topt-count.pl
new file mode 100755
index 000000000..15458b0b5
--- /dev/null
+++ b/scripts/training/reduce-topt-count.pl
@@ -0,0 +1,207 @@
+#!/usr/bin/perl
+
+# given a moses.ini, filter the phrase tables to contain
+# only ttable-limit options per source phrase
+#
+# outputs new phrase tables and updated moses.ini into targetdir
+#
+# usage: reduce-topt-count.pl moses.ini targetdir
+
+use strict;
+use warnings;
+use File::Basename;
+use File::Path;
+use POSIX;
+use List::Util qw( min sum );
+
+my ($ini_file, $targetdir) = @ARGV;
+
+if (! defined $targetdir) {
+ die "usage: reduce-topt-count.pl moses.ini targetdir\n"
+}
+
+my %ttables;
+my $ini_hdl = my_open($ini_file);
+my $outini_hdl = my_save("$targetdir/moses.ini");
+
+my $section = "";
+
+my %section_handlers = (
+ 'ttable-file' => read_ttable_file(),
+ 'ttable-limit' => read_ttable_limit(),
+ 'weight-t' => read_weight_t()
+);
+
+# print header for updated moses.ini
+my $timestamp = POSIX::strftime("%m/%d/%Y %H:%M:%S", localtime);
+print $outini_hdl <<"END";
+# Generated by reduce-topt-count.pl at $timestamp
+# Original file: $ini_file
+
+END
+
+# load original moses.ini & generate new moses.ini
+while (<$ini_hdl>) {
+ chomp(my $line = $_);
+ my $do_print = 1;
+ if ($line =~ m/^\s*#/ || $line =~ m/^\s*$/) {
+ #ignore empty and commented lines
+ } elsif ($line =~ m/^\[(.*)\]/) {
+ $section = $1; # start of a new section
+ } else {
+ if (defined $section_handlers{$section}) {
+ # call appropriate section handler;
+ # handlers are also responsible for printing out
+ # (possibly modified) line into new moses.ini
+ $do_print = 0;
+ $section_handlers{$section}->($line, $outini_hdl);
+ }
+ }
+
+ if ($do_print) {
+ print $outini_hdl "$line\n";
+ }
+}
+close $outini_hdl;
+
+# write filtered phrase tables
+for my $ttable (keys %ttables) {
+ filter_table($ttables{$ttable});
+}
+
+# filter phrase tables
+
+## subroutines
+
+sub read_ttable_file
+{
+ my $ttable_id = 0;
+ return sub {
+ my ($line, $outhdl) = @_;
+ if ($line !~ m/^(\d+) ([\d\,\-]+) ([\d\,\-]+) (\d+) (\S+)$/) {
+ die "Format not recognized: $line";
+ }
+ my ($type, $srcfacts, $tgtfacts, $numscores, $file) = ($1, $2, $3, $4, $5);
+ if ($type != 0) {
+ die "Cannot work with ttables of type $type";
+ }
+ $ttables{$ttable_id} = {
+ file => $file,
+ scores => $numscores
+ };
+
+ print $outhdl
+ "$type $srcfacts $tgtfacts $numscores $targetdir/", basename($file), "\n";
+ $ttable_id++;
+ }
+}
+
+sub read_ttable_limit
+{
+ my $ttable_id = 0;
+ return sub {
+ my ($line, $outhdl) = @_;
+ $ttables{$ttable_id}->{limit} = $line;
+ print $outhdl "$line\n";
+ $ttable_id++;
+ }
+}
+
+sub read_weight_t
+{
+ my $weight_idx = 0;
+ my $ttable_id = 0;
+ return sub {
+ my ($line, $outhdl) = @_;
+ if ($ttables{$ttable_id}->{scores} == $weight_idx) {
+ $weight_idx = 0;
+ $ttable_id++;
+ }
+ push @{ $ttables{$ttable_id}->{weights} }, $line;
+ print $outhdl "$line\n";
+ $weight_idx++;
+ }
+}
+
+sub filter_table
+{
+ my $ttable = shift;
+ my $in = my_open($ttable->{file});
+ my $out = my_save($targetdir . "/" . basename($ttable->{file}));
+ my $limit = $ttable->{limit};
+ my @weights = @{ $ttable->{weights} };
+
+ print STDERR "Filtering ", $ttable->{file}, ", using limit $limit\n";
+ my $kept = 0;
+ my $total = 0;
+
+ my $src_phrase = "";
+ my @tgt_phrases;
+ while (<$in>) {
+ chomp(my $line = $_);
+ $total++;
+ print STDERR '.' if $total % 1000 == 0;
+ my @cols = split / \|\|\| /, $line;
+ if ($cols[0] ne $src_phrase) {
+ my @sorted = sort { $b->{score} <=> $a->{score} } @tgt_phrases;
+ for my $phrase (@sorted[0 .. min($#sorted, $limit - 1)]) {
+ $kept++;
+ print $out $phrase->{str}, "\n";
+ }
+ $src_phrase = $cols[0];
+ @tgt_phrases = ();
+ }
+ my @scores = split ' ', $cols[2];
+ push @tgt_phrases, {
+ str => $line,
+ score => sum(map { $weights[$_] * log $scores[$_] } (0 .. $#weights))
+ };
+ }
+ printf STDERR "Finished, kept %d%% of phrases\n", $kept / $total * 100;
+ close $in;
+ close $out;
+}
+
+sub my_open {
+ my $f = shift;
+ die "Not found: $f" if ! -e $f;
+
+ my $opn;
+ my $hdl;
+ my $ft = `file $f`;
+ # file might not recognize some files!
+ if ($f =~ /\.gz$/ || $ft =~ /gzip compressed data/) {
+ $opn = "zcat $f |";
+ } elsif ($f =~ /\.bz2$/ || $ft =~ /bzip2 compressed data/) {
+ $opn = "bzcat $f |";
+ } else {
+ $opn = "$f";
+ }
+ open $hdl, $opn or die "Can't open '$opn': $!";
+ binmode $hdl, ":utf8";
+ return $hdl;
+}
+
+sub my_save {
+ my $f = shift;
+ if ($f eq "-") {
+ binmode(STDOUT, ":utf8");
+ return *STDOUT;
+ }
+
+ my $opn;
+ my $hdl;
+ # file might not recognize some files!
+ if ($f =~ /\.gz$/) {
+ $opn = "| gzip -c > '$f'";
+ } elsif ($f =~ /\.bz2$/) {
+ $opn = "| bzip2 > '$f'";
+ } else {
+ $opn = ">$f";
+ }
+ mkpath( dirname($f) );
+ open $hdl, $opn or die "Can't write to '$opn': $!";
+ binmode $hdl, ":utf8";
+ return $hdl;
+}
+
diff --git a/scripts/training/symal/Jamfile b/scripts/training/symal/Jamfile
deleted file mode 100644
index 899046bd8..000000000
--- a/scripts/training/symal/Jamfile
+++ /dev/null
@@ -1,3 +0,0 @@
-exe symal : symal.cpp cmd.c ;
-
-install dist : symal : <location>. ;
diff --git a/scripts/training/train-model.perl.missing_bin_dir b/scripts/training/train-model.perl
index 0a0d51ecf..05287afee 100644..100755
--- a/scripts/training/train-model.perl.missing_bin_dir
+++ b/scripts/training/train-model.perl
@@ -2,9 +2,11 @@
use strict;
use Getopt::Long "GetOptions";
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use File::Spec::Functions;
+use File::Spec::Unix;
use File::Basename;
+BEGIN { require "$RealBin/LexicalTranslationModel.pm"; "LexicalTranslationModel"->import; }
# Train Factored Phrase Model
# (c) 2006-2009 Philipp Koehn
@@ -12,16 +14,16 @@ use File::Basename;
# Train a model from a parallel corpus
# -----------------------------------------------------
$ENV{"LC_ALL"} = "C";
-my $SCRIPTS_ROOTDIR = $Bin;
+my $SCRIPTS_ROOTDIR = $RealBin;
if ($SCRIPTS_ROOTDIR eq '') {
$SCRIPTS_ROOTDIR = dirname(__FILE__);
}
$SCRIPTS_ROOTDIR =~ s/\/training$//;
-$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
+#$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
-my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_SORT_COMPRESS, $_SORT_PARALLEL, $_CORPUS,
+my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_SORT_COMPRESS, $_SORT_PARALLEL, $_CORPUS,
$_CORPUS_COMPRESSION, $_FIRST_STEP, $_LAST_STEP, $_F, $_E, $_MAX_PHRASE_LENGTH,
- $_LEXICAL_FILE, $_NO_LEXICAL_WEIGHTING, $_VERBOSE, $_ALIGNMENT,
+ $_LEXICAL_FILE, $_NO_LEXICAL_WEIGHTING, $_LEXICAL_COUNTS, $_VERBOSE, $_ALIGNMENT,
$_ALIGNMENT_FILE, $_ALIGNMENT_STEM, @_LM, $_EXTRACT_FILE, $_GIZA_OPTION, $_HELP, $_PARTS,
$_DIRECTION, $_ONLY_PRINT_GIZA, $_GIZA_EXTENSION, $_REORDERING,
$_REORDERING_SMOOTH, $_INPUT_FACTOR_MAX, $_ALIGNMENT_FACTORS,
@@ -30,24 +32,21 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
$_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE,
@_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS,
$_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG,
- $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,$_EXTRACT_OPTIONS,$_SCORE_OPTIONS,
+ $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS,
$_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2,
$_PHRASE_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES,
$_MEMSCORE, $_FINAL_ALIGNMENT_MODEL,
$_CONTINUE,$_MAX_LEXICAL_REORDERING,$_DO_STEPS,
- $_ADDITIONAL_INI,
- $_DICTIONARY, $_EPPEX, $_SPARSE_PHRASE_FEATURES);
-
+ @_ADDITIONAL_INI,$_ADDITIONAL_INI_FILE,
+ $_SPARSE_TRANSLATION_TABLE,
+ $_DICTIONARY, $_SPARSE_PHRASE_FEATURES, $_EPPEX, $IGNORE);
my $_CORES = 1;
my $debug = 0; # debug this script, do not delete any files in debug mode
-# the following line is set installation time by 'make release'. BEWARE!
-my $BINDIR="/Users/hieuhoang/workspace/bin/training-tools/";
-
$_HELP = 1
unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
- 'bin-dir=s' => \$BINDIR, # allow to override default bindir path
+ 'external-bin-dir=s' => \$_EXTERNAL_BINDIR,
'corpus-dir=s' => \$_CORPUS_DIR,
'corpus=s' => \$_CORPUS,
'f=s' => \$_F,
@@ -57,6 +56,7 @@ $_HELP = 1
'max-phrase-length=s' => \$_MAX_PHRASE_LENGTH,
'lexical-file=s' => \$_LEXICAL_FILE,
'no-lexical-weighting' => \$_NO_LEXICAL_WEIGHTING,
+ 'write-lexical-counts' => \$_LEXICAL_COUNTS,
'model-dir=s' => \$_MODEL_DIR,
'temp-dir=s' => \$_TEMP_DIR,
'sort-buffer-size=s' => \$_SORT_BUFFER_SIZE,
@@ -94,7 +94,8 @@ $_HELP = 1
'generation-factors=s' => \$_GENERATION_FACTORS,
'decoding-steps=s' => \$_DECODING_STEPS,
'decoding-graph-backoff=s' => \$_DECODING_GRAPH_BACKOFF,
- 'scripts-root-dir=s' => \$SCRIPTS_ROOTDIR,
+ 'bin-dir=s' => \$IGNORE,
+ 'scripts-root-dir=s' => \$IGNORE,
'factor-delimiter=s' => \$_FACTOR_DELIMITER,
'phrase-translation-table=s' => \@_PHRASE_TABLE,
'generation-corpus=s' => \$_GENERATION_CORPUS,
@@ -110,8 +111,8 @@ $_HELP = 1
'pcfg' => \$_PCFG,
'alt-direct-rule-score-1' => \$_ALT_DIRECT_RULE_SCORE_1,
'alt-direct-rule-score-2' => \$_ALT_DIRECT_RULE_SCORE_2,
- 'extract-options=s' => \$_EXTRACT_OPTIONS,
- 'score-options=s' => \$_SCORE_OPTIONS,
+ 'extract-options=s' => \@_EXTRACT_OPTIONS,
+ 'score-options=s' => \@_SCORE_OPTIONS,
'source-syntax' => \$_SOURCE_SYNTAX,
'target-syntax' => \$_TARGET_SYNTAX,
'xml' => \$_XML,
@@ -123,9 +124,11 @@ $_HELP = 1
'force-factored-filenames' => \$_FORCE_FACTORED_FILENAMES,
'dictionary=s' => \$_DICTIONARY,
'sparse-phrase-features' => \$_SPARSE_PHRASE_FEATURES,
- 'eppex:s' => \$_EPPEX
- 'additional-ini=s' => \$_ADDITIONAL_INI,
- 'cores=i' => \$_CORES
+ 'eppex:s' => \$_EPPEX,
+ 'additional-ini=s' => \@_ADDITIONAL_INI,
+ 'additional-ini-file=s' => \$_ADDITIONAL_INI_FILE,
+ 'sparse-translation-table' => \$_SPARSE_TRANSLATION_TABLE,
+ 'cores=i' => \$_CORES
);
if ($_HELP) {
@@ -146,6 +149,39 @@ For more, please check manual or contact koehn\@inf.ed.ac.uk\n";
exit(1);
}
+if (defined($IGNORE)) {
+ print STDERR "WARNING: Do not specify -bin-dir or -scripts-root-dir anymore. These variable are ignored and will be deleted soon";
+}
+
+# convert all paths to absolute paths
+$_ROOT_DIR = File::Spec->rel2abs($_ROOT_DIR) if defined($_ROOT_DIR);
+$_EXTERNAL_BINDIR = File::Spec->rel2abs($_EXTERNAL_BINDIR) if defined($_EXTERNAL_BINDIR);
+$_CORPUS_DIR = File::Spec->rel2abs($_CORPUS_DIR) if defined($_CORPUS_DIR);
+$_CORPUS = File::Spec->rel2abs($_CORPUS) if defined($_CORPUS);
+$_LEXICAL_FILE = File::Spec->rel2abs($_LEXICAL_FILE) if defined($_LEXICAL_FILE);
+$_MODEL_DIR = File::Spec->rel2abs($_MODEL_DIR) if defined($_MODEL_DIR);
+$_TEMP_DIR = File::Spec->rel2abs($_TEMP_DIR) if defined($_TEMP_DIR);
+$_ALIGNMENT_FILE = File::Spec->rel2abs($_ALIGNMENT_FILE) if defined($_ALIGNMENT_FILE);
+$_ALIGNMENT_STEM = File::Spec->rel2abs($_ALIGNMENT_STEM) if defined($_ALIGNMENT_STEM);
+$_GLUE_GRAMMAR_FILE = File::Spec->rel2abs($_GLUE_GRAMMAR_FILE) if defined($_GLUE_GRAMMAR_FILE);
+$_UNKNOWN_WORD_LABEL_FILE = File::Spec->rel2abs($_UNKNOWN_WORD_LABEL_FILE) if defined($_UNKNOWN_WORD_LABEL_FILE);
+$_EXTRACT_FILE = File::Spec->rel2abs($_EXTRACT_FILE) if defined($_EXTRACT_FILE);
+foreach (@_PHRASE_TABLE) { $_ = File::Spec->rel2abs($_); }
+foreach (@_REORDERING_TABLE) { $_ = File::Spec->rel2abs($_); }
+foreach (@_GENERATION_TABLE) { $_ = File::Spec->rel2abs($_); }
+$_GIZA_E2F = File::Spec->rel2abs($_GIZA_E2F) if defined($_GIZA_E2F);
+$_GIZA_F2E = File::Spec->rel2abs($_GIZA_F2E) if defined($_GIZA_F2E);
+
+my $_SCORE_OPTIONS; # allow multiple switches
+foreach (@_SCORE_OPTIONS) { $_SCORE_OPTIONS .= $_." "; }
+chop($_SCORE_OPTIONS) if $_SCORE_OPTIONS;
+my $_EXTRACT_OPTIONS; # allow multiple switches
+foreach (@_EXTRACT_OPTIONS) { $_EXTRACT_OPTIONS .= $_." "; }
+chop($_EXTRACT_OPTIONS) if $_EXTRACT_OPTIONS;
+my $_ADDITIONAL_INI; # allow multiple switches
+foreach (@_ADDITIONAL_INI) { $_ADDITIONAL_INI .= $_." "; }
+chop($_ADDITIONAL_INI) if $_ADDITIONAL_INI;
+
$_HIERARCHICAL = 1 if $_SOURCE_SYNTAX || $_TARGET_SYNTAX;
$_XML = 1 if $_SOURCE_SYNTAX || $_TARGET_SYNTAX;
my $___FACTOR_DELIMITER = $_FACTOR_DELIMITER;
@@ -187,39 +223,40 @@ foreach my $step (@step_conf) {
}
-
# supporting binaries from other packages
-my $MGIZA_MERGE_ALIGN = "$BINDIR/merge_alignment.py";
+my $MKCLS = "$_EXTERNAL_BINDIR/mkcls";
+my $MGIZA_MERGE_ALIGN = "$_EXTERNAL_BINDIR/merge_alignment.py";
my $GIZA;
my $SNT2COOC;
-if(!defined $_MGIZA ){
- $GIZA = "$BINDIR/GIZA++";
- if (-x "$BINDIR/snt2cooc.out") {
- $SNT2COOC = "$BINDIR/snt2cooc.out";
- } elsif (-x "$BINDIR/snt2cooc") { # Since "snt2cooc.out" and "snt2cooc" work the same
- $SNT2COOC = "$BINDIR/snt2cooc";
+if ($STEPS[1] || $STEPS[2])
+{
+ if(!defined $_MGIZA ){
+ $GIZA = "$_EXTERNAL_BINDIR/GIZA++";
+ if (-x "$_EXTERNAL_BINDIR/snt2cooc.out") {
+ $SNT2COOC = "$_EXTERNAL_BINDIR/snt2cooc.out";
+ } elsif (-x "$_EXTERNAL_BINDIR/snt2cooc") { # Since "snt2cooc.out" and "snt2cooc" work the same
+ $SNT2COOC = "$_EXTERNAL_BINDIR/snt2cooc";
+ }
+ print STDERR "Using single-thread GIZA\n";
+ } else {
+ $GIZA = "$_EXTERNAL_BINDIR/mgiza";
+ if (-x "$_EXTERNAL_BINDIR/snt2cooc") {
+ $SNT2COOC = "$_EXTERNAL_BINDIR/snt2cooc";
+ } elsif (-x "$_EXTERNAL_BINDIR/snt2cooc.out") { # Important for users that use MGIZA and copy only the "mgiza" file to $_EXTERNAL_BINDIR
+ $SNT2COOC = "$_EXTERNAL_BINDIR/snt2cooc.out";
+ }
+ print STDERR "Using multi-thread GIZA\n";
+ if (!defined($_MGIZA_CPUS)) {
+ $_MGIZA_CPUS=4;
+ }
+ die("ERROR: Cannot find $MGIZA_MERGE_ALIGN") unless (-x $MGIZA_MERGE_ALIGN);
}
- print STDERR "Using single-thread GIZA\n";
-} else {
- $GIZA = "$BINDIR/mgiza";
- if (-x "$BINDIR/snt2cooc") {
- $SNT2COOC = "$BINDIR/snt2cooc";
- } elsif (-x "$BINDIR/snt2cooc.out") { # Important for users that use MGIZA and copy only the "mgiza" file to $BINDIR
- $SNT2COOC = "$BINDIR/snt2cooc.out";
- }
- print STDERR "Using multi-thread GIZA\n";
- if (!defined($_MGIZA_CPUS)) {
- $_MGIZA_CPUS=4;
- }
- die("ERROR: Cannot find $MGIZA_MERGE_ALIGN") unless (-x $MGIZA_MERGE_ALIGN);
+
+ # override
+ $SNT2COOC = "$_EXTERNAL_BINDIR/$_SNT2COOC" if defined($_SNT2COOC);
}
-# override
-$SNT2COOC = "$BINDIR/$_SNT2COOC" if defined($_SNT2COOC);
-
-my $MKCLS = "$BINDIR/mkcls";
-
# parallel extract
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
@@ -250,28 +287,28 @@ my $__SORT_PARALLEL = "";
$__SORT_PARALLEL = "--parallel $_SORT_PARALLEL" if $_SORT_PARALLEL;
# supporting scripts/binaries from this package
-my $PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract";
+my $PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/../bin/extract";
$PHRASE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS $__SORT_PARALLEL\" $PHRASE_EXTRACT";
my $RULE_EXTRACT;
if (defined($_GHKM)) {
- $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract-ghkm/tools/extract-ghkm";
+ $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/../bin/extract-ghkm";
}
else {
- $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/training/phrase-extract/extract-rules";
+ $RULE_EXTRACT = "$SCRIPTS_ROOTDIR/../bin/extract-rules";
}
$RULE_EXTRACT = "$SCRIPTS_ROOTDIR/generic/extract-parallel.perl $_CORES $SPLIT_EXEC \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS $__SORT_PARALLEL\" $RULE_EXTRACT";
-my $LEXICAL_REO_SCORER = "$SCRIPTS_ROOTDIR/training/lexical-reordering/score";
-my $MEMSCORE = "$SCRIPTS_ROOTDIR/training/memscore/memscore";
-my $EPPEX = "$SCRIPTS_ROOTDIR/training/eppex/eppex";
-my $SYMAL = "$SCRIPTS_ROOTDIR/training/symal/symal";
-my $GIZA2BAL = "$SCRIPTS_ROOTDIR/training/symal/giza2bal.pl";
+my $LEXICAL_REO_SCORER = "$SCRIPTS_ROOTDIR/../bin/lexical-reordering-score";
+my $MEMSCORE = "$SCRIPTS_ROOTDIR/../bin/memscore";
+my $EPPEX = "$SCRIPTS_ROOTDIR/../bin/eppex";
+my $SYMAL = "$SCRIPTS_ROOTDIR/../bin/symal";
+my $GIZA2BAL = "$SCRIPTS_ROOTDIR/training/giza2bal.pl";
-my $PHRASE_SCORE = "$SCRIPTS_ROOTDIR/training/phrase-extract/score";
+my $PHRASE_SCORE = "$SCRIPTS_ROOTDIR/../bin/score";
$PHRASE_SCORE = "$SCRIPTS_ROOTDIR/generic/score-parallel.perl $_CORES \"$SORT_EXEC $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE $__SORT_COMPRESS $__SORT_PARALLEL\" $PHRASE_SCORE";
-my $PHRASE_CONSOLIDATE = "$SCRIPTS_ROOTDIR/training/phrase-extract/consolidate";
+my $PHRASE_CONSOLIDATE = "$SCRIPTS_ROOTDIR/../bin/consolidate";
# utilities
my $ZCAT = "gzip -cd";
@@ -280,8 +317,8 @@ my $BZCAT = "bzcat";
# do a sanity check to make sure we can find the necessary binaries since
# these are not installed by default
# not needed if we start after step 2
-die("ERROR: Cannot find mkcls, GIZA++/mgiza, & snt2cooc.out/snt2cooc in $BINDIR.\nDid you install this script using 'make release'?") unless ((!$STEPS[2]) ||
- (-x $GIZA && defined($SNT2COOC) && -x $MKCLS));
+die("ERROR: Cannot find mkcls, GIZA++/mgiza, & snt2cooc.out/snt2cooc in $_EXTERNAL_BINDIR.\nYou MUST specify the parameter -external-bin-dir") unless ((!$STEPS[2]) ||
+ (defined($_EXTERNAL_BINDIR) && -x $GIZA && defined($SNT2COOC) && -x $MKCLS));
# set varibles to defaults or from options
my $___ROOT_DIR = ".";
@@ -365,9 +402,11 @@ my $___MAX_PHRASE_LENGTH = "7";
$___MAX_PHRASE_LENGTH = "10" if $_HIERARCHICAL;
my $___LEXICAL_WEIGHTING = 1;
+my $___LEXICAL_COUNTS = 0;
my $___LEXICAL_FILE = $___MODEL_DIR."/lex";
$___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH;
$___LEXICAL_WEIGHTING = 0 if $_NO_LEXICAL_WEIGHTING;
+$___LEXICAL_COUNTS = 1 if $_LEXICAL_COUNTS;
$___LEXICAL_FILE = $_LEXICAL_FILE if $_LEXICAL_FILE;
my $___PHRASE_SCORER = "phrase-extract";
@@ -648,20 +687,6 @@ sub prepare {
}
}
-sub open_compressed {
- my ($file) = @_;
- print "FILE: $file\n";
-
- # add extensions, if necessary
- $file = $file.".bz2" if ! -e $file && -e $file.".bz2";
- $file = $file.".gz" if ! -e $file && -e $file.".gz";
-
- # pipe zipped, if necessary
- return "$BZCAT $file|" if $file =~ /\.bz2$/;
- return "$ZCAT $file|" if $file =~ /\.gz$/;
- return $file;
-}
-
sub reduce_factors {
my ($full,$reduced,$factors) = @_;
@@ -703,7 +728,7 @@ sub reduce_factors {
$realfull .= ".gz";
$reduced =~ s/(\.gz)?$/.gz/;
}
- safesystem("ln -s $realfull $reduced")
+ safesystem("ln -s '$realfull' '$reduced'")
or die "Failed to create symlink $realfull -> $reduced";
return;
}
@@ -761,7 +786,7 @@ sub make_classes {
}
sub get_vocabulary {
- return unless $___LEXICAL_WEIGHTING;
+# return unless $___LEXICAL_WEIGHTING;
my($corpus,$vcb) = @_;
print STDERR "(1.2) creating vcb file $vcb @ ".`date`;
@@ -780,7 +805,7 @@ sub get_vocabulary {
}
my %VCB;
- open(VCB,">$vcb") or die "ERROR: Can't write $vcb";
+ open(VCB,">", "$vcb") or die "ERROR: Can't write $vcb";
print VCB "1\tUNK\t0\n";
my $id=2;
foreach (reverse sort @NUM) {
@@ -1101,7 +1126,7 @@ sub run_single_snt2cooc {
my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;
print STDERR "(2.1a) running snt2cooc $f-$e @ ".`date`."\n";
safesystem("mkdir -p $dir") or die("ERROR");
- if ($SNT2COOC eq "$BINDIR/snt2cooc.out") {
+ if ($SNT2COOC eq "$_EXTERNAL_BINDIR/snt2cooc.out") {
print "$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc\n";
safesystem("$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc") or die("ERROR");
} else {
@@ -1173,7 +1198,9 @@ sub get_lexical_factored {
if ($___NOT_FACTORED && !$_XML) {
&get_lexical($___CORPUS.".".$___F,
$___CORPUS.".".$___E,
- $___LEXICAL_FILE);
+ $___ALIGNMENT_FILE.".".$___ALIGNMENT,
+ $___LEXICAL_FILE,
+ $___LEXICAL_COUNTS);
}
else {
foreach my $factor (split(/\+/,$___TRANSLATION_FACTORS)) {
@@ -1189,86 +1216,13 @@ sub get_lexical_factored {
$lexical_file .= ".".$factor if !$___NOT_FACTORED;
&get_lexical($___ALIGNMENT_STEM.".".$factor_f.".".$___F,
$___ALIGNMENT_STEM.".".$factor_e.".".$___E,
- $lexical_file);
+ $___ALIGNMENT_FILE.".".$___ALIGNMENT,
+ $lexical_file,
+ $___LEXICAL_COUNTS);
}
}
}
-sub get_lexical {
- my ($alignment_file_f,$alignment_file_e,$lexical_file) = @_;
- print STDERR "($alignment_file_f,$alignment_file_e,$lexical_file)\n";
- my $alignment_file_a = $___ALIGNMENT_FILE.".".$___ALIGNMENT;
-
- my (%WORD_TRANSLATION,%TOTAL_FOREIGN,%TOTAL_ENGLISH);
-
- if (-e "$lexical_file.f2e" && -e "$lexical_file.e2f") {
- print STDERR " reusing: $lexical_file.f2e and $lexical_file.e2f\n";
- return;
- }
-
- open(E,&open_compressed($alignment_file_e)) or die "ERROR: Can't read $alignment_file_e";
- open(F,&open_compressed($alignment_file_f)) or die "ERROR: Can't read $alignment_file_f";
- open(A,&open_compressed($alignment_file_a)) or die "ERROR: Can't read $alignment_file_a";
-
- my $alignment_id = 0;
- while(my $e = <E>) {
- if (($alignment_id++ % 1000) == 0) { print STDERR "!"; }
- chomp($e); fix_spaces(\$e);
- my @ENGLISH = split(/ /,$e);
- my $f = <F>; chomp($f); fix_spaces(\$f);
- my @FOREIGN = split(/ /,$f);
- my $a = <A>; chomp($a); fix_spaces(\$a);
-
- my (%FOREIGN_ALIGNED,%ENGLISH_ALIGNED);
- foreach (split(/ /,$a)) {
- my ($fi,$ei) = split(/\-/);
- if ($fi >= scalar(@FOREIGN) || $ei >= scalar(@ENGLISH)) {
- print STDERR "alignment point ($fi,$ei) out of range (0-$#FOREIGN,0-$#ENGLISH) in line $alignment_id, ignoring\n";
- }
- else {
- # local counts
- $FOREIGN_ALIGNED{$fi}++;
- $ENGLISH_ALIGNED{$ei}++;
-
- # global counts
- $WORD_TRANSLATION{$FOREIGN[$fi]}{$ENGLISH[$ei]}++;
- $TOTAL_FOREIGN{$FOREIGN[$fi]}++;
- $TOTAL_ENGLISH{$ENGLISH[$ei]}++;
- }
- }
-
- # unaligned words
- for(my $ei=0;$ei<scalar(@ENGLISH);$ei++) {
- next if defined($ENGLISH_ALIGNED{$ei});
- $WORD_TRANSLATION{"NULL"}{$ENGLISH[$ei]}++;
- $TOTAL_ENGLISH{$ENGLISH[$ei]}++;
- $TOTAL_FOREIGN{"NULL"}++;
- }
- for(my $fi=0;$fi<scalar(@FOREIGN);$fi++) {
- next if defined($FOREIGN_ALIGNED{$fi});
- $WORD_TRANSLATION{$FOREIGN[$fi]}{"NULL"}++;
- $TOTAL_FOREIGN{$FOREIGN[$fi]}++;
- $TOTAL_ENGLISH{"NULL"}++;
- }
- }
- print STDERR "\n";
- close(A);
- close(F);
- close(E);
-
- open(F2E,">$lexical_file.f2e") or die "ERROR: Can't write $lexical_file.f2e";
- open(E2F,">$lexical_file.e2f") or die "ERROR: Can't write $lexical_file.e2f";
-
- foreach my $f (keys %WORD_TRANSLATION) {
- foreach my $e (keys %{$WORD_TRANSLATION{$f}}) {
- printf F2E "%s %s %.7f\n",$e,$f,$WORD_TRANSLATION{$f}{$e}/$TOTAL_FOREIGN{$f};
- printf E2F "%s %s %.7f\n",$f,$e,$WORD_TRANSLATION{$f}{$e}/$TOTAL_ENGLISH{$e};
- }
- }
- close(E2F);
- close(F2E);
- print STDERR "Saved: $lexical_file.f2e and $lexical_file.e2f\n";
-}
### (5) PHRASE EXTRACTION
@@ -1457,10 +1411,18 @@ sub score_phrase {
sub score_phrase_phrase_extract {
my ($ttable_file,$lexical_file,$extract_file) = @_;
- # remove consolidation options
+ # distinguish between score and consolidation options
my $ONLY_DIRECT = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /OnlyDirect/);
my $PHRASE_COUNT = (!defined($_SCORE_OPTIONS) || $_SCORE_OPTIONS !~ /NoPhraseCount/);
my $LOW_COUNT = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /LowCountFeature/);
+ my ($SPARSE_COUNT_BIN,$COUNT_BIN,$DOMAIN) = ("","","");
+ $SPARSE_COUNT_BIN = $1 if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /SparseCountBinFeature ([\s\d]*\d)/;
+ $COUNT_BIN = $1 if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /\-CountBinFeature ([\s\d]*\d)/;
+ $DOMAIN = $1 if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /(\-+[a-z]*Domain[a-z]+ .+)/i;
+ $DOMAIN =~ s/ \-.+//g;
+ my $SINGLETON = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /Singleton/);
+ my $CROSSEDNONTERM = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /CrossedNonTerm/);
+
my $UNALIGNED_COUNT = (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /UnalignedPenalty/);
my ($UNALIGNED_FW_COUNT,$UNALIGNED_FW_F,$UNALIGNED_FW_E);
if (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /UnalignedFunctionWordPenalty +(\S+) +(\S+)/) {
@@ -1478,6 +1440,8 @@ sub score_phrase_phrase_extract {
$CORE_SCORE_OPTIONS .= " --LogProb" if $LOG_PROB;
$CORE_SCORE_OPTIONS .= " --NegLogProb" if $NEG_LOG_PROB;
$CORE_SCORE_OPTIONS .= " --NoLex" if $NO_LEX;
+ $CORE_SCORE_OPTIONS .= " --Singleton" if $SINGLETON;
+ $CORE_SCORE_OPTIONS .= " --CrossedNonTerm" if $CROSSEDNONTERM;
my $substep = 1;
my $isParent = 1;
@@ -1500,6 +1464,7 @@ sub score_phrase_phrase_extract {
$inverse = " --Inverse";
$extract_filename = $extract_file.".inv";
}
+
my $extract = "$extract_filename.sorted.gz";
print STDERR "(6.".($substep++).") creating table half $ttable_file.half.$direction @ ".`date`;
@@ -1515,6 +1480,7 @@ sub score_phrase_phrase_extract {
$cmd .= " --PCFG" if $_PCFG;
$cmd .= " --UnpairedExtractFormat" if $_ALT_DIRECT_RULE_SCORE_1 || $_ALT_DIRECT_RULE_SCORE_2;
$cmd .= " --ConditionOnTargetLHS" if $_ALT_DIRECT_RULE_SCORE_1;
+ $cmd .= " $DOMAIN" if $DOMAIN;
$cmd .= " $CORE_SCORE_OPTIONS" if defined($_SCORE_OPTIONS);
# sorting
@@ -1559,6 +1525,8 @@ sub score_phrase_phrase_extract {
$cmd .= " --OnlyDirect" if $ONLY_DIRECT;
$cmd .= " --NoPhraseCount" unless $PHRASE_COUNT;
$cmd .= " --LowCountFeature" if $LOW_COUNT;
+ $cmd .= " --CountBinFeature $COUNT_BIN" if $COUNT_BIN;
+ $cmd .= " --SparseCountBinFeature $SPARSE_COUNT_BIN" if $SPARSE_COUNT_BIN;
$cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
$cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
@@ -1642,6 +1610,12 @@ sub get_reordering {
my $cmd = "$LEXICAL_REO_SCORER $extract_file.o.sorted.gz $smooth $reo_model_path";
$cmd .= " --SmoothWithCounts" if ($smooth =~ /(.+)u$/);
for my $mtype (keys %REORDERING_MODEL_TYPES) {
+ # * $mtype will be one of wbe, phrase, or hier
+ # * the value stored in $REORDERING_MODEL_TYPES{$mtype} is a concatenation of the "orient"
+ # attributes such as "msd"
+ # * the "filename" attribute is appended to the filename, but actually serves as the main configuration specification
+ # for reordering scoring. it holds a string such as "wbe-msd-didirectional-fe"
+ # which has the more general format type-orient-dir-lang
$cmd .= " --model \"$mtype $REORDERING_MODEL_TYPES{$mtype}";
foreach my $model (@REORDERING_MODELS) {
if ($model->{"type"} eq $mtype) {
@@ -1816,19 +1790,41 @@ sub create_ini {
$basic_weight_count /= 2 if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /OnlyDirect/;
$basic_weight_count++ unless defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /NoPhraseCount/; # phrase count feature
$basic_weight_count++ if defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /LowCountFeature/; # low count feature
+ if (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /(\-CountBinFeature [\s\d]*\d)/) {
+ $basic_weight_count += scalar split(/\s+/,$1);
+ }
+ if (defined($_SCORE_OPTIONS) && $_SCORE_OPTIONS =~ /\-+Domain([a-z]+) (\S+)/i) {
+ my ($method,$file) = ($1,$2);
+ my $count = `cut -d\\ -f 2 $file | sort | uniq | wc -l`;
+ $basic_weight_count += $count if $method eq "Indicator" || $method eq "Ratio";
+ $basic_weight_count += 2**$count-1 if $method eq "Subset";
+ }
$basic_weight_count++ if $_PCFG;
foreach my $f (split(/\+/,$___TRANSLATION_FACTORS)) {
- $num_of_ttables++;
- my $ff = $f;
- $ff =~ s/\-/ /;
- my $file = "$___MODEL_DIR/".($_HIERARCHICAL?"rule-table":"phrase-table").($___NOT_FACTORED ? "" : ".$f").".gz";
- $file = shift @SPECIFIED_TABLE if scalar(@SPECIFIED_TABLE);
- my $phrase_table_impl = ($_HIERARCHICAL ? 6 : 0);
- print INI "$phrase_table_impl $ff $basic_weight_count $file";
- if ($_SPARSE_PHRASE_FEATURES) {
- print INI " sparse";
- }
- print INI "\n";
+ $num_of_ttables++;
+ my $ff = $f;
+ $ff =~ s/\-/ /;
+ my $file = "$___MODEL_DIR/".($_HIERARCHICAL?"rule-table":"phrase-table").($___NOT_FACTORED ? "" : ".$f").".gz";
+ my $phrase_table_impl = ($_HIERARCHICAL? 6 : 0);
+
+ if (scalar(@SPECIFIED_TABLE)) {
+ $file = shift @SPECIFIED_TABLE;
+ my @toks = split(/:/,$file);
+ $file = $toks[0];
+ if (@toks > 1) {
+ $phrase_table_impl = $toks[1];
+ }
+ if (@toks == 3) {
+ $basic_weight_count = $toks[2];
+ }
+ }
+ else {
+
+ }
+
+ print INI "$phrase_table_impl $ff $basic_weight_count $file";
+ print INI " sparse" if defined($_SPARSE_TRANSLATION_TABLE);
+ print INI "\n";
}
if ($_GLUE_GRAMMAR) {
&full_path(\$___GLUE_GRAMMAR_FILE);
@@ -1958,10 +1954,15 @@ sub create_ini {
print INI "\n# delimiter between factors in input\n[factor-delimiter]\n$___FACTOR_DELIMITER\n\n"
}
+ # get addititional content for config file from switch or file
if ($_ADDITIONAL_INI) {
print INI "\n# additional settings\n\n";
foreach (split(/<br>/i,$_ADDITIONAL_INI)) { print INI $_."\n"; }
}
+ if ($_ADDITIONAL_INI_FILE) {
+ print INI "\n# additional settings\n\n";
+ print INI `cat $_ADDITIONAL_INI_FILE`;
+ }
close(INI);
}
@@ -2016,8 +2017,3 @@ sub open_or_zcat {
return $hdl;
}
-sub fix_spaces(){
- my ($in) = @_;
- $$in =~ s/[ \t]+/ /g; $$in =~ s/[ \t]$//; $$in =~ s/^[ \t]//;
-}
-
diff --git a/scripts/training/wrappers/adam-suffix-array/suffix-array-create.sh b/scripts/training/wrappers/adam-suffix-array/suffix-array-create.sh
new file mode 100755
index 000000000..e5210a990
--- /dev/null
+++ b/scripts/training/wrappers/adam-suffix-array/suffix-array-create.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+# execute: ~/workspace/bin/moses-smt/scripts/training/wrappers/suffix-array-create.sh $SA_EXEC_DIR $SOURCE_CORPUS $TARGET_CORPUS $ALIGNMENT $SA_OUTPUT
+
+# eg.
+#SA_EXEC_DIR=/Users/hieuhoang/workspace/github/cdec/sa-extract
+#SOURCE_CORPUS=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/training/corpus.2.fr
+#TARGET_CORPUS=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/training/corpus.2.en
+#ALIGNMENT=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/model/aligned.3.grow-diag-final-and
+#SA_OUTPUT=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/model/suffix-array.3
+
+
+SA_EXEC_DIR=$1
+SOURCE_CORPUS=$2
+TARGET_CORPUS=$3
+ALIGNMENT=$4
+SA_OUTPUT=$5
+GLUE_GRAMMAR=$6
+
+mkdir $SA_OUTPUT
+
+rm -rf $SA_OUTPUT/bitext
+
+pushd .
+cd $SA_EXEC_DIR
+
+./sa-compile.pl -output $SA_OUTPUT -b bitext_name=$SOURCE_CORPUS,$TARGET_CORPUS -a alignment_name=$ALIGNMENT > $SA_OUTPUT/extract.ini
+
+popd
+
+echo "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0\n" > $GLUE_GRAMMAR
+echo "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0\n" >> $GLUE_GRAMMAR
+echo "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0\n" >> $GLUE_GRAMMAR
diff --git a/scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh b/scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh
new file mode 100755
index 000000000..eda11dede
--- /dev/null
+++ b/scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+# execute: ~/workspace/bin/moses-smt/scripts/training/wrappers/adam-suffix-array/suffix-array-extract.sh $SA_EXEC_DIR $MODEL_DIR $INPUT_FILE $OUTPUT_DIR
+
+# eg.
+#SA_EXEC_DIR=/Users/hieuhoang/workspace/github/cdec/sa-extract
+#MODEL_DIR=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/model/suffix-array.3
+#INPUT_FILE=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/tuning/input.lc.2
+#OUTPUT_DIR=/Users/hieuhoang/workspace/data/europarl/exp/fr-en/tuning/filtered.sa.3
+
+SA_EXEC_DIR=$1
+MODEL_DIR=$2
+INPUT_FILE=$3
+OUTPUT_DIR=$4
+
+mkdir $OUTPUT_DIR
+
+pushd .
+cd $OUTPUT_DIR
+
+cat $INPUT_FILE | $SA_EXEC_DIR/escape-testset.pl | $SA_EXEC_DIR/extractor.py -c $MODEL_DIR/extract.ini
+gzip $OUTPUT_DIR/grammar.out.*
+
+popd
+
diff --git a/scripts/training/wrappers/parse-de-berkeley.perl b/scripts/training/wrappers/parse-de-berkeley.perl
index 6482d11f3..61bda6ee5 100755
--- a/scripts/training/wrappers/parse-de-berkeley.perl
+++ b/scripts/training/wrappers/parse-de-berkeley.perl
@@ -2,7 +2,7 @@
use strict;
use Getopt::Long "GetOptions";
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
my ($JAR,$GRAMMAR,$SPLIT_HYPHEN,$MARK_SPLIT,$BINARIZE);
@@ -19,12 +19,12 @@ die("ERROR: could not find jar file '$JAR'\n") unless -e $JAR;
die("ERROR: could not find grammar file '$GRAMMAR'\n") unless -e $GRAMMAR;
$BINARIZE = $BINARIZE ? "-binarize" : "";
-$SPLIT_HYPHEN = $SPLIT_HYPHEN ? "| $Bin/syntax-hyphen-splitting.perl $BINARIZE" : "";
+$SPLIT_HYPHEN = $SPLIT_HYPHEN ? "| $RealBin/syntax-hyphen-splitting.perl $BINARIZE" : "";
$SPLIT_HYPHEN .= " -mark-split" if $SPLIT_HYPHEN && $MARK_SPLIT;
my $tmp = "/tmp/parse-de-berkeley.$$";
-open(TMP,"| $Bin/../../tokenizer/deescape-special-chars.perl > $tmp");
+open(TMP,"| $RealBin/../../tokenizer/deescape-special-chars.perl > $tmp");
while(<STDIN>) {
# unsplit hyphens
s/ \@-\@ /-/g if $SPLIT_HYPHEN;
@@ -37,7 +37,7 @@ while(<STDIN>) {
}
close(TMP);
-my $cmd = "cat $tmp | java -Xmx10000m -Xms10000m -Dfile.encoding=UTF8 -jar $JAR -gr $GRAMMAR -maxLength 1000 $BINARIZE | $Bin/berkeleyparsed2mosesxml.perl $SPLIT_HYPHEN";
+my $cmd = "cat $tmp | java -Xmx10000m -Xms10000m -Dfile.encoding=UTF8 -jar $JAR -gr $GRAMMAR -maxLength 1000 $BINARIZE | $RealBin/berkeleyparsed2mosesxml.perl $SPLIT_HYPHEN";
print STDERR $cmd."\n";
open(PARSE,"$cmd|");
diff --git a/scripts/training/wrappers/parse-de-bitpar.perl b/scripts/training/wrappers/parse-de-bitpar.perl
index 50da7fd22..bbcccf877 100755
--- a/scripts/training/wrappers/parse-de-bitpar.perl
+++ b/scripts/training/wrappers/parse-de-bitpar.perl
@@ -2,7 +2,7 @@
use strict;
use Getopt::Long "GetOptions";
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use File::Basename;
use File::Temp qw/tempfile/;
diff --git a/scripts/training/wrappers/suffix-array-create.sh b/scripts/training/wrappers/suffix-array-create.sh
deleted file mode 100755
index 700269310..000000000
--- a/scripts/training/wrappers/suffix-array-create.sh
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash
-
-# execute: ~/workspace/bin/moses-smt/scripts/training/wrappers/suffix-array-create.sh $SA_EXEC_DIR $SOURCE_CORPUS $TARGET_CORPUS $ALIGNMENT $SA_OUTPUT
-
-
-SA_EXEC_DIR=$1
-SOURCE_CORPUS=$2
-TARGET_CORPUS=$3
-ALIGNMENT=$4
-SA_OUTPUT=$5
-
-mkdir $SA_OUTPUT
-
-rm -rf $SA_OUTPUT/bitext
-
-pushd .
-cd $SA_EXEC_DIR
-
-./sa-compile.pl -output $SA_OUTPUT -b bitext_name=$SOURCE_CORPUS,$TARGET_CORPUS -a alignment_name=$ALIGNMENT > $SA_OUTPUT/extract.ini
-
-popd
-
diff --git a/scripts/training/wrappers/suffix-array-extract.sh b/scripts/training/wrappers/suffix-array-extract.sh
deleted file mode 100755
index bc11e2cfc..000000000
--- a/scripts/training/wrappers/suffix-array-extract.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-# execute: ~/workspace/bin/moses-smt/scripts/training/wrappers/suffix-array-extract.sh $SA_EXEC_DIR $MODEL_DIR $INPUT_FILE $OUTPUT_DIR
-
-SA_EXEC_DIR=$1
-MODEL_DIR=$2
-INPUT_FILE=$3
-OUTPUT_DIR=$4
-
-mkdir $OUTPUT_DIR
-
-pushd .
-cd $OUTPUT_DIR
-
-$SA_EXEC_DIR/extractor.py -c $MODEL_DIR/extract.ini < $INPUT_FILE
-
-popd
-
diff --git a/scripts/training/zmert-moses.pl b/scripts/training/zmert-moses.pl
index 7b8c60f77..ecd783fa2 100755
--- a/scripts/training/zmert-moses.pl
+++ b/scripts/training/zmert-moses.pl
@@ -12,9 +12,9 @@
# 29 Dec 2009 Derived from mert-moses-new.pl (Kamil Kos)
-use FindBin qw($Bin);
+use FindBin qw($RealBin);
use File::Basename;
-my $SCRIPTS_ROOTDIR = $Bin;
+my $SCRIPTS_ROOTDIR = $RealBin;
$SCRIPTS_ROOTDIR =~ s/\/training$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
diff --git a/symal/Jamfile b/symal/Jamfile
new file mode 100644
index 000000000..3ab564790
--- /dev/null
+++ b/symal/Jamfile
@@ -0,0 +1,2 @@
+exe symal : symal.cpp cmd.c ;
+
diff --git a/scripts/training/symal/cmd.c b/symal/cmd.c
index 149fc7290..149fc7290 100644
--- a/scripts/training/symal/cmd.c
+++ b/symal/cmd.c
diff --git a/scripts/training/symal/cmd.h b/symal/cmd.h
index 01a00abc0..01a00abc0 100644
--- a/scripts/training/symal/cmd.h
+++ b/symal/cmd.h
diff --git a/scripts/training/symal/symal.cpp b/symal/symal.cpp
index da386d973..da386d973 100644
--- a/scripts/training/symal/symal.cpp
+++ b/symal/symal.cpp
diff --git a/scripts/training/symal/symal.vcproj b/symal/symal.vcproj
index 6eac62f4a..6eac62f4a 100644
--- a/scripts/training/symal/symal.vcproj
+++ b/symal/symal.vcproj
diff --git a/util/Jamfile b/util/Jamfile
index b89149221..e555488fd 100644
--- a/util/Jamfile
+++ b/util/Jamfile
@@ -1,4 +1,4 @@
-lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc ..//z : <include>.. : : <include>.. ;
+lib kenutil : bit_packing.cc ersatz_progress.cc exception.cc file.cc file_piece.cc mmap.cc murmur_hash.cc usage.cc ..//z : <include>.. : : <include>.. ;
import testing ;
diff --git a/util/bit_packing.hh b/util/bit_packing.hh
index 73a5cb226..dcbd814c3 100644
--- a/util/bit_packing.hh
+++ b/util/bit_packing.hh
@@ -174,6 +174,13 @@ struct BitsMask {
uint64_t mask;
};
+struct BitAddress {
+ BitAddress(void *in_base, uint64_t in_offset) : base(in_base), offset(in_offset) {}
+
+ void *base;
+ uint64_t offset;
+};
+
} // namespace util
#endif // UTIL_BIT_PACKING__
diff --git a/util/ersatz_progress.cc b/util/ersatz_progress.cc
index a82ce6726..07b14e26d 100644
--- a/util/ersatz_progress.cc
+++ b/util/ersatz_progress.cc
@@ -12,17 +12,17 @@ namespace { const unsigned char kWidth = 100; }
ErsatzProgress::ErsatzProgress() : current_(0), next_(std::numeric_limits<std::size_t>::max()), complete_(next_), out_(NULL) {}
ErsatzProgress::~ErsatzProgress() {
- if (!out_) return;
- Finished();
+ if (out_) Finished();
}
-ErsatzProgress::ErsatzProgress(std::ostream *to, const std::string &message, std::size_t complete)
+ErsatzProgress::ErsatzProgress(std::size_t complete, std::ostream *to, const std::string &message)
: current_(0), next_(complete / kWidth), complete_(complete), stones_written_(0), out_(to) {
if (!out_) {
next_ = std::numeric_limits<std::size_t>::max();
return;
}
- *out_ << message << "\n----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
+ if (!message.empty()) *out_ << message << '\n';
+ *out_ << "----5---10---15---20---25---30---35---40---45---50---55---60---65---70---75---80---85---90---95--100\n";
}
void ErsatzProgress::Milestone() {
diff --git a/util/ersatz_progress.hh b/util/ersatz_progress.hh
index 92c345fee..f709dc516 100644
--- a/util/ersatz_progress.hh
+++ b/util/ersatz_progress.hh
@@ -1,7 +1,7 @@
#ifndef UTIL_ERSATZ_PROGRESS__
#define UTIL_ERSATZ_PROGRESS__
-#include <iosfwd>
+#include <iostream>
#include <string>
// Ersatz version of boost::progress so core language model doesn't depend on
@@ -14,7 +14,7 @@ class ErsatzProgress {
ErsatzProgress();
// Null means no output. The null value is useful for passing along the ostream pointer from another caller.
- ErsatzProgress(std::ostream *to, const std::string &message, std::size_t complete);
+ explicit ErsatzProgress(std::size_t complete, std::ostream *to = &std::cerr, const std::string &message = "");
~ErsatzProgress();
diff --git a/util/file.cc b/util/file.cc
index 176737fa7..4899e5ac3 100644
--- a/util/file.cc
+++ b/util/file.cc
@@ -14,6 +14,8 @@
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
+#else
+#include <unistd.h>
#endif
namespace util {
@@ -166,7 +168,7 @@ static const char letters[] =
does not exist at the time of the call to mkstemp. TMPL is
overwritten with the result. */
int
-mkstemp_and_unlink(char *tmpl)
+mkstemp_and_unlink(char *tmpl, bool and_unlink)
{
int len;
char *XXXXXX;
@@ -240,7 +242,9 @@ mkstemp_and_unlink(char *tmpl)
/* Modified for windows and to unlink */
// fd = open (tmpl, O_RDWR | O_CREAT | O_EXCL, _S_IREAD | _S_IWRITE);
- fd = _open (tmpl, _O_RDWR | _O_CREAT | _O_TEMPORARY | _O_EXCL | _O_BINARY, _S_IREAD | _S_IWRITE);
+ int flags = _O_RDWR | _O_CREAT | _O_EXCL | _O_BINARY;
+ if (and_unlink) flags |= _O_TEMPORARY;
+ fd = _open (tmpl, flags, _S_IREAD | _S_IWRITE);
if (fd >= 0)
{
errno = save_errno;
@@ -256,19 +260,20 @@ mkstemp_and_unlink(char *tmpl)
}
#else
int
-mkstemp_and_unlink(char *tmpl) {
+mkstemp_and_unlink(char *tmpl, bool and_unlink) {
int ret = mkstemp(tmpl);
- if (ret == -1) return -1;
- UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
+ if (ret != -1 && and_unlink) {
+ UTIL_THROW_IF(unlink(tmpl), util::ErrnoException, "Failed to delete " << tmpl);
+ }
return ret;
}
#endif
int TempMaker::Make() const {
- std::string copy(base_);
- copy.push_back(0);
+ std::string name(base_);
+ name.push_back(0);
int ret;
- UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&copy[0])), util::ErrnoException, "Failed to make a temporary based on " << base_);
+ UTIL_THROW_IF(-1 == (ret = mkstemp_and_unlink(&name[0], true)), util::ErrnoException, "Failed to make a temporary based on " << base_);
return ret;
}
@@ -277,4 +282,13 @@ std::FILE *TempMaker::MakeFile() const {
return FDOpenOrThrow(file);
}
+std::string TempMaker::Name(scoped_fd &opened) const {
+ std::string name(base_);
+ name.push_back(0);
+ int fd;
+ UTIL_THROW_IF(-1 == (fd = mkstemp_and_unlink(&name[0], false)), util::ErrnoException, "Failed to make a temporary based on " << base_);
+ opened.reset(fd);
+ return name;
+}
+
} // namespace util
diff --git a/util/file.hh b/util/file.hh
index 72c8ea768..8af1ff4ff 100644
--- a/util/file.hh
+++ b/util/file.hh
@@ -94,10 +94,13 @@ class TempMaker {
public:
explicit TempMaker(const std::string &prefix);
+ // These will already be unlinked for you.
int Make() const;
-
std::FILE *MakeFile() const;
+ // This will force you to close the fd instead of leaving it open.
+ std::string Name(scoped_fd &opened) const;
+
private:
std::string base_;
};
diff --git a/util/file_piece.cc b/util/file_piece.cc
index 169d5205c..19a68728a 100644
--- a/util/file_piece.cc
+++ b/util/file_piece.cc
@@ -36,13 +36,13 @@ const bool kSpaces[256] = {0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
FilePiece::FilePiece(const char *name, std::ostream *show_progress, std::size_t min_buffer) :
file_(OpenReadOrThrow(name)), total_size_(SizeFile(file_.get())), page_(SizePage()),
- progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) {
+ progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
Initialize(name, show_progress, min_buffer);
}
FilePiece::FilePiece(int fd, const char *name, std::ostream *show_progress, std::size_t min_buffer) :
file_(fd), total_size_(SizeFile(file_.get())), page_(SizePage()),
- progress_(total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name, total_size_) {
+ progress_(total_size_, total_size_ == kBadSize ? NULL : show_progress, std::string("Reading ") + name) {
Initialize(name, show_progress, min_buffer);
}
diff --git a/util/have.hh b/util/have.hh
index aca8c6264..1d76a7fcf 100644
--- a/util/have.hh
+++ b/util/have.hh
@@ -3,8 +3,10 @@
#define UTIL_HAVE__
#ifndef HAVE_ZLIB
+#if !defined(_WIN32) && !defined(_WIN64)
#define HAVE_ZLIB
#endif
+#endif
#ifndef HAVE_ICU
//#define HAVE_ICU
diff --git a/util/mmap.cc b/util/mmap.cc
index 3b1c58b83..bc9e3f815 100644
--- a/util/mmap.cc
+++ b/util/mmap.cc
@@ -20,6 +20,7 @@
#include <io.h>
#else
#include <sys/mman.h>
+#include <unistd.h>
#endif
namespace util {
diff --git a/util/murmur_hash.cc b/util/murmur_hash.cc
index 6accc21af..4f519312d 100644
--- a/util/murmur_hash.cc
+++ b/util/murmur_hash.cc
@@ -23,7 +23,7 @@ namespace util {
// 64-bit hash for 64-bit platforms
-uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
+uint64_t MurmurHash64A ( const void * key, std::size_t len, uint64_t seed )
{
const uint64_t m = 0xc6a4a7935bd1e995ULL;
const int r = 47;
@@ -81,7 +81,7 @@ uint64_t MurmurHash64A ( const void * key, std::size_t len, unsigned int seed )
// 64-bit hash for 32-bit platforms
-uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
+uint64_t MurmurHash64B ( const void * key, std::size_t len, uint64_t seed )
{
const unsigned int m = 0x5bd1e995;
const int r = 24;
@@ -150,17 +150,18 @@ uint64_t MurmurHash64B ( const void * key, std::size_t len, unsigned int seed )
return h;
}
+
// Trick to test for 64-bit architecture at compile time.
namespace {
-template <unsigned L> uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, unsigned int seed) {
+template <unsigned L> inline uint64_t MurmurHashNativeBackend(const void * key, std::size_t len, uint64_t seed) {
return MurmurHash64A(key, len, seed);
}
-template <> uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, unsigned int seed) {
+template <> inline uint64_t MurmurHashNativeBackend<4>(const void * key, std::size_t len, uint64_t seed) {
return MurmurHash64B(key, len, seed);
}
} // namespace
-uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed) {
+uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed) {
return MurmurHashNativeBackend<sizeof(void*)>(key, len, seed);
}
diff --git a/util/murmur_hash.hh b/util/murmur_hash.hh
index 638aaeb22..ae7e88dec 100644
--- a/util/murmur_hash.hh
+++ b/util/murmur_hash.hh
@@ -5,9 +5,9 @@
namespace util {
-uint64_t MurmurHash64A(const void * key, std::size_t len, unsigned int seed = 0);
-uint64_t MurmurHash64B(const void * key, std::size_t len, unsigned int seed = 0);
-uint64_t MurmurHashNative(const void * key, std::size_t len, unsigned int seed = 0);
+uint64_t MurmurHash64A(const void * key, std::size_t len, uint64_t seed = 0);
+uint64_t MurmurHash64B(const void * key, std::size_t len, uint64_t seed = 0);
+uint64_t MurmurHashNative(const void * key, std::size_t len, uint64_t seed = 0);
} // namespace util
diff --git a/util/probing_hash_table.hh b/util/probing_hash_table.hh
index f466cebc9..3354b68ef 100644
--- a/util/probing_hash_table.hh
+++ b/util/probing_hash_table.hh
@@ -78,12 +78,33 @@ template <class EntryT, class HashT, class EqualT = std::equal_to<typename Entry
}
}
+ // Return true if the value was found (and not inserted). This is consistent with Find but the opposite if hash_map!
+ template <class T> bool FindOrInsert(const T &t, MutableIterator &out) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
+ for (MutableIterator i(begin_ + (hash_(t.GetKey()) % buckets_));;) {
+ Key got(i->GetKey());
+ if (equal_(got, t.GetKey())) { out = i; return true; }
+ if (equal_(got, invalid_)) {
+ UTIL_THROW_IF(++entries_ >= buckets_, ProbingSizeException, "Hash table with " << buckets_ << " buckets is full.");
+ *i = t;
+ out = i;
+ return false;
+ }
+ if (++i == end_) i = begin_;
+ }
+ }
+
void FinishedInserting() {}
void LoadedBinary() {}
// Don't change anything related to GetKey,
template <class Key> bool UnsafeMutableFind(const Key key, MutableIterator &out) {
+#ifdef DEBUG
+ assert(initialized_);
+#endif
for (MutableIterator i(begin_ + (hash_(key) % buckets_));;) {
Key got(i->GetKey());
if (equal_(got, key)) { out = i; return true; }
diff --git a/util/probing_hash_table_test.cc b/util/probing_hash_table_test.cc
index 3f9024bdc..be0fa8597 100644
--- a/util/probing_hash_table_test.cc
+++ b/util/probing_hash_table_test.cc
@@ -4,7 +4,8 @@
#include <boost/test/unit_test.hpp>
#include <boost/scoped_array.hpp>
#include <boost/functional/hash.hpp>
-
+#include <stdio.h>
+#include <string.h>
#include <stdint.h>
namespace util {
@@ -28,9 +29,11 @@ struct Entry {
typedef ProbingHashTable<Entry, boost::hash<unsigned char> > Table;
BOOST_AUTO_TEST_CASE(simple) {
- boost::scoped_array<char> mem(new char[Table::Size(10, 1.2)]);
+ size_t size = Table::Size(10, 1.2);
+ boost::scoped_array<char> mem(new char[size]);
+ memset(mem.get(), 0, size);
- Table table(mem.get(), Table::Size(10, 1.2));
+ Table table(mem.get(), size);
const Entry *i = NULL;
BOOST_CHECK(!table.Find(2, i));
Entry to_ins;
diff --git a/util/string_piece.hh b/util/string_piece.hh
index 5de053aa8..be6a643d0 100644
--- a/util/string_piece.hh
+++ b/util/string_piece.hh
@@ -85,6 +85,11 @@ U_NAMESPACE_BEGIN
#include <string>
#include <string.h>
+#ifdef WIN32
+#undef max
+#undef min
+#endif
+
class StringPiece {
public:
typedef size_t size_type;
diff --git a/util/usage.cc b/util/usage.cc
new file mode 100644
index 000000000..e5cf76f05
--- /dev/null
+++ b/util/usage.cc
@@ -0,0 +1,46 @@
+#include "util/usage.hh"
+
+#include <fstream>
+#include <ostream>
+
+#include <string.h>
+#include <ctype.h>
+#if !defined(_WIN32) && !defined(_WIN64)
+#include <sys/resource.h>
+#include <sys/time.h>
+#endif
+
+namespace util {
+
+namespace {
+#if !defined(_WIN32) && !defined(_WIN64)
+float FloatSec(const struct timeval &tv) {
+ return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
+}
+#endif
+} // namespace
+
+void PrintUsage(std::ostream &out) {
+#if !defined(_WIN32) && !defined(_WIN64)
+ struct rusage usage;
+ if (getrusage(RUSAGE_SELF, &usage)) {
+ perror("getrusage");
+ return;
+ }
+ out << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';
+
+ // Linux doesn't set memory usage :-(.
+ std::ifstream status("/proc/self/status", std::ios::in);
+ std::string line;
+ while (getline(status, line)) {
+ if (!strncmp(line.c_str(), "VmRSS:\t", 7)) {
+ out << "VmRSS: " << (line.c_str() + 7) << '\n';
+ break;
+ } else if (!strncmp(line.c_str(), "VmPeak:\t", 8)) {
+ out << "VmPeak: " << (line.c_str() + 8) << '\n';
+ }
+ }
+#endif
+}
+
+} // namespace util
diff --git a/util/usage.hh b/util/usage.hh
new file mode 100644
index 000000000..d331ff74c
--- /dev/null
+++ b/util/usage.hh
@@ -0,0 +1,8 @@
+#ifndef UTIL_USAGE__
+#define UTIL_USAGE__
+#include <iosfwd>
+
+namespace util {
+void PrintUsage(std::ostream &to);
+} // namespace util
+#endif // UTIL_USAGE__
diff --git a/util/util.xcodeproj/project.pbxproj b/util/util.xcodeproj/project.pbxproj
index 2f8134a39..5183f7a88 100644
--- a/util/util.xcodeproj/project.pbxproj
+++ b/util/util.xcodeproj/project.pbxproj
@@ -22,7 +22,6 @@
1EE8C2B11476A2EA002496F2 /* getopt.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C28B1476A2E9002496F2 /* getopt.hh */; };
1EE8C2B21476A2EA002496F2 /* have.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C28C1476A2E9002496F2 /* have.hh */; };
1EE8C2B41476A2EA002496F2 /* joint_sort.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C28E1476A2E9002496F2 /* joint_sort.hh */; };
- 1EE8C2B61476A2EA002496F2 /* key_value_packing.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C2901476A2E9002496F2 /* key_value_packing.hh */; };
1EE8C2B81476A2EA002496F2 /* mmap.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C2951476A2E9002496F2 /* mmap.cc */; };
1EE8C2B91476A2EA002496F2 /* mmap.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C2961476A2E9002496F2 /* mmap.hh */; };
1EE8C2BA1476A2EA002496F2 /* murmur_hash.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C2971476A2E9002496F2 /* murmur_hash.cc */; };
@@ -53,7 +52,6 @@
1EE8C28B1476A2E9002496F2 /* getopt.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = getopt.hh; sourceTree = "<group>"; };
1EE8C28C1476A2E9002496F2 /* have.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = have.hh; sourceTree = "<group>"; };
1EE8C28E1476A2E9002496F2 /* joint_sort.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = joint_sort.hh; sourceTree = "<group>"; };
- 1EE8C2901476A2E9002496F2 /* key_value_packing.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = key_value_packing.hh; sourceTree = "<group>"; };
1EE8C2951476A2E9002496F2 /* mmap.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = mmap.cc; sourceTree = "<group>"; };
1EE8C2961476A2E9002496F2 /* mmap.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = mmap.hh; sourceTree = "<group>"; };
1EE8C2971476A2E9002496F2 /* murmur_hash.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = murmur_hash.cc; sourceTree = "<group>"; };
@@ -96,7 +94,6 @@
1EE8C28B1476A2E9002496F2 /* getopt.hh */,
1EE8C28C1476A2E9002496F2 /* have.hh */,
1EE8C28E1476A2E9002496F2 /* joint_sort.hh */,
- 1EE8C2901476A2E9002496F2 /* key_value_packing.hh */,
1EE8C2951476A2E9002496F2 /* mmap.cc */,
1EE8C2961476A2E9002496F2 /* mmap.hh */,
1EE8C2971476A2E9002496F2 /* murmur_hash.cc */,
@@ -136,7 +133,6 @@
1EE8C2B11476A2EA002496F2 /* getopt.hh in Headers */,
1EE8C2B21476A2EA002496F2 /* have.hh in Headers */,
1EE8C2B41476A2EA002496F2 /* joint_sort.hh in Headers */,
- 1EE8C2B61476A2EA002496F2 /* key_value_packing.hh in Headers */,
1EE8C2B91476A2EA002496F2 /* mmap.hh in Headers */,
1EE8C2BB1476A2EA002496F2 /* murmur_hash.hh in Headers */,
1EE8C2BD1476A2EA002496F2 /* probing_hash_table.hh in Headers */,
@@ -174,6 +170,9 @@
/* Begin PBXProject section */
1EE8C2681476A262002496F2 /* Project object */ = {
isa = PBXProject;
+ attributes = {
+ LastUpgradeCheck = 0420;
+ };
buildConfigurationList = 1EE8C26B1476A262002496F2 /* Build configuration list for PBXProject "util" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;