diff options
author | Kenneth Heafield <github@kheafield.com> | 2014-06-22 16:54:03 +0400 |
---|---|---|
committer | Kenneth Heafield <github@kheafield.com> | 2014-06-22 16:54:03 +0400 |
commit | 6b22097cbe81a3c50e65f8ba4ce641b042455589 (patch) | |
tree | 2e9150eaa3750b90e5cb5cbf02160a66b3454967 /lm | |
parent | c35a49e606426d37ce9bfe72bdb4b32c62b47e28 (diff) |
KenLM e94d0b3ea9082d8aa443e15e33aee6fb9c2af990 including possible deadlock fix
Diffstat (limited to 'lm')
-rw-r--r-- | lm/builder/pipeline.cc | 55 |
1 file changed, 31 insertions, 24 deletions
diff --git a/lm/builder/pipeline.cc b/lm/builder/pipeline.cc index da82c22e7..e91870808 100644 --- a/lm/builder/pipeline.cc +++ b/lm/builder/pipeline.cc @@ -302,33 +302,40 @@ void Pipeline(PipelineConfig config, int text_file, int out_arpa) { "Not enough memory to fit " << (config.order * config.block_count) << " blocks with minimum size " << config.minimum_block << ". Increase memory to " << (config.minimum_block * config.order * config.block_count) << " bytes or decrease the minimum block size."); UTIL_TIMER("(%w s) Total wall time elapsed\n"); - Master master(config); - - util::scoped_fd vocab_file(config.vocab_file.empty() ? - util::MakeTemp(config.TempPrefix()) : - util::CreateOrThrow(config.vocab_file.c_str())); - uint64_t token_count; - std::string text_file_name; - CountText(text_file, vocab_file.get(), master, token_count, text_file_name); - std::vector<uint64_t> counts; - std::vector<uint64_t> counts_pruned; - std::vector<Discount> discounts; - master >> AdjustCounts(counts, counts_pruned, discounts, config.prune_thresholds); + Master master(config); + // master's destructor will wait for chains. But they might be deadlocked if + // this thread dies because e.g. it ran out of memory. + try { + util::scoped_fd vocab_file(config.vocab_file.empty() ? 
+ util::MakeTemp(config.TempPrefix()) : + util::CreateOrThrow(config.vocab_file.c_str())); + uint64_t token_count; + std::string text_file_name; + CountText(text_file, vocab_file.get(), master, token_count, text_file_name); + + std::vector<uint64_t> counts; + std::vector<uint64_t> counts_pruned; + std::vector<Discount> discounts; + master >> AdjustCounts(counts, counts_pruned, discounts, config.prune_thresholds); + + { + util::FixedArray<util::stream::FileBuffer> gammas; + Sorts<SuffixOrder> primary; + InitialProbabilities(counts, counts_pruned, discounts, master, primary, gammas, config.prune_thresholds); + InterpolateProbabilities(counts_pruned, master, primary, gammas); + } - { - util::FixedArray<util::stream::FileBuffer> gammas; - Sorts<SuffixOrder> primary; - InitialProbabilities(counts, counts_pruned, discounts, master, primary, gammas, config.prune_thresholds); - InterpolateProbabilities(counts_pruned, master, primary, gammas); + std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl; + VocabReconstitute vocab(vocab_file.get()); + UTIL_THROW_IF(vocab.Size() != counts[0], util::Exception, "Vocab words don't match up. Is there a null byte in the input?"); + HeaderInfo header_info(text_file_name, token_count); + master >> PrintARPA(vocab, counts_pruned, (config.verbose_header ? &header_info : NULL), out_arpa) >> util::stream::kRecycle; + master.MutableChains().Wait(true); + } catch (const util::Exception &e) { + std::cerr << e.what() << std::endl; + abort(); } - - std::cerr << "=== 5/5 Writing ARPA model ===" << std::endl; - VocabReconstitute vocab(vocab_file.get()); - UTIL_THROW_IF(vocab.Size() != counts[0], util::Exception, "Vocab words don't match up. Is there a null byte in the input?"); - HeaderInfo header_info(text_file_name, token_count); - master >> PrintARPA(vocab, counts_pruned, (config.verbose_header ? &header_info : NULL), out_arpa) >> util::stream::kRecycle; - master.MutableChains().Wait(true); } }} // namespaces |