Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/moses-smt/mosesdecoder.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- .gitignore 6
-rw-r--r-- Jamroot 4
-rw-r--r-- contrib/server/Jamfile 6
-rw-r--r-- jam-files/sanity.jam 14
-rw-r--r-- lm/builder/corpus_count.cc 24
-rw-r--r-- lm/builder/interpolate.cc 8
-rw-r--r-- lm/config.cc 4
-rw-r--r-- lm/filter/arpa_io.hh 3
-rw-r--r-- lm/filter/count_io.hh 11
-rw-r--r-- lm/filter/filter_main.cc 14
-rw-r--r-- lm/filter/vocab.cc 5
-rw-r--r-- mert/BleuDocScorer.cpp 6
-rw-r--r-- mert/PreProcessFilter.cpp 4
-rw-r--r-- mert/Timer.cpp 6
-rw-r--r-- mert/evaluator.cpp 4
-rw-r--r-- moses-cmd/Main.cpp 2
-rw-r--r-- moses/FF/LexicalReordering/LexicalReorderingTable.cpp 2
-rw-r--r-- moses/FeatureVector.cpp 4
-rw-r--r-- moses/File.h 6
-rw-r--r-- moses/Jamfile 6
-rw-r--r-- moses/LM/Jamfile 5
-rw-r--r-- moses/Manager.cpp 6
-rw-r--r-- moses/TranslationModel/CompactPT/BlockHashIndex.h 2
-rw-r--r-- moses/TranslationModel/CompactPT/MmapAllocator.h 12
-rw-r--r-- moses/TranslationModel/DynSAInclude/types.h 2
-rw-r--r-- moses/TranslationModel/PhraseDictionary.h 1
-rw-r--r-- moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp 35
-rw-r--r-- util/file.cc 22
-rw-r--r-- util/file_piece_test.cc 12
-rw-r--r-- util/read_compressed_test.cc 16
30 files changed, 203 insertions, 49 deletions
diff --git a/.gitignore b/.gitignore
index 965eec2b9..5eb32575d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -57,7 +57,6 @@ scripts/training/phrase-extract/relax-parse
scripts/training/phrase-extract/score
scripts/training/phrase-extract/statistics
scripts/training/symal/symal
-scripts/training/train-model.perl
dist
bin
previous.sh
@@ -71,3 +70,8 @@ mert/sentence-bleu
.DS_Store
*.pbxuser
*.mode1v3
+
+*.exe
+build/
+nbproject/
+
diff --git a/Jamroot b/Jamroot
index e4a8a9ed3..2de91ca74 100644
--- a/Jamroot
+++ b/Jamroot
@@ -79,6 +79,10 @@ external-lib z ;
lib dl : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
requirements += <library>dl ;
+if $NT {
+ lib mman : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
+ requirements += <library>mman ;
+}
if ! [ option.get "without-tcmalloc" : : "yes" ] && [ test_library "tcmalloc_minimal" ] {
if [ option.get "full-tcmalloc" : : "yes" ] {
diff --git a/contrib/server/Jamfile b/contrib/server/Jamfile
index 445c07ae8..69522a355 100644
--- a/contrib/server/Jamfile
+++ b/contrib/server/Jamfile
@@ -11,7 +11,11 @@ if $(with-xmlrpc-c) {
exit Could not find $(xmlrpc-command) : 1 ;
}
} else {
- xmlrpc-check = [ _shell "xmlrpc-c-config --features 2>/dev/null" : exit-status ] ;
+ local cmd = "xmlrpc-c-config --features 2>/dev/null" ;
+ if $NT {
+ cmd = "xmlrpc-c-config --features 2>NUL" ;
+ }
+ xmlrpc-check = [ _shell $(cmd) : exit-status ] ;
if $(xmlrpc-check[2]) = 0 {
if [ MATCH "(abyss-server)" : $(xmlrpc-check[1]) ] {
build-moses-server = true ;
diff --git a/jam-files/sanity.jam b/jam-files/sanity.jam
index a9abac9d4..6503f7794 100644
--- a/jam-files/sanity.jam
+++ b/jam-files/sanity.jam
@@ -33,7 +33,10 @@ rule test_flags ( flags * : main ? ) {
main = "int main() {}" ;
}
local cmd = "bash -c \"g++ "$(flags:J=" ")" -x c++ - <<<'$(main)' -o $(TOP)/dummy >/dev/null 2>/dev/null && rm $(TOP)/dummy 2>/dev/null\"" ;
- local ret = [ SHELL $(cmd) : exit-status ] ;
+ if $NT {
+ cmd = "echo $(main) | g++ "$(flags:J=" ")" -x c++ - -o $(TOP)\\dummy >NUL 2>NUL && del $(TOP)\\dummy 2>NUL " ;
+ }
+ local ret = [ SHELL "$(cmd)" : exit-status ] ;
if --debug-configuration in [ modules.peek : ARGV ] {
echo $(cmd) ;
echo $(ret) ;
@@ -163,6 +166,9 @@ rule boost-lib ( name macro : deps * ) {
#Argument is e.g. 103600
rule boost ( min-version ) {
local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ;
+ if $NT {
+ cmd = "g++ "$(I-boost-include)" -dM -x c++ -E NUL -include boost/version.hpp 2>NUL | find \"#define BOOST_\"" ;
+ }
local boost-shell = [ SHELL "$(cmd)" : exit-status ] ;
if $(boost-shell[2]) != 0 && $(CLEANING) = no {
echo Failed to run "$(cmd)" ;
@@ -310,7 +316,11 @@ actions write-options {
#file. This file can then be used with <dependency>$(file) to force
#recompilation.
rule update-if-changed ( file current ) {
- if ( ! [ path.exists $(file) ] ) || ( [ _shell "cat $(file)" ] != $(current) ) {
+ local cmd = "cat $(file)" ;
+ if $NT {
+ cmd = "type $(file)" ;
+ }
+ if ( ! [ path.exists $(file) ] ) || ( [ _shell "$(cmd)" ] != $(current) ) {
make $(file) : : $(__name__).write-options : <options-to-write>$(current) ;
always $(file) ;
}
diff --git a/lm/builder/corpus_count.cc b/lm/builder/corpus_count.cc
index aea93ad10..3edd3216a 100644
--- a/lm/builder/corpus_count.cc
+++ b/lm/builder/corpus_count.cc
@@ -87,7 +87,7 @@ class VocabHandout {
Table table_;
std::size_t double_cutoff_;
-
+
util::FakeOFStream word_list_;
};
@@ -98,7 +98,7 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
std::size_t operator()(const WordIndex *start) const {
return util::MurmurHashNative(start, size_);
}
-
+
private:
const std::size_t size_;
};
@@ -106,11 +106,11 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
public:
explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
-
+
bool operator()(const WordIndex *first, const WordIndex *second) const {
return !memcmp(first, second, size_);
- }
-
+ }
+
private:
const std::size_t size_;
};
@@ -131,7 +131,7 @@ typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;
class Writer {
public:
- Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
+ Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
: block_(position), gram_(block_->Get(), order),
dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
@@ -140,7 +140,7 @@ class Writer {
dedupe_.Clear();
assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
if (order == 1) {
- // Add special words. AdjustCounts is responsible if order != 1.
+ // Add special words. AdjustCounts is responsible if order != 1.
AddUnigramWord(kUNK);
AddUnigramWord(kBOS);
}
@@ -170,16 +170,16 @@ class Writer {
memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
return;
}
- // Complete the write.
+ // Complete the write.
gram_.Count() = 1;
- // Prepare the next n-gram.
+ // Prepare the next n-gram.
if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
NGram last(gram_);
gram_.NextInMemory();
std::copy(last.begin() + 1, last.end(), gram_.begin());
return;
}
- // Block end. Need to store the context in a temporary buffer.
+ // Block end. Need to store the context in a temporary buffer.
std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
dedupe_.Clear();
block_->SetValidSize(block_size_);
@@ -207,7 +207,7 @@ class Writer {
// Hash table combiner implementation.
Dedupe dedupe_;
- // Small buffer to hold existing ngrams when shifting across a block boundary.
+ // Small buffer to hold existing ngrams when shifting across a block boundary.
boost::scoped_array<WordIndex> buffer_;
const std::size_t block_size_;
@@ -223,7 +223,7 @@ std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
return VocabHandout::MemUsage(vocab_estimate);
}
-CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block)
+CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block)
: from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)) {
diff --git a/lm/builder/interpolate.cc b/lm/builder/interpolate.cc
index 500268069..52e69f02e 100644
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@@ -33,12 +33,12 @@ class Callback {
pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
probs_[order_minus_1 + 1] = pay.complete.prob;
pay.complete.prob = log10(pay.complete.prob);
- // TODO: this is a hack to skip n-grams that don't appear as context. Pruning will require some different handling.
- if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) {
+ // TODO: this is a hack to skip n-grams that don't appear as context. Pruning will require some different handling.
+ if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS && backoffs_[order_minus_1].Get()) { // check valid pointer at tht end
pay.complete.backoff = log10(*static_cast<const float*>(backoffs_[order_minus_1].Get()));
++backoffs_[order_minus_1];
} else {
- // Not a context.
+ // Not a context.
pay.complete.backoff = 0.0;
}
}
@@ -52,7 +52,7 @@ class Callback {
};
} // namespace
-Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs)
+Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs)
: uniform_prob_(1.0 / static_cast<float>(unigram_count - 1)), backoffs_(backoffs) {}
// perform order-wise interpolation
diff --git a/lm/config.cc b/lm/config.cc
index 9520c41c8..dc3365319 100644
--- a/lm/config.cc
+++ b/lm/config.cc
@@ -11,7 +11,11 @@ Config::Config() :
enumerate_vocab(NULL),
unknown_missing(COMPLAIN),
sentence_marker_missing(THROW_UP),
+#if defined(_WIN32) || defined(_WIN64)
+ positive_log_probability(SILENT),
+#else
positive_log_probability(THROW_UP),
+#endif
unknown_missing_logprob(-100.0),
probing_multiplier(1.5),
building_memory(1073741824ULL), // 1 GB
diff --git a/lm/filter/arpa_io.hh b/lm/filter/arpa_io.hh
index 5b31620b5..08e658666 100644
--- a/lm/filter/arpa_io.hh
+++ b/lm/filter/arpa_io.hh
@@ -14,7 +14,10 @@
#include <string>
#include <vector>
+#if !defined __MINGW32__
#include <err.h>
+#endif
+
#include <string.h>
#include <stdint.h>
diff --git a/lm/filter/count_io.hh b/lm/filter/count_io.hh
index 97c0fa25e..740b8d50e 100644
--- a/lm/filter/count_io.hh
+++ b/lm/filter/count_io.hh
@@ -5,7 +5,9 @@
#include <iostream>
#include <string>
+#if !defined __MINGW32__
#include <err.h>
+#endif
#include "util/file_piece.hh"
@@ -17,7 +19,12 @@ class CountOutput : boost::noncopyable {
void AddNGram(const StringPiece &line) {
if (!(file_ << line << '\n')) {
+#if defined __MINGW32__
+ std::cerr<<"Writing counts file failed"<<std::endl;
+ exit(3);
+#else
err(3, "Writing counts file failed");
+#endif
}
}
@@ -35,7 +42,7 @@ class CountOutput : boost::noncopyable {
class CountBatch {
public:
- explicit CountBatch(std::streamsize initial_read)
+ explicit CountBatch(std::streamsize initial_read)
: initial_read_(initial_read) {
buffer_.reserve(initial_read);
}
@@ -68,7 +75,7 @@ class CountBatch {
private:
std::streamsize initial_read_;
- // This could have been a std::string but that's less happy with raw writes.
+ // This could have been a std::string but that's less happy with raw writes.
std::vector<char> buffer_;
};
diff --git a/lm/filter/filter_main.cc b/lm/filter/filter_main.cc
index 1736bc405..f89ac4df3 100644
--- a/lm/filter/filter_main.cc
+++ b/lm/filter/filter_main.cc
@@ -57,7 +57,7 @@ typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} Fil
typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;
struct Config {
- Config() :
+ Config() :
#ifndef NTHREAD
batch_size(25000),
threads(boost::thread::hardware_concurrency()),
@@ -202,7 +202,7 @@ int main(int argc, char *argv[]) {
return 1;
}
}
-
+
if (config.mode == lm::MODE_UNSET) {
lm::DisplayHelp(argv[0]);
return 1;
@@ -221,7 +221,12 @@ int main(int argc, char *argv[]) {
} else if (!strncmp(cmd_input, "model:", 6)) {
cmd_input += 6;
} else if (strchr(cmd_input, ':')) {
+#if defined __MINGW32__
+ std::cerr << "Specify vocab: or model: before the input file name, not " << cmd_input << std::endl;
+ exit(1);
+#else
errx(1, "Specify vocab: or model: before the input file name, not \"%s\"", cmd_input);
+#endif // defined
} else {
std::cerr << "Assuming that " << cmd_input << " is a model file" << std::endl;
}
@@ -232,7 +237,12 @@ int main(int argc, char *argv[]) {
} else {
cmd_file.open(cmd_input, std::ios::in);
if (!cmd_file) {
+#if defined __MINGW32__
+ std::cerr << "Could not open input file " << cmd_input << std::endl;
+ exit(2);
+#else
err(2, "Could not open input file %s", cmd_input);
+#endif // defined
}
vocab = &cmd_file;
}
diff --git a/lm/filter/vocab.cc b/lm/filter/vocab.cc
index 7ee4e84ba..7ed5d92fb 100644
--- a/lm/filter/vocab.cc
+++ b/lm/filter/vocab.cc
@@ -4,7 +4,10 @@
#include <iostream>
#include <ctype.h>
+
+#if !defined __MINGW32__
#include <err.h>
+#endif
namespace lm {
namespace vocab {
@@ -31,7 +34,7 @@ bool IsLineEnd(std::istream &in) {
}// namespace
// Read space separated words in enter separated lines. These lines can be
-// very long, so don't read an entire line at a time.
+// very long, so don't read an entire line at a time.
unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
in.exceptions(std::istream::badbit);
unsigned int sentence = 0;
diff --git a/mert/BleuDocScorer.cpp b/mert/BleuDocScorer.cpp
index b96a6bc48..67efae552 100644
--- a/mert/BleuDocScorer.cpp
+++ b/mert/BleuDocScorer.cpp
@@ -18,6 +18,12 @@
using namespace std;
+#if defined __MINGW32__
+#ifndef uint
+#define uint uint16_t
+#endif // uint
+#endif // if
+
namespace
{
diff --git a/mert/PreProcessFilter.cpp b/mert/PreProcessFilter.cpp
index 4fbcc0c89..afe5f0f6e 100644
--- a/mert/PreProcessFilter.cpp
+++ b/mert/PreProcessFilter.cpp
@@ -31,6 +31,9 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
: m_toFilter(NULL),
m_fromFilter(NULL)
{
+#if defined __MINGW32__
+ //TODO(jie): replace this function with boost implementation
+#else
// Child error signal install
// sigaction is the replacement for the traditional signal() method
struct sigaction action;
@@ -116,6 +119,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
perror("Error: fork failed");
exit(EXIT_FAILURE);
}
+#endif // defined
}
string PreProcessFilter::ProcessSentence(const string& sentence)
diff --git a/mert/Timer.cpp b/mert/Timer.cpp
index 47fa5c750..e0a220dda 100644
--- a/mert/Timer.cpp
+++ b/mert/Timer.cpp
@@ -6,10 +6,14 @@
#include <sys/time.h>
#endif
+#if defined __MINGW32__
+#include <sys/time.h>
+#endif // defined
+
namespace
{
-#if !defined(_WIN32) && !defined(_WIN64)
+#if (!defined(_WIN32) && !defined(_WIN64)) || defined __MINGW32__
uint64_t GetMicroSeconds(const struct timeval& tv)
{
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
diff --git a/mert/evaluator.cpp b/mert/evaluator.cpp
index f976f39b3..eb95c9018 100644
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@@ -6,6 +6,10 @@
#include <getopt.h>
#include <math.h>
+#if defined __MINGW32__
+#include <time.h>
+#endif // defined
+
#include "Scorer.h"
#include "ScorerFactory.h"
#include "Timer.h"
diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp
index 4f9c2dd17..b2738be9f 100644
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@@ -543,7 +543,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
-
+
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");
diff --git a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
index c33851899..80107f743 100644
--- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
@@ -8,7 +8,7 @@
#include "moses/TargetPhrase.h"
#include "moses/TargetPhraseCollection.h"
-#ifndef WIN32
+#if !defined WIN32 || defined __MINGW32__
#include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
#endif
diff --git a/moses/FeatureVector.cpp b/moses/FeatureVector.cpp
index b495e811f..e840f0c83 100644
--- a/moses/FeatureVector.cpp
+++ b/moses/FeatureVector.cpp
@@ -25,6 +25,10 @@
#include <sstream>
#include <stdexcept>
+#if defined __MINGW32__ && defined WITH_THREADS
+#include <boost/thread/locks.hpp>
+#endif // WITH_THREADS
+
#include "FeatureVector.h"
#include "util/string_piece_hash.hh"
diff --git a/moses/File.h b/moses/File.h
index 09d9ddc92..d25bb972e 100644
--- a/moses/File.h
+++ b/moses/File.h
@@ -19,9 +19,15 @@ namespace Moses
{
#ifdef WIN32
+#ifdef __MINGW32__
+#define OFF_T __int64
+#define FTELLO(f) ftello64(f)
+#define FSEEKO(file, offset, origin) fseeko64(file, offset, origin)
+#else
#define OFF_T __int64
#define FTELLO(file) _ftelli64(file)
#define FSEEKO(file, offset, origin) _fseeki64(file, offset, origin)
+#endif
#else
#define OFF_T off_t
diff --git a/moses/Jamfile b/moses/Jamfile
index 26a98c4c9..cb00a10e2 100644
--- a/moses/Jamfile
+++ b/moses/Jamfile
@@ -22,7 +22,11 @@ if [ option.get "with-synlm" : no : yes ] = yes
alias synlm ;
}
-local have-clock = [ SHELL "bash -c \"g++ -dM -x c++ -E /dev/null -include time.h 2>/dev/null |grep CLOCK_MONOTONIC\"" : exit-status ] ;
+local cmd = "bash -c \"g++ -dM -x c++ -E /dev/null -include time.h 2>/dev/null |grep CLOCK_MONOTONIC\"" ;
+if $NT {
+ cmd = "g++ -dM -x c++ -E NUL -include time.h 2>NUL | find \"CLOCK_MONOTONIC\"" ;
+}
+local have-clock = [ SHELL "$(cmd)" : exit-status ] ;
if $(have-clock[2]) = 0 {
#required for clock_gettime. Threads already have rt.
lib rt : : <runtime-link>static:<link>static <runtime-link>shared:<link>shared ;
diff --git a/moses/LM/Jamfile b/moses/LM/Jamfile
index 777864f23..cadcd5661 100644
--- a/moses/LM/Jamfile
+++ b/moses/LM/Jamfile
@@ -75,8 +75,13 @@ obj ORLM.o : ORLM.cpp ..//headers ../TranslationModel/DynSAInclude//dynsa : : :
#Top-level LM library. If you've added a file that doesn't depend on external
#libraries, put it here.
+if $NT {
+alias LM : Backward.cpp BackwardLMState.cpp Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp SingleFactor.cpp ORLM.o
+ ../../lm//kenlm ..//headers $(dependencies) ;
+} else {
alias LM : Backward.cpp BackwardLMState.cpp Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp Remote.cpp SingleFactor.cpp ORLM.o
../../lm//kenlm ..//headers $(dependencies) ;
+}
alias macros : : : : <define>$(lmmacros) ;
diff --git a/moses/Manager.cpp b/moses/Manager.cpp
index 74d79fe60..dac513ae2 100644
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@@ -389,13 +389,13 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
//cerr << endl;
//draw the sample
- float random = log((float)rand()/RAND_MAX);
+ float frandom = log((float)rand()/RAND_MAX);
size_t position = 1;
float sum = candidateScores[0];
- for (; position < candidateScores.size() && sum < random; ++position) {
+ for (; position < candidateScores.size() && sum < frandom; ++position) {
sum = log_sum(sum,candidateScores[position]);
}
- //cerr << "Random: " << random << " Chose " << position-1 << endl;
+ //cerr << "Random: " << frandom << " Chose " << position-1 << endl;
const Hypothesis* chosen = candidates[position-1];
path.push_back(chosen);
}
diff --git a/moses/TranslationModel/CompactPT/BlockHashIndex.h b/moses/TranslationModel/CompactPT/BlockHashIndex.h
index c245d2d66..630313e2c 100644
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.h
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h
@@ -35,6 +35,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifdef WITH_THREADS
#include "moses/ThreadPool.h"
+#else
+#include <time.h>
#endif
namespace Moses
diff --git a/moses/TranslationModel/CompactPT/MmapAllocator.h b/moses/TranslationModel/CompactPT/MmapAllocator.h
index 6ff890c88..bf08574ff 100644
--- a/moses/TranslationModel/CompactPT/MmapAllocator.h
+++ b/moses/TranslationModel/CompactPT/MmapAllocator.h
@@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cstdio>
#include <unistd.h>
+#ifndef __MMAN_PAGE_SIZE__
+#define __MMAN_PAGE_SIZE__ sysconf(_SC_PAGE_SIZE)
+#endif
+
namespace Moses
{
template <class T>
@@ -56,25 +60,25 @@ public:
MmapAllocator() throw()
: m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
- m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr) throw()
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
- m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw()
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
- m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0)) {
}
MmapAllocator(std::string fileName) throw()
: m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
- m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+ m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
diff --git a/moses/TranslationModel/DynSAInclude/types.h b/moses/TranslationModel/DynSAInclude/types.h
index cb714dab4..b3122a4ea 100644
--- a/moses/TranslationModel/DynSAInclude/types.h
+++ b/moses/TranslationModel/DynSAInclude/types.h
@@ -8,7 +8,7 @@
#include <typeinfo>
#include <stdint.h>
-#ifdef WIN32
+#if defined WIN32 && !defined __MINGW32__
#define iterate(c, i) for(decltype(c.begin()) i = c.begin(); i != c.end(); ++i)
#define piterate(c, i) for(decltype(c->begin()) i = c->begin(); i != c->end(); ++i)
#define riterate(c, i) for(decltype(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)
diff --git a/moses/TranslationModel/PhraseDictionary.h b/moses/TranslationModel/PhraseDictionary.h
index 5c2079207..73fc1c358 100644
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@@ -34,6 +34,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
+#include <time.h>
#endif
#include "moses/Phrase.h"
diff --git a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
index 9b1623f92..9185ad22e 100644
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@@ -49,6 +49,32 @@
using namespace std;
+#if defined __MINGW32__ && !defined mkdtemp
+#include <windows.h>
+#include <errno.h>
+/**
+ * Minimal mkdtemp() replacement for MinGW builds.
+ *
+ * Replaces the trailing "XXXXXX" of tempbuf in place with random characters
+ * and creates a directory with the resulting name. As with POSIX mkdtemp(),
+ * the directory is created at the path named by the template itself, so a
+ * caller that wants it under the system temp directory must put that prefix
+ * in the template. Nothing is ever written past the template's original
+ * length, so the caller's buffer only needs to hold the template.
+ *
+ * @param tempbuf writable, NUL-terminated template ending in "XXXXXX".
+ * @return tempbuf on success; NULL on failure with errno set (EINVAL for a
+ *         bad template, EEXIST when no unused name was found, ENOENT when the
+ *         parent path does not exist, EACCES for any other failure).
+ */
+char *mkdtemp(char *tempbuf) {
+  // Lower-case letters and digits only: Windows file names are
+  // case-insensitive, so mixed case would not add distinct names.
+  static const char kAlphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";
+  static const size_t kSuffixLen = 6;
+  static const int kMaxAttempts = 100;
+  static bool seeded = false;
+
+  const size_t len = tempbuf ? strlen(tempbuf) : 0;
+  // The length check must come first: len - kSuffixLen wraps around for
+  // templates shorter than the suffix.
+  if (len < kSuffixLen || strcmp(tempbuf + len - kSuffixLen, "XXXXXX")) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  // Seed once; re-seeding with time(0) on every call would hand out the same
+  // name to calls made within the same second.
+  if (!seeded) {
+    srand((unsigned)time(0));
+    seeded = true;
+  }
+
+  char *suffix = tempbuf + len - kSuffixLen;
+  for (int attempt = 0; attempt < kMaxAttempts; ++attempt) {
+    for (size_t i = 0; i < kSuffixLen; ++i) {
+      suffix[i] = kAlphabet[rand() % (sizeof(kAlphabet) - 1)];
+    }
+    if (::CreateDirectoryA(tempbuf, NULL)) {
+      return tempbuf;
+    }
+    const DWORD error = ::GetLastError();
+    if (error != ERROR_ALREADY_EXISTS) {
+      errno = (error == ERROR_PATH_NOT_FOUND) ? ENOENT : EACCES;
+      return NULL;
+    }
+    // Name already taken: draw another suffix and try again.
+  }
+  errno = EEXIST;
+  return NULL;
+}
+#endif
+
namespace Moses
{
@@ -82,6 +108,9 @@ ChartRuleLookupManager *PhraseDictionaryFuzzyMatch::CreateRuleLookupManager(
int removedirectoryrecursively(const char *dirname)
{
+#if defined __MINGW32__
+ //TODO(jie): replace this function with boost implementation
+#else
DIR *dir;
struct dirent *entry;
char path[PATH_MAX];
@@ -127,13 +156,17 @@ int removedirectoryrecursively(const char *dirname)
* printing here, see above)
*/
//printf("(not really) Deleting: %s\n", dirname);
-
+#endif
return 1;
}
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
+#if defined __MINGW32__
+ char dirName[] = "moses.XXXXXX";
+#else
char dirName[] = "/tmp/moses.XXXXXX";
+#endif // defined
char *temp = mkdtemp(dirName);
CHECK(temp);
string dirNameStr(dirName);
diff --git a/util/file.cc b/util/file.cc
index bef04cb1c..0b333e003 100644
--- a/util/file.cc
+++ b/util/file.cc
@@ -17,7 +17,10 @@
#include <fcntl.h>
#include <stdint.h>
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+#include <windows.h>
+#include <unistd.h>
+#elif defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#include <algorithm>
@@ -76,7 +79,12 @@ int CreateOrThrow(const char *name) {
}
uint64_t SizeFile(int fd) {
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+ struct stat sb;
+ int ret = fstat(fd, &sb);
+ if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
+ return sb.st_size;
+#elif defined(_WIN32) || defined(_WIN64)
__int64 ret = _filelengthi64(fd);
return (ret == -1) ? kBadSize : ret;
#else // Not windows.
@@ -100,7 +108,9 @@ uint64_t SizeOrThrow(int fd) {
}
void ResizeOrThrow(int fd, uint64_t to) {
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+ int ret = ftruncate
+#elif defined(_WIN32) || defined(_WIN64)
errno_t ret = _chsize_s
#elif defined(OS_ANDROID)
int ret = ftruncate64
@@ -162,7 +172,7 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
uint8_t *to = static_cast<uint8_t*>(to_void);
#if defined(_WIN32) || defined(_WIN64)
- UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
+ //UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
#endif
for (;size ;) {
@@ -251,7 +261,9 @@ typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
// Can't we all just get along?
void InternalSeek(int fd, int64_t off, int whence) {
if (
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+ (off_t)-1 == lseek(fd, off, whence)
+#elif defined(_WIN32) || defined(_WIN64)
(__int64)-1 == _lseeki64(fd, off, whence)
#elif defined(OS_ANDROID)
(off64_t)-1 == lseek64(fd, off, whence)
diff --git a/util/file_piece_test.cc b/util/file_piece_test.cc
index 7336007d7..4361877fe 100644
--- a/util/file_piece_test.cc
+++ b/util/file_piece_test.cc
@@ -1,4 +1,4 @@
-// Tests might fail if you have creative characters in your path. Sue me.
+// Tests might fail if you have creative characters in your path. Sue me.
#include "util/file_piece.hh"
#include "util/file.hh"
@@ -55,7 +55,7 @@ BOOST_AUTO_TEST_CASE(MMapReadLine) {
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
/* Apple isn't happy with the popen, fileno, dup. And I don't want to
- * reimplement popen. This is an issue with the test.
+ * reimplement popen. This is an issue with the test.
*/
/* read() implementation */
BOOST_AUTO_TEST_CASE(StreamReadLine) {
@@ -67,7 +67,7 @@ BOOST_AUTO_TEST_CASE(StreamReadLine) {
FILE *catter = popen(popen_args.c_str(), "r");
BOOST_REQUIRE(catter);
-
+
FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
std::string ref_line;
while (getline(ref, ref_line)) {
@@ -107,8 +107,8 @@ BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
}
// gzip stream. Apple doesn't like popen, fileno, dup. This is an issue with
-// the test.
-#ifndef __APPLE__
+// the test.
+#if !defined __APPLE__ && !defined __MINGW32__
BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
std::fstream ref(FileLocation().c_str(), std::ios::in);
@@ -117,7 +117,7 @@ BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
FILE * catter = popen(command.c_str(), "r");
BOOST_REQUIRE(catter);
-
+
FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
std::string ref_line;
while (getline(ref, ref_line)) {
diff --git a/util/read_compressed_test.cc b/util/read_compressed_test.cc
index 9cb4a4b9f..71b97b0f6 100644
--- a/util/read_compressed_test.cc
+++ b/util/read_compressed_test.cc
@@ -12,6 +12,22 @@
#include <stdlib.h>
+#if defined __MINGW32__
+#include <time.h>
+#include <fcntl.h>
+
+#if !defined mkstemp
+/**
+ * Minimal mkstemp() replacement for MinGW builds.
+ *
+ * Fills in the trailing "XXXXXX" of stemplate via mktemp() and opens the
+ * resulting file for reading and writing.
+ *
+ * @param stemplate writable, NUL-terminated template ending in "XXXXXX";
+ *                  modified in place to hold the generated name.
+ * @return an open file descriptor, or -1 if no name could be generated or
+ *         the file could not be created.
+ */
+int mkstemp(char * stemplate)
+{
+  char *filename = mktemp(stemplate);
+  // Depending on the C runtime, mktemp() reports failure either by returning
+  // NULL or by truncating the template to an empty string.
+  if (filename == NULL || filename[0] == '\0')
+    return -1;
+  // O_EXCL: fail rather than reuse a file that appeared after the name was
+  // generated. O_BINARY: the Windows default is text mode, which would
+  // translate newline bytes in data written through this descriptor.
+  return open(filename, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0600);
+}
+#endif
+
+#endif // defined
+
namespace util {
namespace {