// $Id$ /*********************************************************************** Moses - factored phrase-based language decoder Copyright (C) 2006 University of Edinburgh This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ***********************************************************************/ #ifdef WIN32 #include #else #include #include #endif #include #include #include #include #include #include "TypeDef.h" #include "Util.h" #include "md5.h" #include "Timer.h" using namespace std; //global variable Timer g_timer; string GetTempFolder() { #ifdef _WIN32 char *tmpPath = getenv("TMP"); string str(tmpPath); if (str.substr(str.size() - 1, 1) != "\\") str += "\\"; return str; #else return "/tmp/"; #endif } void CreateTempFile(ofstream &fileStream, string &filePath) { #ifdef _WIN32 char buffer[BUFSIZ]; ::GetTempFileNameA(GetTempFolder().c_str(), "", 0, buffer); filePath = buffer; #else char buffer[L_tmpnam]; strcpy(buffer, GetTempFolder().c_str()); strcat(buffer, PROJECT_NAME); strcat(buffer, "--XXXXXX"); mkstemp(buffer); filePath = buffer; #endif fileStream.open(filePath.c_str(), ofstream::out | ofstream::app); } string GetMD5Hash(const string &filePath) { unsigned char buffer[16384], signature[16]; struct MD5Context md5c; FILE *in; if ((in = fopen(filePath.c_str(), "rb")) == NULL) { return ""; } MD5Init(&md5c); size_t j; while ((j = fread(buffer, 1, sizeof buffer, in)) > 0) { MD5Update(&md5c, buffer, (unsigned) j); } MD5Final(signature, &md5c); std::stringstream stream(""); stream.setf(std::ios_base::hex,std::ios_base::basefield); for (j = 0; j < sizeof signature; j++) { stream << setw(2) << setfill('0') << (int) signature[j]; } return stream.str(); } const std::string ToLower(const std::string& str) { std::string lc(str); std::transform(lc.begin(), lc.end(), lc.begin(), (int(*)(int))std::tolower); return lc; } template<> bool Scan(const std::string &input) { std::string lc = ToLower(input); if (lc == "yes" || lc == "y" || lc == "true" || lc == "1") return true; if (lc == "no" || lc == "n" || lc =="false" || lc == "0") return false; TRACE_ERR( "Scan: didn't understand '" << lc << "', returning false" << std::endl); return false; } bool FileExists(const std::string& filePath) { ifstream ifs(filePath.c_str()); return !ifs.fail(); } const std::string Trim(const std::string& str, const std::string dropChars) { std::string res = str; res.erase(str.find_last_not_of(dropChars)+1); return res.erase(0, res.find_first_not_of(dropChars)); } void ResetUserTime() { g_timer.start(); }; void PrintUserTime(const std::string &message) { g_timer.check(message.c_str()); } std::map ProcessAndStripSGML(std::string &line) { std::map meta; std::string lline = ToLower(line); if (lline.find(""); if (close == std::string::npos) return meta; // error size_t end = lline.find(""); std::string seg = Trim(lline.substr(4, close-4)); std::string text = line.substr(close+1, end - close - 1); for (size_t i = 1; i < seg.size(); i++) { if (seg[i] == '=' && seg[i-1] == ' ') { std::string less = seg.substr(0, i-1) + seg.substr(i); seg = less; i = 0; continue; } if (seg[i] == '=' && seg[i+1] == ' ') { std::string less = seg.substr(0, i+1); if (i+2 < seg.size()) less += seg.substr(i+2); seg = less; i = 0; continue; } } line = Trim(text); if (seg == "") return meta; for (size_t i = 1; i < seg.size(); i++) { if (seg[i] == '=') { std::string label = seg.substr(0, i); std::string val = seg.substr(i+1); if (val[0] == '"') { val = val.substr(1); size_t close = val.find('"'); if (close == std::string::npos) { TRACE_ERR("SGML parse error: missing \"\n"); seg = ""; i = 0; } else { seg = val.substr(close+1); val = val.substr(0, close); i = 0; } } else { size_t close = val.find(' '); if (close == std::string::npos) { seg = ""; i = 0; } else { seg = val.substr(close+1); val = val.substr(0, close); } } label = Trim(label); seg = Trim(seg); meta[label] = val; } } return meta; }