diff options
Diffstat (limited to 'intern/locale/msgfmt.cc')
-rw-r--r-- | intern/locale/msgfmt.cc | 374 |
1 files changed, 0 insertions, 374 deletions
// Written by Sergey Sharybin <sergey.vfx@gmail.com>
// Added support for contexts
//
// Based on Python script msgfmt.py from Python source
// code tree, which was written by
// Martin v. Löwis <loewis@informatik.hu-berlin.de>
//
// Generate binary message catalog from textual translation description.
//
// This program converts a textual Uniforum-style message catalog (.po file) into
// a binary GNU catalog (.mo file). This is essentially the same function as the
// GNU msgfmt program, however, it is a simpler implementation.
//
// Usage: msgfmt input.po output.mo

#include <algorithm>
#include <cctype>
#include <cstdio>
#include <fstream>
#include <functional>
#include <iostream>
#include <map>
#include <stdlib.h>
#include <string>
#include <vector>

namespace {

// Catalog being built: maps a message key to its translation.
// The key is the msgid, optionally prefixed by "<msgctxt>\x04" (see add()).
std::map<std::string, std::string> MESSAGES;

// Return true when `str` begins with `prefix`.
// An empty prefix matches every string.
bool starts_with(const std::string &str,
                 const std::string &prefix) {
  // compare() clamps the compared length to the size of `str`, so a prefix
  // longer than `str` simply compares unequal.
  return str.compare(0, prefix.length(), prefix) == 0;
}

// Return a copy of `str` with leading and trailing spaces, tabs, carriage
// returns and newlines removed.
std::string trim(const std::string &str) {
  std::string result = str;
  result.erase(0, result.find_first_not_of(" \t\r\n"));
  result.erase(result.find_last_not_of(" \t\r\n") + 1);
  return result;
}

// Resolve backslash escapes in `str` and strip one pair of surrounding
// double quotes.
//
// "\\", "\n" and "\t" are translated; for any other escaped character the
// backslash is dropped and the character is kept. The quotes are stripped
// after unescaping, so an escaped quote inside the string survives.
std::string unescape(const std::string &str) {
  std::string result;
  const size_t str_length = str.length();
  result.reserve(str_length);
  size_t i = 0;
  while (i < str_length) {
    char current_char = str[i++];
    // The last character of the line is never consumed as an escape target,
    // which keeps the closing quote in place for the stripping below.
    if (current_char == '\\' && i + 1 < str_length) {
      const char next_char = str[i++];
      if (next_char == 'n') {
        current_char = '\n';
      } else if (next_char == 't') {
        current_char = '\t';
      } else {
        // Covers "\\" as well as every unknown escape.
        current_char = next_char;
      }
    }
    result += current_char;
  }

  if (!result.empty() &&
      result[0] == '"' &&
      result[result.length() - 1] == '"') {
    // A lone quote character is treated as an empty quoted string.
    result = (result.length() < 2) ? std::string()
                                   : result.substr(1, result.length() - 2);
  }

  return result;
}

// Add a non-fuzzy translation to the dictionary.
//
// Fuzzy entries and entries with an empty translation are skipped. When a
// context is given, the key is "<msgctxt>\x04<msgid>".
void add(const std::string &msgctxt,
         const std::string &msgid,
         const std::string &msgstr,
         bool fuzzy) {
  if (fuzzy || msgstr.empty()) {
    return;
  }
  if (msgctxt.empty()) {
    MESSAGES[msgid] = msgstr;
  } else {
    MESSAGES[msgctxt + '\x04' + msgid] = msgstr;
  }
}

// Append all keys of `map` to `keys`, in the map's iteration order.
template<typename TKey, typename TValue>
void get_keys(const std::map<TKey, TValue> &map,
              std::vector<TKey> *keys) {
  keys->reserve(keys->size() + map.size());
  for (typename std::map<TKey, TValue>::const_iterator it = map.begin();
       it != map.end();
       ++it) {
    keys->push_back(it->first);
  }
}

// Serialize `value` as sizeof(int) bytes, least significant byte first.
std::string intToBytes(int value) {
  // Shift the unsigned bit pattern: right-shifting a negative int is
  // implementation-defined.
  const unsigned int bits = static_cast<unsigned int>(value);
  std::string result;
  for (unsigned int i = 0; i < sizeof(bits); i++) {
    result += static_cast<char>((bits >> (i * 8)) & 0xff);
  }
  return result;
}

// Part of a catalog entry the parser is currently reading.
typedef enum {
  SECTION_NONE = 0,
  SECTION_CTX = 1,
  SECTION_ID = 2,
  SECTION_STR = 3
} eSectionType;

// Position and length of one entry inside the key blob (o1, l1) and
// inside the value blob (o2, l2).
struct Offset {
  unsigned int o1, l1, o2, l2;
};

// Return the generated output: the binary .mo image built from MESSAGES.
std::string generate(void) {
  // The keys are sorted in the .mo file. std::map already iterates in
  // ascending key order, so no additional sort is needed.
  std::vector<std::string> keys;
  get_keys(MESSAGES, &keys);

  std::vector<Offset> offsets;
  offsets.reserve(keys.size());
  std::string ids, strs;
  for (std::vector<std::string>::const_iterator it = keys.begin();
       it != keys.end();
       ++it) {
    const std::string &id = *it;
    const std::string &str = MESSAGES[id];
    // For each string, we need size and file offset. Each string is NUL
    // terminated; the NUL does not count into the size.
    Offset offset = {static_cast<unsigned int>(ids.size()),
                     static_cast<unsigned int>(id.size()),
                     static_cast<unsigned int>(strs.size()),
                     static_cast<unsigned int>(str.size())};
    offsets.push_back(offset);
    ids += id;
    ids += '\0';
    strs += str;
    strs += '\0';
  }

  // The header is 7 32-bit unsigned integers. We don't use hash tables, so
  // the keys start right after the index tables (two tables with 8 bytes
  // per entry each).
  const int entry_count = static_cast<int>(keys.size());
  const int header_size = 7 * 4;
  const int keystart = header_size + 16 * entry_count;
  // and the values start after the keys
  const int valuestart = keystart + static_cast<int>(ids.size());

  std::string output;
  output += intToBytes(static_cast<int>(0x950412deU));  // Magic
  output += intToBytes(0x0);                            // Version
  output += intToBytes(entry_count);                    // # of entries
  output += intToBytes(header_size);                    // start of key index
  output += intToBytes(header_size + entry_count * 8);  // start of value index
  output += intToBytes(0);                              // Size of hash table
  output += intToBytes(0);                              // Offset of hash table

  // The string table first has the list of keys, then the list of values.
  // Each entry has first the size of the string, then the file offset.
  for (std::vector<Offset>::const_iterator it = offsets.begin();
       it != offsets.end();
       ++it) {
    output += intToBytes(static_cast<int>(it->l1));
    output += intToBytes(static_cast<int>(it->o1 + keystart));
  }
  for (std::vector<Offset>::const_iterator it = offsets.begin();
       it != offsets.end();
       ++it) {
    output += intToBytes(static_cast<int>(it->l2));
    output += intToBytes(static_cast<int>(it->o2 + valuestart));
  }

  output += ids;
  output += strs;

  return output;
}

// Print "<message><file>:<line>" to stderr and abort the program.
void syntax_error(const char *message,
                  const char *file_name,
                  int line_number) {
  std::cerr << message
            << file_name << ":"
            << line_number
            << std::endl;
  abort();
}

// Parse the .po catalog `input_file_name` into MESSAGES and write the
// resulting binary catalog to `output_file_name`.
//
// Malformed input aborts the program with a diagnostic on stderr; an
// unreadable input file or a failed write exits with EXIT_FAILURE.
void make(const char *input_file_name,
          const char *output_file_name) {
  // Start off assuming Latin-1, so everything decodes without failure,
  // until we know the exact encoding.
  // TODO(sergey): Support encoding.
  // const char *encoding = "latin-1";

  eSectionType section = SECTION_NONE;
  bool fuzzy = false;
  bool is_plural = false;
  std::string msgctxt, msgid, msgstr;

  std::ifstream input_file_stream(input_file_name);
  if (!input_file_stream) {
    std::cerr << "Cannot open input file " << input_file_name << std::endl;
    exit(EXIT_FAILURE);
  }

  // Parse the catalog.
  int lno = 0;
  for (std::string l; std::getline(input_file_stream, l); ) {
    lno++;
    const bool is_comment = !l.empty() && l[0] == '#';
    // If we get a comment line after a msgstr, this is a new entry.
    if (is_comment && section == SECTION_STR) {
      add(msgctxt, msgid, msgstr, fuzzy);
      section = SECTION_NONE;
      msgctxt = "";
      fuzzy = false;
    }
    // Record a fuzzy mark.
    if (starts_with(l, "#,") && l.find("fuzzy") != std::string::npos) {
      fuzzy = true;
    }
    // Skip comments
    if (is_comment) {
      continue;
    }
    // Now we are in a msgid section, output previous section.
    if (starts_with(l, "msgctxt")) {
      if (section == SECTION_STR) {
        add(msgctxt, msgid, msgstr, fuzzy);
      }
      section = SECTION_CTX;
      l = l.substr(7);
      msgctxt = msgid = msgstr = "";
    } else if (starts_with(l, "msgid") && !starts_with(l, "msgid_plural")) {
      if (section == SECTION_STR) {
        add(msgctxt, msgid, msgstr, fuzzy);
        msgctxt = "";
        if (msgid == "") {
#if 0
          // See whether there is an encoding declaration.
          p = HeaderParser();
          charset = p.parsestr(msgstr.decode(encoding)).get_content_charset();
          if (charset) {
            encoding = charset;
          }
#else
          // Not ported to C++ yet.
          std::cerr << "Encoding declarations are not supported yet.\n"
                    << std::endl;
          abort();
#endif
        }
      }
      section = SECTION_ID;
      l = l.substr(5);
      msgid = msgstr = "";
      is_plural = false;
    } else if (starts_with(l, "msgid_plural")) {
      // This is a message with plural forms.
      if (section != SECTION_ID) {
        syntax_error("msgid_plural not preceded by msgid on ",
                     input_file_name, lno);
      }
      l = l.substr(12);
      msgid += '\0';  // separator of singular and plural
      is_plural = true;
    } else if (starts_with(l, "msgstr")) {
      // Now we are in a msgstr section
      section = SECTION_STR;
      if (starts_with(l, "msgstr[")) {
        if (!is_plural) {
          syntax_error("plural without msgid_plural on ",
                       input_file_name, lno);
        }
        const std::string::size_type bracket_position = l.find(']');
        if (bracket_position == std::string::npos) {
          syntax_error("Syntax error on ", input_file_name, lno);
        }
        // Keep only what follows the closing bracket; leaving the ']' in
        // place would prevent unescape() from stripping the quotes.
        l = l.substr(bracket_position + 1);
        if (!msgstr.empty()) {
          msgstr += '\0';  // Separator of the various plural forms;
        }
      } else {
        if (is_plural) {
          syntax_error("indexed msgstr required for plural on ",
                       input_file_name, lno);
        }
        l = l.substr(6);
      }
    }
    // Skip empty lines.
    l = trim(l);
    if (l.empty()) {
      if (section == SECTION_STR) {
        add(msgctxt, msgid, msgstr, fuzzy);
        msgctxt = msgid = msgstr = "";
        section = SECTION_NONE;
        fuzzy = false;
      }
      continue;
    }
    l = unescape(l);
    if (section == SECTION_CTX) {
      // TODO(sergey): Support encoding.
      // msgid += l.encode(encoding);
      msgctxt += l;
    } else if (section == SECTION_ID) {
      // TODO(sergey): Support encoding.
      // msgid += l.encode(encoding);
      msgid += l;
    } else if (section == SECTION_STR) {
      // TODO(sergey): Support encoding.
      // msgstr += l.encode(encoding)
      msgstr += l;
    } else {
      syntax_error("Syntax error on ", input_file_name, lno);
    }
  }

  // Add last entry
  if (section == SECTION_STR) {
    add(msgctxt, msgid, msgstr, fuzzy);
  }

  // Compute output
  const std::string output = generate();

  std::ofstream output_file_stream(output_file_name,
                                   std::ios::out | std::ios::binary);
  output_file_stream.write(output.data(),
                           static_cast<std::streamsize>(output.size()));
  output_file_stream.close();
  if (!output_file_stream) {
    std::cerr << "Cannot write output file " << output_file_name << std::endl;
    exit(EXIT_FAILURE);
  }
}

}  // namespace

// Command line entry point: msgfmt <input.po> <output.mo>
int main(int argc, char **argv) {
  if (argc != 3) {
    printf("Usage: %s <input.po> <output.mo>\n", argv[0]);
    return EXIT_FAILURE;
  }
  const char *input_file = argv[1];
  const char *output_file = argv[2];

  make(input_file, output_file);

  return EXIT_SUCCESS;
}