Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mapsme/omim.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorgreshilov <slovaricheg@gmail.com>2018-04-27 16:19:14 +0300
committermpimenov <mpimenov@users.noreply.github.com>2018-08-28 14:57:38 +0300
commit514423cabd26dc8170c2609deabebb7629dd969c (patch)
tree1542de5cef1d1b4490b182f8da6f3537e9a1b9b6 /tools
parent799cf4e4764441914d721d95cfa4b008b9ef96f7 (diff)
[strings] Add search category consistency tool
- Review fixes
Diffstat (limited to 'tools')
-rwxr-xr-xtools/ruby/category_consistency/check_consistency.rb65
-rw-r--r--tools/ruby/category_consistency/omim_parsers.rb99
2 files changed, 164 insertions, 0 deletions
diff --git a/tools/ruby/category_consistency/check_consistency.rb b/tools/ruby/category_consistency/check_consistency.rb
new file mode 100755
index 0000000000..6558cdd61e
--- /dev/null
+++ b/tools/ruby/category_consistency/check_consistency.rb
@@ -0,0 +1,65 @@
+#!/usr/bin/env ruby
+
+require_relative './omim_parsers'
+
# Directory that contains this script.
ROOT = File.expand_path(File.dirname(__FILE__)).freeze
# Repository root: three levels above this script's directory.
OMIM_ROOT = File.join(ROOT, '..', '..', '..').freeze
# C++ source file that holds the `m_keys = {...};` list of category keys.
CPP_CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'search', 'displayed_categories.cpp').freeze
# Search categories file, parsed by OmimParsers::CategoriesParser.
CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'data', 'categories.txt').freeze
# Strings file, parsed by OmimParsers::StringsParser.
STRINGS_FILENAME = File.join(OMIM_ROOT, 'strings.txt').freeze
# Captures everything between the braces of `m_keys = {...};`. The /m flag lets
# the list span several lines; the quantifier is non-greedy so the capture stops
# at the first `};` instead of running on to the last one in the file.
CATEGORIES_MATCHER = /m_keys = \{(.*?)\};/m
+
# Reads the list of category keys from a C++ source file.
#
# The file is searched with CATEGORIES_MATCHER for an `m_keys = {...};`
# initializer; the captured list is split on commas and the surrounding double
# quotes are removed from every entry.
#
# filename - path of the C++ file to read.
#
# Returns an Array of category names, or nil when the file contains no
# `m_keys = {...};` initializer.
def load_categories_from_cpp(filename)
  # Read the file that was actually requested, not a hard-coded path.
  match = CATEGORIES_MATCHER.match(File.read(filename))
  return unless match

  # Strip padding inside the braces and tolerate any whitespace around the
  # commas, so entries never keep a stray quote or leading blank.
  match[1].strip.split(/\s*,\s*/).map { |cat| cat.gsub(/\A"|"\z/, '') }
end
+
# Checks every translation in string_cats against the synonyms in search_cats
# and prints a report to standard output.
#
# string_cats - Hash of category name => { language => translation }.
# search_cats - Hash of category name => { language => synonyms }.
#
# A category missing from search_cats is reported and skipped; a language
# missing for a category is skipped silently. Neither affects the result.
#
# Returns true when no mismatching translation was found, false otherwise.
def compare_categories(string_cats, search_cats)
  mismatches = {}

  string_cats.each do |name, translations|
    unless search_cats.include?(name)
      puts "Category '#{name}' not found in categories.txt"
      next
    end

    translations.each do |lang, translation|
      next unless search_cats[name].include?(lang)

      synonyms = search_cats[name][lang]
      next if synonyms.include?(translation)

      # Remember both sides so the report can show what was expected.
      (mismatches[name] ||= {})[lang] = [translation, synonyms]
    end
  end

  mismatches.each do |name, languages|
    puts "\nInconsistent category \"#{name}\""
    languages.each do |lang, (string_value, category_value)|
      puts "\t#{lang} : \"#{string_value}\" is not matched by #{category_value}"
    end
  end

  mismatches.empty?
end
+
# Runs the whole consistency check: loads the category keys from the C++
# source, parses categories.txt and strings.txt restricted to those keys, and
# compares the two results.
#
# Returns 0 when the files are consistent and 1 otherwise, so the value can be
# used directly as a process exit status.
def check_search_categories_consistent
  cpp_categories = load_categories_from_cpp(CPP_CATEGORIES_FILENAME)
  if cpp_categories.nil?
    # Without the key list the parsers would fail later with a NoMethodError
    # on nil; report the real problem and fail the check instead.
    warn "Category keys (m_keys) not found in #{CPP_CATEGORIES_FILENAME}"
    return 1
  end

  categories_txt_parser = OmimParsers::CategoriesParser.new(cpp_categories)
  strings_txt_parser = OmimParsers::StringsParser.new(cpp_categories)

  search_categories = categories_txt_parser.parse_file(CATEGORIES_FILENAME)
  string_categories = strings_txt_parser.parse_file(STRINGS_FILENAME)

  compare_categories(string_categories, search_categories) ? 0 : 1
end
+
+
# Run the check only when this file is executed directly (not when it is
# required), and use the check's result as the process exit status.
exit(check_search_categories_consistent) if $PROGRAM_NAME == __FILE__
diff --git a/tools/ruby/category_consistency/omim_parsers.rb b/tools/ruby/category_consistency/omim_parsers.rb
new file mode 100644
index 0000000000..8c55cb99cc
--- /dev/null
+++ b/tools/ruby/category_consistency/omim_parsers.rb
@@ -0,0 +1,99 @@
# Line-oriented parsers for the translation files used by the category
# consistency check. Both files are read as blocks separated by empty lines;
# a block is kept only when its header names one of the requested keys.
module OmimParsers
  # Language codes whose translations are collected; lines for any other
  # language are ignored by the parsers below.
  LANGUAGES = %w(en ru ar cs da nl fi fr de hu id it ja ko nb pl
                 pt ro es sv th tr uk vi zh-Hans zh-Hant he sk).freeze

  # Shared block-scanning logic. Subclasses supply #category (a Regexp whose
  # first capture group is the block name) and #parse_line, and may override
  # #should_exclude_line?.
  class AbstractParser
    # keys - collection of block names to collect (queried with include?).
    def initialize(keys)
      @keys = keys
    end

    # Parses one translation line.
    #
    # Subclasses return a [language, translation] pair, or nil when the line
    # should be ignored.
    def parse_line(line)
      raise NotImplementedError, 'You must implement parse_line.'
    end

    # Tests line against the #category pattern.
    #
    # When the line is a block header whose name is listed in @keys, makes
    # sure result has a Hash for that name and returns it; returns nil in
    # every other case.
    def match_category(line, result)
      category_match = category.match(line)
      return if category_match.nil?

      name = category_match[1]
      result[name] ||= {} if @keys.include?(name)
    end

    # Parses filename (read as UTF-8) line by line.
    #
    # Returns a Hash of block name => { language => translation } holding only
    # the blocks whose name is listed in @keys.
    def parse_file(filename)
      current_string = nil
      result = {}
      # The block form closes the file as soon as parsing finishes or fails.
      File.open(filename, 'r:UTF-8') do |file|
        file.each_line do |line|
          line.strip!
          next if should_exclude_line?(line)

          # If line is empty -> next category block started
          if line.empty?
            current_string = nil
            next
          end

          # Headers are only looked for while no accepted block is open.
          current_string ||= match_category(line, result)

          parsed = parse_line(line)
          next if parsed.nil? || current_string.nil?

          lang, translation = parsed
          current_string[lang] = translation
        end
      end
      result
    end

    # Returns the Regexp that recognises a block header; its first capture
    # group is the block name.
    def category
      raise NotImplementedError, 'You must implement category.'
    end

    # Returns true for lines that must be skipped before any other handling.
    # Nothing is skipped by default.
    def should_exclude_line?(line)
      false
    end
  end

  # Parser for the search categories file: `@name` headers followed by
  # `lang:synonym|synonym|...` lines; lines starting with '#' are skipped.
  class CategoriesParser < AbstractParser
    # Returns [language, Array of synonyms] for a `lang:a|b|c` line whose
    # language is in LANGUAGES, nil otherwise.
    def parse_line(line)
      # NOTE(review): \S+ means a line whose synonym list contains any
      # whitespace does not match and is ignored -- confirm this is intended.
      line_match = /^([^:]+):(\S+)$/u.match(line)
      return unless line_match

      lang = line_match[1].strip
      return unless LANGUAGES.include?(lang)

      # Each synonym may start with an optional digit and an optional '^';
      # only the text after that prefix is kept.
      synonyms = line_match[2].strip.split('|').map do |token|
        token[/\d?\^?(.*)$/, 1]
      end
      [lang, synonyms]
    end

    def should_exclude_line?(line)
      line.start_with?('#')
    end

    def category
      # We match only global categories ('food', 'bank'...)
      /^@([A-Za-z0-9]+)$/
    end
  end

  # Parser for the strings file: `[name]` headers followed by
  # `lang = translation` lines.
  class StringsParser < AbstractParser
    # Returns [language, translation] for a `lang = text` line whose language
    # is in LANGUAGES, nil otherwise.
    def parse_line(line)
      line_match = /^([^=]+)=(.*)$/.match(line)
      return unless line_match

      lang = line_match[1].strip
      [lang, line_match[2].strip] if LANGUAGES.include?(lang)
    end

    def category
      /^\[(.+)\]/
    end
  end
end