# Listing of the two Ruby files added by the patch
# "[strings] Add search category consistence tool - Review fixes":
#   * tools/ruby/category_consistency/check_consistency.rb (executable)
#   * tools/ruby/category_consistency/omim_parsers.rb

# =============================================================================
# tools/ruby/category_consistency/check_consistency.rb
# =============================================================================
#!/usr/bin/env ruby

require_relative './omim_parsers'

ROOT = File.expand_path(File.dirname(__FILE__))
OMIM_ROOT = File.join(ROOT, '..', '..', '..')
CPP_CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'search', 'displayed_categories.cpp')
CATEGORIES_FILENAME = File.join(OMIM_ROOT, 'data', 'categories.txt')
STRINGS_FILENAME = File.join(OMIM_ROOT, 'strings.txt')
# Captures the body of the `m_keys = {...};` initializer. The quantifier is
# lazy so the capture stops at the first `};` after the opening brace instead
# of running to the last `};` in the file.
CATEGORIES_MATCHER = /m_keys = \{(.*?)\};/m

# Extracts the category keys listed in the `m_keys = {...};` initializer of a
# C++ source file.
#
# @param filename [String] path of the C++ file to read
# @return [Array<String>] category keys with surrounding quotes removed
# @raise [RuntimeError] if the file contains no `m_keys` initializer
def load_categories_from_cpp(filename)
  match = CATEGORIES_MATCHER.match(File.read(filename))
  raise "Could not find the 'm_keys = {...};' initializer in #{filename}" unless match

  match[1]
    .split(',')
    .map { |key| key.strip.gsub(/\A"|"\z/, '') }
    .reject(&:empty?) # tolerate a trailing comma in the initializer
end

# Compares the translations parsed from strings.txt with the synonym lists
# parsed from categories.txt and prints every mismatch to stdout.
#
# A translation is inconsistent when its language is present for the category
# in +search_cats+ but the translation is not among that language's synonyms.
# Categories missing from +search_cats+ are reported but do not affect the
# return value.
#
# @param string_cats [Hash{String => Hash{String => String}}]
#   category name => { language => translation }
# @param search_cats [Hash{String => Hash{String => Array<String>}}]
#   category name => { language => synonyms }
# @return [Boolean] true when no inconsistent translation was found
def compare_categories(string_cats, search_cats)
  inconsistent_strings = {}

  string_cats.each do |category_name, category|
    unless search_cats.key?(category_name)
      puts "Category '#{category_name}' not found in categories.txt"
      next
    end

    search_translations = search_cats[category_name]
    category.each do |lang, translation|
      synonyms = search_translations[lang]
      # Languages absent from categories.txt are not checked.
      next if synonyms.nil? || synonyms.include?(translation)

      (inconsistent_strings[category_name] ||= {})[lang] = [translation, synonyms]
    end
  end

  inconsistent_strings.each do |name, languages|
    puts "\nInconsistent category \"#{name}\""
    languages.each do |lang, (string_value, category_value)|
      puts "\t#{lang} : \"#{string_value}\" is not matched by #{category_value}"
    end
  end

  inconsistent_strings.empty?
end

# Runs the whole check: loads the category keys from the C++ source, parses
# categories.txt and strings.txt for those keys and compares the results.
#
# @return [Integer] process exit status: 0 when consistent, 1 otherwise
def check_search_categories_consistent
  cpp_categories = load_categories_from_cpp(CPP_CATEGORIES_FILENAME)

  search_categories =
    OmimParsers::CategoriesParser.new(cpp_categories).parse_file(CATEGORIES_FILENAME)
  string_categories =
    OmimParsers::StringsParser.new(cpp_categories).parse_file(STRINGS_FILENAME)

  compare_categories(string_categories, search_categories) ? 0 : 1
end

exit check_search_categories_consistent if __FILE__ == $PROGRAM_NAME

# =============================================================================
# tools/ruby/category_consistency/omim_parsers.rb
# =============================================================================
module OmimParsers
  # Language codes whose translations are extracted by the parsers.
  LANGUAGES = %w[en ru ar cs da nl fi fr de hu id it ja ko nb pl
                 pt ro es sv th tr uk vi zh-Hans zh-Hant he sk].freeze

  # Shared line-by-line driver. Subclasses provide #category (a Regexp whose
  # first capture is the category name) and #parse_line, and may override
  # #should_exclude_line?.
  class AbstractParser
    # @param keys [Array<String>] category names to collect
    def initialize(keys)
      @keys = keys
    end

    # Parses one stripped line into a [language, translation] pair.
    #
    # @return [Array, nil] the pair, or nil when the line holds no translation
    # @raise [NotImplementedError] unless overridden by a subclass
    def parse_line(_line)
      raise NotImplementedError, 'You must implement parse_line.'
    end

    # Registers the category named on +line+ in +result+ when it is one of the
    # requested keys.
    #
    # @return [Hash, nil] the translations hash stored for the category, or
    #   nil when +line+ is not a category header or names an unrequested one
    def match_category(line, result)
      category_match = category.match(line)
      return unless category_match

      name = category_match[1]
      result[name] ||= {} if @keys.include?(name)
    end

    # Reads +filename+ as UTF-8 and collects translations per category.
    # An empty line ends the current category block.
    #
    # @param filename [String] path of the file to parse
    # @return [Hash{String => Hash}] category name => { language => translation }
    def parse_file(filename)
      current_string = nil
      result = {}

      # The block form of File.foreach closes the file when iteration ends.
      File.foreach(filename, encoding: 'UTF-8') do |line|
        line.strip!
        next if should_exclude_line?(line)

        # If line is empty -> next category block started
        if line.empty?
          current_string = nil
          next
        end

        current_string ||= match_category(line, result)
        next if current_string.nil?

        parsed = parse_line(line)
        next if parsed.nil?

        lang, translation = parsed
        current_string[lang] = translation
      end

      result
    end

    # @return [Regexp] pattern whose first capture is the category name
    # @raise [NotImplementedError] unless overridden by a subclass
    def category
      raise NotImplementedError, 'You must implement category.'
    end

    # @return [Boolean] true when the line must be ignored entirely
    def should_exclude_line?(_line)
      false
    end
  end

  # Parser for categories.txt: `lang:synonym|synonym|...` lines grouped under
  # `@category` headers; lines starting with '#' are skipped.
  class CategoriesParser < AbstractParser
    # NOTE(review): `\S+` rejects lines whose synonyms contain whitespace, so
    # such a language never reaches the result - confirm this is intended.
    LINE_MATCHER = /^([^:]+):(\S+)$/u
    # Drops an optional leading digit and an optional '^' from a synonym.
    TOKEN_MATCHER = /\d?\^?(.*)$/

    # @return [Array(String, Array<String>), nil] language and its synonyms,
    #   or nil when the line is not a translation for a known language
    def parse_line(line)
      line_match = LINE_MATCHER.match(line)
      return unless line_match

      lang = line_match[1].strip
      return unless LANGUAGES.include?(lang)

      synonyms = line_match[2].strip.split('|').map { |token| token[TOKEN_MATCHER, 1] }.compact
      [lang, synonyms]
    end

    def should_exclude_line?(line)
      line.start_with?('#')
    end

    def category
      # We match only global categories ('food', 'bank'...)
      /^@([A-Za-z0-9]+)$/
    end
  end

  # Parser for strings.txt: `lang = translation` lines grouped under
  # `[name]` headers.
  class StringsParser < AbstractParser
    # @return [Array(String, String), nil] language and translation, or nil
    #   when the line is not a translation for a known language
    def parse_line(line)
      line_match = /^([^=]+)=(.*)$/.match(line)
      return unless line_match

      lang = line_match[1].strip
      [lang, line_match[2].strip] if LANGUAGES.include?(lang)
    end

    def category
      /^\[(.+)\]/
    end
  end
end