diff options
Diffstat (limited to 'contrib/unicode2nginx/unicode-to-nginx.pl')
-rwxr-xr-x | contrib/unicode2nginx/unicode-to-nginx.pl | 45 |
1 files changed, 45 insertions, 0 deletions
#!/usr/bin/perl -w

# Convert unicode mappings to nginx configuration file format.

# You may find useful mappings in various places, including
# unicode.org official site:
#
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT
# http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/KOI8-R.TXT

# Needs perl 5.6 or later.

# Written by Maxim Dounin, mdounin@rambler-co.ru

###############################################################################

require 5.006;

use strict;
use warnings;

# utf8_hex($code_point)
#
# Returns the UTF-8 encoding of the given numeric code point as a string of
# uppercase two-digit hex bytes (for example, 0x0410 yields "D090").
sub utf8_hex {
    my ($code_point) = @_;

    # pack("U") builds a character string.  A plain "C*" template would
    # unpack its characters (wrapping values above 255 on newer perls), not
    # its encoded bytes; the leading "U0" makes unpack walk the UTF-8
    # encoded form byte by byte instead.
    my @octets = unpack("U0C*", pack("U", $code_point));

    return join('', map { sprintf("%02X", $_) } @octets);
}

# convert_line($line)
#
# Converts one chomped mapping line of the form
#
#     0xXX 0xXXXX # <unicode-name>
#
# into the corresponding output line (newline included).  Returns undef when
# the line does not have that shape, so the caller can report it.
sub convert_line {
    my ($line) = @_;

    # Both codes must be real hex digits so that hex() never sees garbage.
    if ($line =~ /^\s*0x([0-9A-Fa-f]{2})\s*0x([0-9A-Fa-f]{4})\s*(#.*)/) {
        # Mapping <from-code> <unicode-code> "#" <unicode-name>
        my ($cs_code, $un_code, $un_name) = ($1, $2, $3);

        # Produce UTF-8 sequence from character code.
        my $un_utf8 = utf8_hex(hex($un_code));

        return " $cs_code $un_utf8 ; $un_name\n";
    }

    return;
}

# main()
#
# Reads mapping lines from the files named on the command line (or from
# standard input), prints one converted line per mapping and warns about
# every line that is neither a comment, blank, nor a recognizable mapping.
sub main {
    while (my $line = <>) {
        # Skip comments and empty lines

        next if $line =~ /^#/;
        next if $line =~ /^\s*$/;
        chomp $line;

        # Convert mappings

        my $entry = convert_line($line);

        if (defined $entry) {
            print $entry;

        } else {
            warn "Unrecognized line: '$line'";
        }
    }

    return;
}

# Run the conversion only when executed as a script, so the helpers above
# can be loaded from another file without consuming standard input.
main() unless caller;

###############################################################################