diff options
Diffstat (limited to 'newlib/libc/iconv/ccs/iconv_mktbl')
-rw-r--r-- | newlib/libc/iconv/ccs/iconv_mktbl | 269 |
1 files changed, 269 insertions, 0 deletions
#! /usr/bin/perl
#
# Copyright (c) 1999, 2000
#	Konstantin Chuguev.  All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
#	iconv (Charset Conversion Library) v2.0
#

use strict;
use warnings;
use integer;
# Getopt::Std replaces the Perl 4 library getopts.pl, which is no longer
# shipped with perl; it fills in the same $opt_x package variables.
use Getopt::Std;

# Command line options (see main() for the meaning of each letter).
our($opt_a, $opt_C, $opt_c, $opt_M, $opt_m, $opt_o, $opt_p, $opt_u);

# Number of table bytes rendered so far as C source text (only maintained
# when -C is given; the binary path uses length() of the packed data).
our $array_size = 0;

# Charset properties collected by set_val().
our $control0 = 0;	# a code with low bits 0x00..0x1F, bit 0x80 clear, was seen
our $control1 = 0;	# a code with low bits 0x00..0x1F, bit 0x80 set, was seen
our $delete = 0;	# code 0x7F was seen
our($has_7bit, $has_8bit, $has_14bit, $has_16bit);

# Two-level conversion tables, indexed [high byte][low byte].
our(@to_ucs, @from_ucs);

# Render four packed bytes as a C macro call, e.g. "_1l(0x00, 0x00, 0x00, 0x08)".
#   $suffix - macro name without the leading underscore ('1l' or '2s')
#   $bytes  - string whose first four bytes are rendered
sub pack_hex {
    my($suffix, $bytes) = @_;
    my @hex = map { sprintf("0x%02X", $_) } unpack('C4', $bytes);
    return "_${suffix}(" . join(", ", @hex) . ")";
}

# Serialize the first $size elements of @$array_ref.
#   $format - 'N' for 32-bit values, anything else is treated as 16-bit 'n'
# Without -C the result is the packed big-endian binary string.  With -C the
# result is C source text (two macro calls per line) and $array_size is
# advanced by the number of bytes that text represents.
sub pack_array {
    my($size, $format, $array_ref) = @_;
    return pack("$format$size", @$array_ref) unless $opt_C;

    my $res = '';
    if ($format eq 'N') {
        # one 32-bit value per macro, two macros per output line
        for (my $i = 0; $i < $size; $i += 2) {
            $res .= "\t"
                . pack_hex('1l', pack("N", $$array_ref[$i]))
                . ", "
                . pack_hex('1l', pack("N", $$array_ref[$i+1]))
                . ",\n";
        }
        $array_size += $size * 4;
    } else {
        # two 16-bit values per macro, two macros per output line
        for (my $i = 0; $i < $size; $i += 4) {
            $res .= "\t"
                . pack_hex('2s', pack("n2", $$array_ref[$i], $$array_ref[$i+1]))
                . ", "
                . pack_hex('2s', pack("n2", $$array_ref[$i+2], $$array_ref[$i+3]))
                . ",\n";
        }
        $array_size += $size * 2;
    }
    return $res;
}

# create an array of short/long values in network byte order
# Undefined slots among the first $size elements are replaced by $default
# (the caller's array is modified in place) before serializing.
sub build_array {
    my($size, $format, $default, $array_ref) = @_;
    for my $i (0 .. $size-1) {
        $$array_ref[$i] = $default unless defined($$array_ref[$i]);
    }
    return pack_array($size, $format, $array_ref);
}

# Serialize a one-level table of $size 16-bit entries; unset entries
# become 0xFFFE.
sub build_table1 {
    my($size, $array_ref) = @_;
    return build_array($size, "n", 0xFFFE, $array_ref);
}

# Serialize a two-level table: $size 32-bit byte offsets (0 for an absent
# row) followed by one build_table1() subtable per present row.
# Reports the number of subtables on STDERR.
sub build_table2 {
    my($size, $array_ref) = @_;
    my($offset, $n, @offs) = ($size * 4, 0);	# rows start after the offsets
    for my $i (0 .. $size-1) {
        next unless defined($$array_ref[$i]);
        $offs[$i] = $offset;
        $offset += $size * 2;
    }
    my $data = build_array($size, "N", 0, \@offs);
    for my $i (0 .. $size-1) {
        next unless defined($$array_ref[$i]);
        $n++;
        $data .= build_table1($size, $$array_ref[$i]);
    }
    printf STDERR "%d subtables.\n", $n;
    return $data;
}

# set a value in two charset conversion tables; update charset properties
# ($cs, $ucs) = (local charset code, Unicode)
#
sub set_val {
    my($cs, $ucs) = @_;
    return if $opt_a && $cs > 0x7F;
    $to_ucs[$cs >> 8][$cs & 0xFF] = $ucs;
    $from_ucs[$ucs >> 8][$ucs & 0xFF] = $cs;
    if (($cs & 0x60) == 0) {
        if ($cs & 0x80) {
            $control1 = 1;
        } else {
            $control0 = 1;
        }
    }
    $delete = 1 if $cs == 0x7F;
    if ($cs < 0x80) {
        $has_7bit = 1;
    } elsif ($cs < 0x100) {
        $has_8bit = 1;
    } elsif ($cs & 0x8080) {
        $has_16bit = 1;
    } else {
        $has_14bit = 1;
    }
}

# set a range of equal codes to charset conversion tables
#
sub set_range {
    for my $code (@_) {
        set_val($code, $code);
    }
}

# Read the mnemonic table named by -m.  Only lines starting with exactly one
# space are considered; the first two whitespace-separated fields are taken
# as (mnemonic, hexadecimal code).  Returns a reference to the resulting hash.
sub load_mnemonics {
    my($file) = @_;
    my %map;
    open(my $fh, '<', $file) or die "cannot open $file: $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        next unless $line =~ /^ [^ ]/;
        my @fields = split ' ', $line;
        next if @fields < 2;
        $map{$fields[0]} = $fields[1];
    }
    close($fh);
    return \%map;
}

# Read a charset description written in mnemonics from the input files/STDIN.
# "&code N" lines set the current charset code; every other line is a list of
# mnemonics assigned to consecutive codes, '??' marking an unassigned code.
sub read_mnemonic_charset {
    my($map) = @_;
    my $code = 0;
    while (my $line = <>) {
        chomp $line;
        $line =~ s/^ *//;
        if ($line =~ /^&[a-z]/) {
            my @fields = split ' ', substr($line, 1);
            $code = $fields[1] if @fields >= 2 && $fields[0] eq 'code';
        } else {
            for my $mnemonic (split ' ', $line) {
                if ($mnemonic ne '??') {
                    if (defined $map->{$mnemonic}) {
                        set_val($code, hex "0x$map->{$mnemonic}");
                    } else {
                        # mapping it to U+0000 would clobber the NUL entry
                        warn "unknown mnemonic '$mnemonic' skipped\n";
                    }
                }
                $code++;
            }
        }
    }
}

# Read a whitespace-separated mapping table from the input files/STDIN.
# Field -c holds the charset code (with prefix -p), field -u the Unicode
# value; '#' starts a comment.
sub read_mapping_table {
    my $prefix = qr/^$opt_p/;
    while (my $line = <>) {
        $line =~ s/[#\n].*//;
        my @fields = split ' ', $line;
        next if @fields < 2;		# too few fields
        next unless defined($fields[$opt_c]) && defined($fields[$opt_u]);
        # local charset code prefix is invalid
        next unless $fields[$opt_c] =~ s/$prefix/0x/;
        set_val(hex($fields[$opt_c]), hex($fields[$opt_u]));
    }
}

# Parse the options, read the charset description, and write the conversion
# table (binary by default, C source with -C) to STDOUT or the -o file.
sub main {
    getopts('aCc:Mm:o:p:u:')
        or die "usage: iconv_mktbl [-aCM] [-c field] [-u field] [-p prefix]"
            . " [-m mnemonics] [-o file] [input ...]\n";
    #        ||| || | | +- u N: field number for Unicode character codes
    #        ||| || | +--- p str: prefix
    #        ||| || +----- o file: output file name
    #        ||| |+------- m file: character mnemonic table from RFC1345
    #        ||| +-------- M: Macintosh newline (<CR> only)
    #        ||+---------- c N: field number for charset character codes
    #        |+----------- C: make C source file
    #        +------------ a: ignore 8 bit (for ASCII)

    $opt_c = 0 unless defined($opt_c);
    $opt_p = '0x' unless defined($opt_p);
    $opt_u = 1 unless defined($opt_u);

    my $have_output = defined($opt_o) && length($opt_o);
    # checked up front so no work is done before failing
    die "-o option is mandatory with -C" if $opt_C && !$have_output;

    my $symbol;		# output name without ".c"; used in C identifiers
    if ($have_output) {
        $opt_o =~ tr/-/_/;
        open(STDOUT, '>', $opt_o)
            or die "cannot open $opt_o for writing: $!\n";
        ($symbol = $opt_o) =~ s/\.c$//;
    }
    binmode(STDOUT) unless $opt_C;	# the table is raw bytes

    local $/ = "\cM" if $opt_M;

    if ($opt_m) {
        read_mnemonic_charset(load_mnemonics($opt_m));
    } else {
        read_mapping_table();
    }

    my($to, $nbits, $type);
    if (!$has_16bit && !$has_14bit) {
        if ($has_8bit) {
            print STDERR "8bit charset";
            if (!$control0) {
                set_range(0 .. 0x1F);
                print STDERR "; control0 chars added";
            }
            if (!$control1) {
                set_range(0x80 .. 0x9F);
                print STDERR "; control1 chars added";
            }
            if (!$delete) {
                set_range(0x7F);
                print STDERR "; delete char added";
            }
            $nbits = 8;
            $type = 1;
        } else {
            print STDERR "7bit charset";
            $nbits = 7;
            $type = 0;
        }
        print STDERR ".\n";
        # an empty input leaves row 0 undefined; use an empty row then
        my $row0 = $to_ucs[0] || [];
        $to = build_table1($has_8bit ? 256 : 128, $row0);
    } elsif ($has_16bit) {
        print STDERR "16bit charset";
        if (!$has_7bit && !$has_8bit) {
            set_range(0 .. 0x7F);
            print STDERR "; ASCII subset added";
        } elsif (!$control0) {
            set_range(0 .. 0x1F);
            print STDERR "; control0 chars added";
        }
        print STDERR ".\n";
        $to = build_table2(256, \@to_ucs);
        $nbits = 16;
        $type = 3;
    } else {
        print STDERR "14bit charset.\n";
        $to = build_table2(128, \@to_ucs);
        $nbits = 14;
        $type = 2;
    }

    # must be sampled before the second table is built: in -C mode
    # $array_size keeps growing with every pack_array() call
    my $to_size = $opt_C ? $array_size : length($to);

    my $from = build_table2(256, \@from_ucs);

    if ($opt_C) {
        (my $name = $symbol) =~ tr/a-z/A-Z/;
        print qq/#ifdef ENABLE_ICONV\n/;
        print qq/#include "..\/lib\/deps.h"\n\n/;
        print "#ifdef ICONV_CONVERTER_$name\n";
        print qq/#include "..\/lib\/endian.h"\n\n/;
        print "_CONST unsigned char iconv_ccs_table_$symbol" . "[] = {\n";
        print "\t3, 'C', 'S', 'C', 'T', ICONV_ORDER, $nbits, $type,\n";
        print pack_array(2, 'N', [8, 8 + $to_size]);
        print $to;
        print $from;
        # the C trailer belongs to the C output only
        print "};\n\n";
        print "#endif /* #ifdef ICONV_CONVERTER_$name */\n\n";
        print "#endif /* #ifdef ENABLE_ICONV */\n";
    } else {
        print pack("A5CCCNN", "\003CSCT", 0, $nbits, $type, 8, 8 + $to_size);
        print $to;
        print $from;
    }
    close(STDOUT) or die "error writing output: $!\n";
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;

1;