diff options
Diffstat (limited to 'newlib/libc/iconv/ccs/mktbl.pl')
-rwxr-xr-x | newlib/libc/iconv/ccs/mktbl.pl | 1505 |
1 files changed, 1505 insertions, 0 deletions
diff --git a/newlib/libc/iconv/ccs/mktbl.pl b/newlib/libc/iconv/ccs/mktbl.pl new file mode 100755 index 000000000..c3b87932b --- /dev/null +++ b/newlib/libc/iconv/ccs/mktbl.pl @@ -0,0 +1,1505 @@ +#!/usr/bin/perl -w +# +# Copyright (c) 2003-2004, Artem B. Bityuckiy, SoftMine Corporation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. 
#
use integer;
use Getopt::Std;
use IO::Seekable;
use strict;


# ##############################################################################
#
# FUNCTION PROTOTYPES AND GLOBAL DATA DECLARATION SECTION
#
# ##############################################################################


# SUPPLEMENTARY FUNCTIONS FORWARD DECLARATIONS
# (the prototypes let the subs be called without parentheses before they
# are defined further down in the file)
sub ProcessOptions();
sub Err($;$);
sub Generate8bitToUCS();
sub GenerateSpeed($);
sub Generate16bitSize($);
sub Output8bitToUCS(;$);
sub Output8bitFromUCS(;$);
sub OutputSpeed($;$);
sub OutputSize($;$);

# VARIABLES RELATING TO COMMAND-LINE OPTIONS
my $Verbose;    # Be verbose if true
my $Source;     # Output C source code instead of binary .cct file if true
my $Plane;      # Use this plane if defined
my $InFile;     # Use this file for input
my $OutFile;    # Use this file for output
my $CCSName;    # Use this CCS name
my $NoSpeed;    # Don't generate speed-optimized tables (binary files only)
my $NoSize;     # Don't generate size-optimized tables (binary files only)
my $NoBE;       # Don't generate big-endian tables (binary files only)
my $NoLE;       # Don't generate little-endian tables (binary files only)
my $NoTo;       # Don't generate "to_ucs" table (binary files only)
my $NoFrom;     # Don't generate "from_ucs" table (binary files only)
my $CCSCol;     # CCS column number in source file
my $UCSCol;     # UCS column number in source file


# DATA STRUCTURES WITH "TO_UCS" AND "FROM_UCS" SPEED/SIZE-OPTIMIZED TABLES
my @FromSpeedTbl;
my @ToSpeedTbl;
my @FromSizeTbl;
my @ToSizeTbl;

# "TO_UCS" AND "FROM_UCS" SPEED/SIZE-OPTIMIZED TABLES SIZE IN BYTES
my $FromSpeedBytes = 0;
my $ToSpeedBytes   = 0;
my $FromSizeBytes  = 0;
my $ToSizeBytes    = 0;

my %CCSUCS;       # CCS->UCS mapping
my %UCSCCS;       # UCS->CCS mapping
my $Bits = 8;     # Table bits (8 or 16)

# SPECIAL MARKER CODES
my $InvCode  = 0xFFFF;  # FFFF marks invalid codes in 16 bit tables
my $InvBlock = 0xFFFF;  # FFFF also marks empty blocks in speed-optimized tables
my $LostCode = 0x3F;    # ASCII '?' marks codes lost during CCS->UCS mapping

# In 8 bit encodings 0xFF is used to mark invalid codes, so the CCS's own
# 0xFF mapping is saved separately. $FFMap stores the real 0xFF mapping if
# it is defined.
my $InvCode8bit = 0xFF;
my $FFMap;

# 8 Bit "From UCS" table header size (bytes)
my $Hdr8bitFromUCS = 2;
# Binary table header size (bytes)
my $HdrBinary = 8;

# At first all lost CCS codes are marked by $TmpLost to distinguish between
# a code which is genuinely equal to $LostCode and a lost code. This is done
# in order to output $MacroLostCode instead of $LostCode in the source file.
my $TmpLost = 0x1FFFF;

# VARIABLES RELATING TO C SOURCE CODE
# (every value containing %s is a template; the CCS name is substituted
# right after the command-line options have been processed)
my $MacroInvCode    = 'INVALC';
my $MacroInvBlock   = 'INVBLK';
my $MacroLostCode   = 'LOST_C';
my $MacroCCSName    = 'ICONV_CCS_%s';
my $GuardSize       = 'defined (TABLE_USE_SIZE_OPTIMIZATION)';
my $GuardToUCS      = "ICONV_TO_UCS_CCS_%s";
my $GuardFromUCS    = "ICONV_FROM_UCS_CCS_%s";
my $MacroSpeedTable = 'TABLE_SPEED_OPTIMIZED';
my $MacroSizeTable  = 'TABLE_SIZE_OPTIMIZED';
my $Macro8bitTable  = 'TABLE_8BIT';
my $Macro16bitTable = 'TABLE_16BIT';
my $MacroVer1Table  = 'TABLE_VERSION_1';
my $TypeBICCS       = 'iconv_ccs_t';
my $VarToUCSSize    = "to_ucs_size_%s";
my $VarToUCSSpeed   = "to_ucs_speed_%s";
my $VarFromUCSSize  = "from_ucs_size_%s";
my $VarFromUCSSpeed = "from_ucs_speed_%s";
my $VarBICCS        = "_iconv_ccs_%s";
# Text block that visually separates tables.
my $Separator = '=' x 70;

# ##############################################################################
#
# SCRIPT ENTRY POINT
#
# ##############################################################################


# Parse command-line options, check them and set the corresponding global
# variables
ProcessOptions();

# Initialize global variables that depend on CCS name.
# Every template holds exactly one %s, so each one must be formatted exactly
# once (a template listed twice would be run through sprintf a second time
# with nothing left to substitute).
foreach my $template ($VarToUCSSpeed,
                      $VarToUCSSize,
                      $VarFromUCSSpeed,
                      $VarFromUCSSize,
                      $VarBICCS)
{
    $template = sprintf $template, $CCSName;
}
# C macro names carry the upper-cased CCS name.
foreach my $template ($GuardToUCS,
                      $GuardFromUCS,
                      $MacroCCSName)
{
    $template = sprintf $template, "\U$CCSName";
}

# Open input and output files
Err("Can't open \"$InFile\" file for reading: $!.\n", 1)
    unless open(INFILE, '<', $InFile);
Err("Can't open \"$OutFile\" file for writing: $!.\n", 1)
    unless open(OUTFILE, '>', $OutFile);

# Binary tables are raw packed bytes: make sure no end-of-line translation
# is applied to them on platforms that distinguish text and binary files.
binmode(OUTFILE) unless $Source;

# ==============================================================================
# EXTRACT CODES MAP FROM INPUT FILE
# ==============================================================================

for (my $ln = 1; defined(my $line = <INFILE>); $ln += 1)
{
    # Skip comment and empty lines, remove ending LF and CR symbols
    next if $line =~ /^#.*$/ or $line =~ /^\s*$/;
    $line =~ s/\n\z//;
    $line =~ s/\r\z//;

    # Remove comment and extra spaces
    $line =~ s/(.*)\s*#.*/$1/;
    $line =~ s/\s+/ /g;
    $line =~ s/\s+\z//;

    # Split line into individual codes
    my @codes = split / /, $line;

    # Skip line if there is no needed columns
    unless (defined $codes[$CCSCol])
    {
        print("Warning (line $ln): no CCS column, skip.\n") if $Verbose;
        next;
    }
    unless (defined $codes[$UCSCol])
    {
        print("Warning (line $ln): no UCS column, skip.\n") if $Verbose;
        next;
    }

    # Extract codes strings from needed columns
    my ($ccs, $ucs) = ($codes[$CCSCol], $codes[$UCSCol]);
    my $patt = qr/(0[xX])?[0-9a-fA-F]{1,8}/; # HEX digit regexp pattern.

    # Check that CCS and UCS code strings have the right format.
    unless ($ccs =~ m/^$patt$/)
    {
        print("Warning (line $ln): $ccs CCS code isn't recognized, skip.\n")
            if $Verbose;
        next;
    }
    unless ($ucs =~ m/^($patt(,|\+))*$patt$/)
    {
        print("Warning (line $ln): $ucs UCS code isn't recognized, skip.\n")
            if $Verbose;
        next;
    }

    # Convert code to numeric format (assume hex). From here on $ccs is a
    # number and must not be passed through hex() again.
    $ccs = hex $ccs;

    if ($ucs =~ m/,/ or $ucs =~ m/\+/)
    {
        # Mark CCS codes with "one to many" mappings as lost
        printf("Warning (line $ln): only one to one mapping is supported, "
             . "mark 0x%.4X CCS code as lost.\n", $ccs) if $Verbose;
        $ucs = $TmpLost;
    }
    else
    {
        # Convert code to numeric format
        $ucs = hex $ucs;

        # Check that UCS code isn't longer than 16 bits.
        if ($ucs > 0xFFFF)
        {
            printf("Warning (line $ln): UCS code should fit 16 bits, "
                 . "mark 0x%.4X CCS code as lost.\n", $ccs) if $Verbose;
            $ucs = $TmpLost;
        }
    }

    # If CCS value > 0xFFFF user should specify plane number.
    if ($ccs > 0xFFFF && !defined $Plane)
    {
        print("Warning (line $ln): $ccs is > 16 bit, plane number should be specified,"
            . " skip this mapping.\n") if $Verbose;
        next;
    }

    if (defined $Plane)
    {
        # Skip alien plane, keep only the low 16 bits of codes in our plane.
        next if (($ccs & 0xFFFF0000) >> 16) != hex $Plane;
        $ccs &= 0xFFFF;
    }

    # Check that reserved codes aren't used.
    if ($ccs == $InvCode or $ucs == $InvCode)
    {
        print("Warning (line $ln): $InvCode is reserved to mark invalid codes and "
            . "shouldn't be used in mappings, skip.\n") if $Verbose;
        next;
    }

    # Save mapping in UCSCCS and CCSUCS hash arrays. The first mapping seen
    # for a code wins; lost codes get no UCS->CCS entry.
    $UCSCCS{$ucs} = $ccs if $ucs != $TmpLost && !defined $UCSCCS{$ucs};
    $CCSUCS{$ccs} = $ucs if !defined $CCSUCS{$ccs};

    $Bits = 16 if $ccs > 0xFF;
}

if (not %CCSUCS)
{
    Err("Error: there is no plane $Plane in \"$InFile\".\n") if defined $Plane;
    Err("Error: mapping wasn't found.\n");
}


# ==============================================================================
# GENERATE TABLE DATA
# ==============================================================================

if ($Bits == 8)
{
    # Remember the real mapping of CCS code 0xFF (see $InvCode8bit).
    $FFMap = $CCSUCS{0xFF};
    $FFMap = $InvCode if !defined $FFMap;
}

if ($Bits == 8)
{
    Generate8bitToUCS() unless $NoTo;
}
else
{
    GenerateSpeed("to_ucs") unless $NoTo || $NoSpeed;
    Generate16bitSize("to_ucs") unless $NoTo || $NoSize;
}

GenerateSpeed("from_ucs") unless $NoFrom || $NoSpeed;
Generate16bitSize("from_ucs") unless $NoFrom || $NoSize;

# ==============================================================================
# OUTPUT ARRAYS
# ==============================================================================

if ($Source)
{
    # OUTPUT SOURCE
    print OUTFILE
"/*
 * This file was generated automatically - don't edit it.
 * File contains iconv CCS tables for $CCSName encoding.
 */

#include \"ccsbi.h\"

#if defined ($GuardToUCS) \\
 || defined ($GuardFromUCS)

#include <_ansi.h>
#include <sys/types.h>
#include <sys/param.h>
#include \"ccs.h\"
#include \"ccsnames.h\"

";

    if ($Bits == 8)
    {
        print OUTFILE
"#if (BYTE_ORDER == LITTLE_ENDIAN)
# define W(word) (word) & 0xFF, (word) >> 8
#elif (BYTE_ORDER == BIG_ENDIAN)
# define W(word) (word) >> 8, (word) & 0xFF
#else
# error \"Unknown byte order.\"
#endif

";
    }

    unless ($NoTo)
    {
        if ($Bits == 8)
        {
            Output8bitToUCS();
        }
        else
        {
            OutputSpeed("to_ucs") unless $NoSpeed;
            OutputSize("to_ucs") unless $NoSize;
        }
    }
    unless ($NoFrom)
    {
        if ($Bits == 8)
        {
            Output8bitFromUCS();
        }
        else
        {
            OutputSpeed("from_ucs") unless $NoSpeed;
            OutputSize("from_ucs") unless $NoSize;
        }
    }

    # OUTPUT TABLE DESCRIPTION STRUCTURE
    print OUTFILE
"/*
 * $CCSName CCS description table.
 * $Separator
 */
_CONST $TypeBICCS
$VarBICCS =
{
\t$MacroVer1Table, /* Table version */
\t$MacroCCSName, /* CCS name */
";
    if ($Bits == 8)
    {
        print OUTFILE
"\t$Macro8bitTable, /* Table bits */
\t0, /* Not Used */
#if defined ($GuardFromUCS)
\t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table */
#else
\t(__uint16_t *)NULL,
#endif
\t0, /* Not Used */
#if defined ($GuardToUCS)
\t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table */
#else
\t(__uint16_t *)NULL,
#endif
};\n";
    }
    else
    {
        print OUTFILE
"\t$Macro16bitTable, /* Table bits */
#if defined ($GuardFromUCS) \\
 && ($GuardSize)
\t$MacroSizeTable,
\t(__uint16_t *)&$VarFromUCSSize, /* UCS -> $CCSName table size-optimized table */
#elif defined ($GuardFromUCS) \\
 && !($GuardSize)
\t$MacroSpeedTable,
\t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table speed-optimized table */
#else
\t$MacroSpeedTable,
\t(__uint16_t *)NULL,
#endif
#if defined ($GuardToUCS) \\
 && ($GuardSize)
\t$MacroSizeTable,
\t(__uint16_t *)&$VarToUCSSize /* $CCSName -> UCS table size-optimized table */
#elif defined ($GuardToUCS) \\
 && !($GuardSize)
\t$MacroSpeedTable,
\t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table speed-optimized table */
#else
\t$MacroSpeedTable,
\t(__uint16_t *)NULL,
#endif
};\n";
    }
    print OUTFILE "\n#endif /* $GuardToUCS) || ... */\n\n";
}
else
{
    # OUTPUT BINARY TABLES DESCRIPTION STRUCTURE (ALWAYS BIG ENDIAN)
    print OUTFILE pack "n", 1;
    print OUTFILE pack "n", $Bits;
    my $len = length $CCSName;
    print OUTFILE pack "N", $len;
    print OUTFILE pack "a$len", $CCSName;

    # Pad the header with 'X' bytes up to a 4-byte boundary.
    my $pos = $HdrBinary + $len;
    if ($pos & 3)
    {
        my $pad = 4 - ($pos & 3);
        print OUTFILE pack "a$pad", 'XXX';
        $pos += $pad;
    }

    # The table directory holds 16 32-bit words: an (offset, size) pair for
    # each of the 8 possible tables. Unused pairs stay zero.
    $pos += 16*4;

    my @tables = (0) x 16;

    # Record the (offset, size) pair in slots $slot and $slot+1 and advance
    # the running file position past the table.
    my $reserve = sub
    {
        my ($slot, $bytes) = @_;
        $tables[$slot]     = $pos;
        $tables[$slot + 1] = $bytes;
        $pos += $bytes;
    };

    # The order below must match the order the tables are written in.
    $reserve->(0, $FromSpeedBytes) unless $NoFrom || $NoSpeed || $NoBE;
    $reserve->(2, $FromSpeedBytes) unless $NoFrom || $NoSpeed || $NoLE;
    if ($Bits == 16)
    {
        $reserve->(4, $FromSizeBytes) unless $NoFrom || $NoSize || $NoBE;
        $reserve->(6, $FromSizeBytes) unless $NoFrom || $NoSize || $NoLE;
    }
    $reserve->(8, $ToSpeedBytes) unless $NoTo || $NoSpeed || $NoBE;
    $reserve->(10, $ToSpeedBytes) unless $NoTo || $NoSpeed || $NoLE;
    if ($Bits == 16)
    {
        $reserve->(12, $ToSizeBytes) unless $NoTo || $NoSize || $NoBE;
        $reserve->(14, $ToSizeBytes) unless $NoTo || $NoSize || $NoLE;
    }

    print OUTFILE pack("N", $_) foreach @tables;

    print "Total bytes for output: $pos.\n" if $Verbose;

    # OUTPUT BINARY TABLES ('n' - big endian, 'v' - little endian)
    unless ($NoFrom)
    {
        if ($Bits == 8)
        {
            Output8bitFromUCS("n") unless $NoBE;
            Output8bitFromUCS("v") unless $NoLE;
        }
        else
        {
            unless ($NoSpeed)
            {
                OutputSpeed("from_ucs", "n") unless $NoBE;
                OutputSpeed("from_ucs", "v") unless $NoLE;
            }
            unless ($NoSize)
            {
                OutputSize("from_ucs", "n") unless $NoBE;
                OutputSize("from_ucs", "v") unless $NoLE;
            }
        }
    }
    unless ($NoTo)
    {
        if ($Bits == 8)
        {
            Output8bitToUCS("n") unless $NoBE;
            Output8bitToUCS("v") unless $NoLE;
        }
        else
        {
            unless ($NoSpeed)
            {
                OutputSpeed("to_ucs", "n") unless $NoBE;
                OutputSpeed("to_ucs", "v") unless $NoLE;
            }
            unless ($NoSize)
            {
                OutputSize("to_ucs", "n") unless $NoBE;
                OutputSize("to_ucs", "v") unless $NoLE;
            }
        }
    }
}

close INFILE;
# Buffered write errors (e.g. disk full) only surface when the handle is
# closed, so the result of close must be checked for the output file.
Err("Can't close \"$OutFile\" file: $!.\n", 1) unless close(OUTFILE);
exit 0;


# ##############################################################################
#
# SUPPLEMENTARY FUNCTIONS
#
# ##############################################################################


# =============================================================================
#
# Generate 8bit "to_ucs" table. Store table data in @ToSpeedTbl array
# (256 elements, $InvCode for CCS codes without a mapping).
# Store table size in $ToSpeedBytes scalar.
#
# =============================================================================
sub Generate8bitToUCS()
{
    for (my $i = 0; $i <= 255; $i++)
    {
        $ToSpeedTbl[$i] = defined $CCSUCS{$i} ? $CCSUCS{$i} : $InvCode;
    }
    # 256 entries, 16 bits each.
    $ToSpeedBytes = 256*2;
}


# =============================================================================
#
# Generate speed-optimized table.
#
# Parameter 1:
#   "to_ucs"   - generate "to_ucs" table, store table data in @ToSpeedTbl
#                array, store table size in $ToSpeedBytes scalar.
#   "from_ucs" - generate "from_ucs" table, store table data in @FromSpeedTbl
#                array, store table size in $FromSpeedBytes scalar.
#
# Data is written to @ToSpeedTbl or @FromSpeedTbl (@map) table and has the
# following format:
#   $table[0] - 256-element array (control block);
#   $table[1 .. $#table] - 256-element arrays (data blocks).
#
# =============================================================================
sub GenerateSpeed($)
{
    my $map;
    my $tbl;
    my $bytes;

    if ($_[0] eq "to_ucs")
    {
        $map = \%CCSUCS;
        $tbl = \@ToSpeedTbl;
        $bytes = \$ToSpeedBytes;
    }
    elsif ($_[0] eq "from_ucs")
    {
        $map = \%UCSCCS;
        $tbl = \@FromSpeedTbl;
        $bytes = \$FromSpeedBytes;
    }
    else
    {
        Err("Internal script error in GenerateSpeed()\n");
    }

    # Identify used blocks: a block is the set of codes sharing a high byte.
    my @busy_blocks;
    $busy_blocks[$_ >> 8] = 1 foreach (keys %$map);

    # GENERATE FIRST 256-ELEMENT CONTROL BLOCK
    # Each used block gets the index of its data block in the resulting
    # array, unused blocks stay undefined. For 16 bit tables the first data
    # block follows the 256-element control block; for 8 bit tables the
    # starting value also accounts for the table header.
    my $idx = $Bits == 16 ? 0 : 256 + $Hdr8bitFromUCS;
    for (my $i = 0; $i <= 0xFF; $i++)
    {
        $tbl->[0]->[$i] = $busy_blocks[$i] ? ($idx += 256) : undef;
    }

    # GENERATE DATA BLOCKS
    # Data block for high byte $i is stored in $tbl->[$i + 1]; codes without
    # a mapping stay undefined.
    $$bytes = 0;
    for (my $i = 0; $i <= 0xFF; $i++)
    {
        next unless $busy_blocks[$i];
        $$bytes += 256;
        for (my $j = 0; $j <= 0xFF; $j++)
        {
            $tbl->[$i+1]->[$j] = $map->{($i << 8) | $j};
        }
    }
    $$bytes *= 2 if $Bits == 16;                # 16 bit data entries
    $$bytes += $Hdr8bitFromUCS if $Bits == 8;   # 8 bit table header
    $$bytes += 512;                             # control block, always 16 bit
}


# =============================================================================
#
# Generate 16bit size-optimized table.
#
# Parameter 1:
#   "to_ucs"   - generate "to_ucs" table, store table data in @ToSizeTbl
#                array, store table size in $ToSizeBytes scalar.
#   "from_ucs" - generate "from_ucs" table, store table data in @FromSizeTbl
#                array, store table size in $FromSizeBytes scalar.
#
# Data is written to @ToSizeTbl or @FromSizeTbl (@map) table and has the
# following format:
#   $table[0] - number of ranges;
#   $table[1] - number of unranged codes;
#   $table[2] - unranged codes index in resulting array;
#   $table[3]->[0 .. $table[0]] - array of arrays of ranges:
#     $table[3]->[x]->[0] - first code;
#     $table[3]->[x]->[1] - last code;
#     $table[3]->[x]->[2] - range index in resulting array;
#   $table[4]->[0 .. $table[0]] - array of arrays of ranges content;
#   $table[5]->[0 .. $table[1]] - array of arrays of unranged codes;
#     $table[5]->[x]->[0] - code (key of the map);
#     $table[5]->[x]->[1] - code it is mapped to (value of the map);
#
# =============================================================================
sub Generate16bitSize($)
{
    my $map;
    my $tbl;
    my $bytes;

    if ($_[0] eq "to_ucs")
    {
        $map = \%CCSUCS;
        $tbl = \@ToSizeTbl;
        $bytes = \$ToSizeBytes;
    }
    elsif ($_[0] eq "from_ucs")
    {
        $map = \%UCSCCS;
        $tbl = \@FromSizeTbl;
        $bytes = \$FromSizeBytes;
    }
    else
    {
        Err("Internal script error Generate16bitSize()\n");
    }

    # CREATE LIST OF RANGES.
    # A range is a run of consecutive codes; a new range is started every
    # time a gap is found in the sorted list of codes.
    my @codes = sort {$a <=> $b} keys %$map;
    my @ranges; # Code ranges
    my @range;  # Current working range
    foreach my $code (@codes)
    {
        if (@range and $code - 1 != $range[$#range])
        {
            push @ranges, [@range];
            @range = ();
        }
        push @range, $code;
    }
    # Add last range too
    push @ranges, [@range] if @range;

    # OPTIMIZE LIST OF RANGES.
    # Join neighbouring ranges whenever one joined range (gap filled with
    # undefined entries) doesn't need more array entries than the two
    # separate ranges do.
    my $r = 0; # Working range number
    while (1)
    {
        # ">=" rather than "==": with an empty map there are no ranges at
        # all ($#ranges is -1) and there is nothing to dereference.
        last if ($r >= $#ranges);

        my @r1 = @{$ranges[$r]};
        my @r2 = @{$ranges[$r + 1]};

        # Calculate how many array entries two ranges need: one code costs
        # 2 entries and two codes cost 4 (they are stored as unranged
        # pairs), longer ranges cost their length plus 3 entries of range
        # description.
        my $s1 = $#r1 == 0 ? 2 : $#r1 == 1 ? 4 : $#r1 + 1 + 3;
        my $s2 = $#r2 == 0 ? 2 : $#r2 == 1 ? 4 : $#r2 + 1 + 3;
        my $two = $s1 + $s2;

        # Calculate how many array entries will be needed if we join them
        my $one = $r2[$#r2] - $r1[0] + 1 + 3;

        if ($one > $two)
        {
            $r += 1;
            next;
        }

        # Join ranges, filling the gap with undefined entries. $r isn't
        # advanced: the joined range is compared with its new neighbour.
        my @joined = @r1;
        for (my $i = $r1[$#r1]+1; $i < $r2[0]; $i++)
        {
            push @joined, undef;
        }
        push @joined, @r2;
        $ranges[$r] = \@joined;
        splice @ranges, $r+1, 1;
    }

    # SEPARATE RANGED AND UNRANGED CODES. SPLIT 2-CODES RANGES ON 2 UNRANGED.
    # (ranges of one or two codes are never the result of joining, so they
    # contain no undefined entries)
    my @unranged;
    my @real_ranges;
    foreach my $rng (@ranges)
    {
        if ($#$rng <= 1)
        {
            push @unranged, @$rng;
        }
        else
        {
            push @real_ranges, $rng;
        }
    }
    @ranges = @real_ranges;

    # CALCULATE UNRANGED CODES ARRAY INDEX
    # 3 header entries, 3 entries per range description, then ranges content.
    my $idx = 3 + ($#ranges + 1)*3;
    $idx += $#$_ + 1 foreach @ranges;

    # COMPOSE TABLE
    $tbl->[0] = $#ranges + 1;   # Number of ranges
    $tbl->[1] = $#unranged + 1; # Number of unranged codes
    $tbl->[2] = $idx;           # Array index of unranged codes

    # Generate ranges list
    $idx = 3 + ($#ranges + 1)*3; # First range data index
    $$bytes = $idx*2;
    my $num = 0;
    foreach my $rng (@ranges)
    {
        $tbl->[3]->[$num]->[0] = $rng->[0];
        $tbl->[3]->[$num]->[1] = $rng->[$#$rng];
        $tbl->[3]->[$num]->[2] = $idx;
        $idx += $#$rng + 1;
        $num += 1;
    }

    # Generate ranges content (undefined entries mark gaps inside a range)
    $num = 0;
    foreach my $rng (@ranges)
    {
        for (my $i = 0; $i <= $#$rng; $i++)
        {
            $tbl->[4]->[$num]->[$i] =
                defined $rng->[$i] ? $map->{$rng->[$i]} : undef;
        }
        $num += 1;
        $$bytes += ($#$rng + 1)*2;
    }

    # Generate unranged codes list
    $num = 0;
    foreach my $code (@unranged)
    {
        $tbl->[5]->[$num]->[0] = $code;
        $tbl->[5]->[$num]->[1] = $map->{$code};
        $num += 1;
    }

    # Every unranged code is stored as a pair of 16 bit entries.
    $$bytes += ($#unranged + 1)*4;
}


# =============================================================================
#
# Output 8bit "to UCS" table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub Output8bitToUCS(;$)
{
    my $endian = $_[0];

    if ($Verbose)
    {
        my $e = defined $endian
              ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
        printf "Output%s 8-bit %s -> UCS table (%s bytes).\n",
               $e, $CCSName, $ToSpeedBytes;
    }

    unless ($Source)
    {
        # Binary output: 256 16-bit entries in the requested byte order.
        foreach my $code (@ToSpeedTbl)
        {
            print OUTFILE pack($endian, $code == $TmpLost ? $LostCode : $code);
        }
        return;
    }

    # Output heading information. The text is data, not a format string,
    # so it is written with print.
    print OUTFILE
"/*
 * 8-bit $CCSName -> UCS table ($ToSpeedBytes bytes).
 * $Separator
 */
#if defined ($GuardToUCS)

static _CONST __uint16_t
${VarToUCSSpeed}\[] =
{\n\t";

    # Output table entries, 8 per line.
    my $br = 0;
    foreach my $code (@ToSpeedTbl)
    {
        $br += 1;
        if ($code == $InvCode)
        {
            print OUTFILE "$MacroInvCode,";
        }
        elsif ($code == $TmpLost)
        {
            print OUTFILE "$MacroLostCode,";
        }
        else
        {
            printf OUTFILE "0x%.4X,", $code;
        }
        unless ($br % 8)
        {
            print OUTFILE "\n\t";
            $br = 0;
        }
    }
    print OUTFILE "\n};\n\n#endif /* $GuardToUCS */\n\n";
}


# =============================================================================
#
# Output 8bit "from UCS" table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub Output8bitFromUCS(;$)
{
    my $endian = $_[0];

    if ($Verbose)
    {
        my $e = defined $endian
              ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
        printf "Output%s 8-bit UCS -> %s table (%s bytes).\n",
               $e, $CCSName, $FromSpeedBytes;
    }

    if ($Source)
    {
        print OUTFILE
"/*
 * 8-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
 * $Separator
 */

#if defined ($GuardFromUCS)

static _CONST unsigned char
${VarFromUCSSpeed}\[] =
{
";
    }

    # SAVE 0xFF MAPPING.
    if ($Source)
    {
        printf OUTFILE "\tW(0x%.4X), /* Real 0xFF mapping. 0xFF is used "
                     . "to mark invalid codes */\n", $FFMap;
    }
    else
    {
        print OUTFILE pack($endian, $FFMap);
    }

    # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
    if ($Source)
    {
        # The table is an array of bytes, so every 16 bit heading entry is
        # emitted through the W() macro; 4 entries per line.
        print OUTFILE "\t/* Heading Block */";
        for (my $i = 0; $i < 256; $i++)
        {
            print OUTFILE "\n\t" unless $i % 4;
            if (defined $FromSpeedTbl[0]->[$i])
            {
                printf OUTFILE "W(0x%.4X),", $FromSpeedTbl[0]->[$i];
            }
            else
            {
                print OUTFILE "W($MacroInvBlock),";
            }
        }
    }
    else
    {
        print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
            foreach @{$FromSpeedTbl[0]};
    }

    # OUTPUT DATA BLOCKS (8 BIT ENTRIES)
    if ($Source)
    {
        my $index = 512 + $Hdr8bitFromUCS;
        for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
        {
            next unless defined $FromSpeedTbl[$blk];
            printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
            $index += 256;
            for (my $i = 0; $i < 256; $i++)
            {
                print OUTFILE "\n\t" unless $i % 8;
                my $code = $FromSpeedTbl[$blk]->[$i];
                if (!defined $code)
                {
                    printf OUTFILE "0x%.2X,", $InvCode8bit;
                }
                else
                {
                    printf OUTFILE "0x%.2X,",
                           $code == $TmpLost ? $LostCode : $code;
                }
            }
        }
        print OUTFILE "\n};\n\n#endif /* $GuardFromUCS */\n\n";
    }
    else
    {
        for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
        {
            next unless defined $FromSpeedTbl[$blk];
            for (my $i = 0; $i < 256; $i++)
            {
                my $code = $FromSpeedTbl[$blk]->[$i];
                $code = $InvCode8bit if !defined $code;
                $code = $LostCode if $code == $TmpLost;
                # Packed bytes are data: they must go through print, never
                # through printf, which would treat them as a format string.
                print OUTFILE pack('C', $code);
            }
        }
    }
}


# =============================================================================
#
# Output 16bit Speed-optimized table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1:
#   "to_ucs"   - Output "to_ucs" table.
#   "from_ucs" - Output "from_ucs" table.
# Parameter 2: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub OutputSpeed($;$)
{
    my $endian = $_[1];
    my $tbl;
    my ($direction, $optimiz, $e, $bytes);
    $optimiz = $Bits == 16 ? " speed-optimized" : "";
    $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
    if ($_[0] eq "to_ucs")
    {
        $tbl = \@ToSpeedTbl;
        $direction = " $CCSName -> UCS";
        $bytes = $ToSpeedBytes;

        if ($Source)
        {
            print OUTFILE
"/*
 * 16-bit $CCSName -> UCS speed-optimized table ($ToSpeedBytes bytes).
 * $Separator
 */
#if defined ($GuardToUCS) \\
 && !($GuardSize)

static _CONST __uint16_t
${VarToUCSSpeed}\[] =
{
";
        }
    }
    elsif ($_[0] eq "from_ucs")
    {
        $tbl = \@FromSpeedTbl;
        $direction = " UCS -> $CCSName";
        $bytes = $FromSpeedBytes;

        if ($Source)
        {
            print OUTFILE
"/*
 * 16-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
 * $Separator
 */

#if defined ($GuardFromUCS) \\
 && !($GuardSize)

static _CONST __uint16_t
${VarFromUCSSpeed}\[] =
{
";
        }
    }
    else
    {
        Err("Internal script error Output16bitSpeed()\n");
    }

    printf "Output%s 16-bit%s%s table (%d bytes).\n",
           $e, $direction, $optimiz, $bytes if $Verbose;

    # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
    if ($Source)
    {
        # 8 entries per line; unused blocks are shown as $MacroInvBlock.
        print OUTFILE "\t/* Heading Block */";
        for (my $i = 0; $i < 256; $i++)
        {
            print OUTFILE "\n\t" unless $i % 8;
            if (defined $tbl->[0]->[$i])
            {
                printf OUTFILE "0x%.4X,", $tbl->[0]->[$i];
            }
            else
            {
                print OUTFILE "$MacroInvBlock,";
            }
        }
    }
    else
    {
        print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
            foreach @{$tbl->[0]};
    }

    # OUTPUT OTHER BLOCKS
    if ($Source)
    {
        my $index = 256;
        for (my $blk = 1; $blk <= $#$tbl; $blk++)
        {
            next unless defined $tbl->[$blk];
            printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
            $index += 256;
            for (my $i = 0; $i < 256; $i++)
            {
                print OUTFILE "\n\t" unless $i % 8;
                my $code = $tbl->[$blk]->[$i];
                my $text = !defined $code     ? $MacroInvCode
                         : $code == $TmpLost  ? $MacroLostCode
                         :                      sprintf("0x%.4X", $code);
                print OUTFILE $text, ",";
            }
        }
    }
    else
    {
        for (my $blk = 1; $blk <= $#$tbl; $blk++)
        {
            next unless defined $tbl->[$blk];
            for (my $i = 0; $i < 256; $i++)
            {
                my $code = $tbl->[$blk]->[$i];
                print OUTFILE pack($endian,
                    defined $code ? ($code == $TmpLost ? $LostCode : $code)
                                  : $InvCode);
            }
        }
    }

    if ($Source)
    {
        if ($_[0] eq "to_ucs")
        {
            print OUTFILE
"
};

#endif /* $GuardToUCS && !$GuardSize */

";
        }
        else
        {
            print OUTFILE
"
};

#endif /* $GuardFromUCS && !$GuardSize */

";
        }
    }
}

# =============================================================================
#
# Output 16bit Size-optimized table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1:
#   "to_ucs"   - Output "to_ucs" table.
#   "from_ucs" - Output "from_ucs" table.
# Parameter 2: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub OutputSize($;$)
{
    my $endian = $_[1];
    my $tbl;
    my ($direction, $optimiz, $e, $bytes);
    $optimiz = $Bits == 16 ? " size-optimized" : "";
    $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : "";
    if ($_[0] eq "to_ucs")
    {
        $tbl = \@ToSizeTbl;
        $direction = " $CCSName -> UCS";
        $bytes = $ToSizeBytes;

        if ($Source)
        {
            print OUTFILE
"/*
 * 16-bit $CCSName -> UCS size-optimized table ($ToSizeBytes bytes).
 * $Separator
 */
#if defined ($GuardToUCS) \\
 && ($GuardSize)

static _CONST __uint16_t
${VarToUCSSize}\[] =
{
";
        }
    }
    elsif ($_[0] eq "from_ucs")
    {
        $tbl = \@FromSizeTbl;
        $direction = " UCS -> $CCSName";
        $bytes = $FromSizeBytes;
        if ($Source)
        {
            print OUTFILE
"/*
 * 16-bit UCS -> $CCSName size-optimized table ($FromSizeBytes bytes).
 * $Separator
 */

#if defined ($GuardFromUCS) \\
 && ($GuardSize)

static _CONST __uint16_t
${VarFromUCSSize}\[] =
{
";
        }
    }
    else
    {
        Err("Internal script error Output16bitSize()\n");
    }

    printf "Output%s 16-bit%s%s table (%d bytes).\n",
           $e, $direction, $optimiz, $bytes if $Verbose;

    # OUTPUT FIRST 3 ELEMENTS
    if ($Source)
    {
        printf OUTFILE "\t0x%.4X, /* Ranges number */\n", $tbl->[0];
        printf OUTFILE "\t0x%.4X, /* Unranged codes number */\n", $tbl->[1];
        printf OUTFILE "\t0x%.4X, /* First unranged code index */\n", $tbl->[2];
    }
    else
    {
        # Packed bytes are data, not a format string: a 0x25 ('%') byte in
        # any of these values would be interpreted by printf and corrupt
        # the table, so print is used.
        print OUTFILE pack($endian, $tbl->[0]);
        print OUTFILE pack($endian, $tbl->[1]);
        print OUTFILE pack($endian, $tbl->[2]);
    }

    # $idx tracks the array index shown in the comments of the generated
    # source; it isn't needed for binary output.
    my $idx = 0;
    # OUTPUT RANGES
    if ($Source)
    {
        print OUTFILE "\t/* Ranges list: first code, last Code, array index. */\n";
        for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
        {
            # Range descriptions follow the 3 header entries, 3 entries each.
            $idx += 3;
            printf OUTFILE "\t/* Array index: 0x%.4X */ 0x%.4X, 0x%.4X, 0x%.4X,\n",
                   $idx,
                   $tbl->[3]->[$range]->[0],
                   $tbl->[3]->[$range]->[1],
                   $tbl->[3]->[$range]->[2];
        }
    }
    else
    {
        for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
        {
            print OUTFILE pack($endian, $tbl->[3]->[$range]->[0]),
                          pack($endian, $tbl->[3]->[$range]->[1]),
                          pack($endian, $tbl->[3]->[$range]->[2]);
        }
    }
    $idx += 3;

    # OUTPUT RANGES CONTENT
    if ($Source)
    {
        print OUTFILE "\t/* Ranges content */";
        for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
        {
            printf OUTFILE "\n\t/* Range 0x%.4X - 0x%.4X, array index: 0x%.4X */",
                   $tbl->[3]->[$range]->[0], $tbl->[3]->[$range]->[1], $idx;
            $idx += $tbl->[3]->[$range]->[1] - $tbl->[3]->[$range]->[0] + 1;
            for (my $elt = 0; $elt <= $#{$tbl->[4]->[$range]}; $elt++)
            {
                print OUTFILE "\n\t" unless $elt % 8;
                my $code = $tbl->[4]->[$range]->[$elt];
                if (!defined $code)
                {
                    print OUTFILE "$MacroInvCode,";
                }
                elsif ($code == $TmpLost)
                {
                    print OUTFILE "$MacroLostCode,";
                }
                else
                {
                    printf OUTFILE "0x%.4X,", $code;
                }
            }
        }
    }
    else
    {
        for (my $range = 0; $range <= $#{$tbl->[3]}; $range++)
        {
            for (my $elt = 0; $elt <= $#{$tbl->[4]->[$range]}; $elt++)
            {
                my $code = $tbl->[4]->[$range]->[$elt];
                $code = $InvCode if !defined $code;
                $code = $LostCode if $code == $TmpLost;
                print OUTFILE pack($endian, $code);
            }
        }
    }

    # OUTPUT UNRANGED CODES
    # Each unranged code is a (code, mapped code) pair. A lost mapped code is
    # still marked by $TmpLost here, which doesn't fit 16 bits, so it is
    # replaced in the same way as in the ranges content.
    if ($Source)
    {
        printf OUTFILE "\n\t/* Unranged codes (%d codes) */", $tbl->[1];
        for (my $i = 0; $i <= $#{$tbl->[5]}; $i++)
        {
            my $mapped = $tbl->[5]->[$i]->[1];
            printf OUTFILE "\n\t/* Array index: 0x%.4X */ 0x%.4X,%s,",
                   $idx, $tbl->[5]->[$i]->[0],
                   $mapped == $TmpLost ? $MacroLostCode
                                       : sprintf("0x%.4X", $mapped);
            $idx += 2;
        }
    }
    else
    {
        for (my $i = 0; $i <= $#{$tbl->[5]}; $i++)
        {
            my $mapped = $tbl->[5]->[$i]->[1];
            print OUTFILE pack($endian, $tbl->[5]->[$i]->[0]),
                          pack($endian, $mapped == $TmpLost ? $LostCode
                                                            : $mapped);
        }
    }

    if ($Source)
    {
        if ($_[0] eq "to_ucs")
        {
            print OUTFILE
"
};

#endif /* $GuardToUCS && $GuardSize */

";
        }
        else
        {
            print OUTFILE
"
};

#endif /* $GuardFromUCS && $GuardSize */

";
        }
    }
}


# =============================================================================
#
# Parse command line options
#
# =============================================================================
sub ProcessOptions()
{
    my $help_opt = 'h';    # Print help option
    my $input_opt = 'i';   # Input file name option
    my $output_opt = 'o';  # Output file name option
    my $source_opt = 'S';  # Generate C source file option
    my $enc_opt = 'N';     # Encoding name
    my $plane_opt = 'p';   # Plane number
    my $verbose_opt = 'v'; # Verbose output
    my $ccscol_opt = 'x';  # Encoding's column number
    my $ucscol_opt = 'y';  # UCS column number
    my $nosize_opt = 'l';  # Don't generate size-optimized tables
    my $nospeed_opt = 'b'; # Don't generate speed-optimized tables
    my $nobe_opt = 'B';    # Don't generate big-endian tables
    my $nole_opt = 'L';    # Don't generate little-endian tables
    my $noto_opt = 't';    # Don't generate "to_ucs" table
    my $nofrom_opt = 'f';  # Don't generate "from_ucs" table

    my %args;              # Command line arguments found by getopts()

    my $getopts_string =
      "$help_opt$source_opt$enc_opt:$verbose_opt$input_opt:$output_opt:$plane_opt:"
    . "$nosize_opt$nospeed_opt$nobe_opt$nole_opt$noto_opt$nofrom_opt$ccscol_opt:"
    . "$ucscol_opt:";

    getopts($getopts_string, \%args) || Err "getopts() failed: $!.\n", 1;

    # Print usage rules and exit.
+ if ($args{$help_opt}) + { + print<<END +Usage: + -$help_opt - this help message; + -$input_opt - input file name (required); + -$output_opt - output file name; + -$enc_opt - CCS or encoding name; + -$plane_opt - plane number (high 16 bits) to use (in hex); + -$source_opt - generate C source file; + -$nospeed_opt - don't generate speed-optimized tables (binary files only); + -$nosize_opt - don't generate size-optimized tables (binary files only); + -$nobe_opt - don't generate Big Endian tables (binary files only); + -$nole_opt - don't generate Little Endian tables (binary files only); + -$noto_opt - don't generate "to_ucs" table; + -$nofrom_opt - don't generate "from_ucs" table; + -$ccscol_opt - encoding's column number; + -$ucscol_opt - UCS column number; + -$verbose_opt - verbose output. + +If output file name isn't specified, <infile>.c (for sources) or +<infile>.cct (for binaries) is assumed. +If encoding name isn't specified <infile> is assumed. +<infile> is normalized (small letters, "-" are substituted by "_") input file +name base (no extension). For example, for Koi8-r.txt input file, <infile> +is koi8_r. +END +; + exit 0; + } + + $Verbose = $args{$verbose_opt}; + $Source = $args{$source_opt}; + $NoSpeed = $args{$nospeed_opt}; + $NoSize = $args{$nosize_opt}; + $NoBE = $args{$nobe_opt}; + $NoLE = $args{$nole_opt}; + $NoFrom = $args{$nofrom_opt}; + $NoTo = $args{$noto_opt}; + $CCSCol = $args{$ccscol_opt}; + $UCSCol = $args{$ucscol_opt}; + $Plane = $args{$plane_opt}; + $InFile = $args{$input_opt}; + $OutFile = $args{$output_opt}; + $CCSName = $args{$enc_opt}; + + Err "Error: input file isn't defined. 
Use -$help_opt for help.\n", 1 + unless $InFile; + + unless ($OutFile) + { + # Construct output file name + $OutFile = $InFile; + $OutFile =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L$2/; + $OutFile =~ tr/-/_/; + if ($Source) + { + $OutFile = "$OutFile.c"; + } + else + { + $OutFile = "$OutFile.cct" + } + } + + unless ($CCSName) + { + # Construct CCS name + $CCSName = $InFile; + $CCSName =~ s/(.*\/)*([0-9a-zA-Z-_]*)(\..*)$/\L$2/; + $CCSName =~ tr/-/_/; + } + + Err "-$nosize_opt option can't be used with -$nospeed_opt option " + . "simultaniously.\n", 1 if $NoSpeed && $NoSize; + + Err "-$nobe_opt option can't be used with -$nole_opt option " + . "simultaniously.\n", 1 if $NoBE && $NoLE; + + Err "-$noto_opt option can't be used with -$nofrom_opt option" + . "simultaniously.\n", 1 if $NoTo && $NoFrom; + + Err "-$nosize_opt, -$nospeed_opt, -$nobe_opt -$nole_opt " + . "-$noto_opt and -$nofrom_opt " + . "options can't be used with -$source_opt option.\n" + . "Source code always contains both speed- and size-optimized " + . "tables in System Endian. 
Use -$help_opt for help.\n", 1 + if $Source and $NoSpeed || $NoSize || $NoBE || $NoLE || $NoTo || $NoFrom; + + if (!$CCSCol && !$UCSCol) + { + $CCSCol = 0; + $UCSCol = 1; + } + elsif ($CCSCol && $UCSCol) + { + Err "Column number should be >= 0\n", 1 if ($CCSCol <= 0 or $UCSCol <= 0); + $CCSCol -= 1; + $UCSCol -= 1; + } + else + { + Err "Please, define both CCS and UCS column numbers\n", 1; + } + + if ($Verbose) + { + print "Use $InFile file for input.\n", + "Use $OutFile file for output.\n", + "Use $CCSName as CCS name.\n"; + print "Generate C source file.\n" if $Source; + print "Generate binary file.\n" if !$Source; + printf "Use plane N 0x%.4X.\n", hex $Plane if defined $Plane; + printf "Use column N $CCSCol for $CCSName.\n"; + printf "Use column N $UCSCol for UCS.\n"; + print "Don't generate size-optimized tables.\n" if $NoSize; + print "Don't generate speed-optimized tables.\n" if $NoSpeed; + print "Don't generate big-endian tables.\n" if $NoBE; + print "Don't generate little-endian tables.\n" if $NoLE; + print "Don't generate \"to_ucs\" table.\n" if $NoTo; + print "Don't generate \"from_ucs\" table.\n" if $NoFrom; + } + + return; +} + + +# ============================================================================= +# +# Print error message, close all and exit +# +# Parameter 1: error message +# Parameter 2: don't delete output file if > 1 +# +# ============================================================================= +sub Err($;$) +{ + print STDERR "$_[0]"; + close INFILE; + close OUTFILE; + unlink $OutFile unless $_[1]; + + exit 1; +}
\ No newline at end of file |