diff options
Diffstat (limited to 'newlib/libc/iconv/ccs/mktbl.pl')
-rwxr-xr-x | newlib/libc/iconv/ccs/mktbl.pl | 1505 |
1 files changed, 0 insertions, 1505 deletions
diff --git a/newlib/libc/iconv/ccs/mktbl.pl b/newlib/libc/iconv/ccs/mktbl.pl deleted file mode 100755 index c3b87932b..000000000 --- a/newlib/libc/iconv/ccs/mktbl.pl +++ /dev/null @@ -1,1505 +0,0 @@ -#!/usr/bin/perl -w -# -# Copyright (c) 2003-2004, Artem B. Bityuckiy, SoftMine Corporation. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1. Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. 
#
use integer;
use Getopt::Std;
use IO::Seekable;
use strict;


# ##############################################################################
#
# FUNCTION PROTOTYPES AND GLOBAL DATA DECLARATION SECTION
#
# ##############################################################################


# SUPPLEMENTARY FUNCTIONS FORWARD DECLARATIONS
sub ProcessOptions();
sub Err($;$);
sub Generate8bitToUCS();
sub GenerateSpeed($);
sub Generate16bitSize($);
sub Output8bitToUCS(;$);
sub Output8bitFromUCS(;$);
sub OutputSpeed($;$);
sub OutputSize($;$);

# VARIABLES RELATING TO COMMAND-LINE OPTIONS
my $Verbose;  # Be verbose if true
my $Source;   # Output C source code instead of binary .cct file if true
my $Plane;    # Use this plane if defined
my $InFile;   # Use this file for input
my $OutFile;  # Use this file for output
my $CCSName;  # Use this CCS name
my $NoSpeed;  # Don't generate speed-optimized tables (binary files only)
my $NoSize;   # Don't generate size-optimized tables (binary files only)
my $NoBE;     # Don't generate big-endian tables (binary files only)
my $NoLE;     # Don't generate little-endian tables (binary files only)
my $NoTo;     # Don't generate "to_ucs" table (binary files only)
my $NoFrom;   # Don't generate "from_ucs" table (binary files only)
my $CCSCol;   # CCS column number in source file
my $UCSCol;   # UCS column number in source file


# DATA STRUCTURES WITH "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES
my (@FromSpeedTbl, @ToSpeedTbl, @FromSizeTbl, @ToSizeTbl);
# "TO_UCS" AND "FROM_UCS" SPEED/SIZE -OPTIMIZED TABLES SIZE IN BYTES
my ($FromSpeedBytes, $ToSpeedBytes, $FromSizeBytes, $ToSizeBytes) =
   (0, 0, 0, 0);

my (%CCSUCS, %UCSCCS); # CCS->UCS and UCS->CCS mappings
my $Bits = 8;          # Table bits (8 or 16);

# SPECIAL MARKER CODES
my $InvCode  = 0xFFFF; # FFFF indicates 16 bit invalid codes
my $InvBlock = 0xFFFF; # FFFF also mark empty blocks in speed-optimized tables
my $LostCode = 0x3F;   # ASCII '?' marks codes lost during CCS->UCS mapping
# To mark invalid codes in 8bit encodings 0xFF is used. CCS's 0xFF mapping is
# saved separately. $FFMap variable stores real 0xFF mapping if defined.
my $InvCode8bit = 0xFF;
my $FFMap;

# 8 Bit "From UCS" table header size (bytes)
my $Hdr8bitFromUCS = 2;
# Binary table header size (bytes)
my $HdrBinary = 8;

# At first all lost CCS codes are marked by $TmpLost to distinguish between
# code which is equivalent to $LostCode and lost codes. This is done in order to
# output $MacroLostCode instead of $LostCode in source file.
my $TmpLost = 0x1FFFF;

# VARIABLES RELATING TO C SOURCE CODE
my $MacroInvCode    = 'INVALC';
my $MacroInvBlock   = 'INVBLK';
my $MacroLostCode   = 'LOST_C';
my $MacroCCSName    = 'ICONV_CCS_%s';
my $GuardSize       = 'defined (TABLE_USE_SIZE_OPTIMIZATION)';
my $GuardToUCS      = "ICONV_TO_UCS_CCS_%s";
my $GuardFromUCS    = "ICONV_FROM_UCS_CCS_%s";
my $MacroSpeedTable = 'TABLE_SPEED_OPTIMIZED';
my $MacroSizeTable  = 'TABLE_SIZE_OPTIMIZED';
my $Macro8bitTable  = 'TABLE_8BIT';
my $Macro16bitTable = 'TABLE_16BIT';
my $MacroVer1Table  = 'TABLE_VERSION_1';
my $TypeBICCS       = 'iconv_ccs_t';
my $VarToUCSSize    = "to_ucs_size_%s";
my $VarToUCSSpeed   = "to_ucs_speed_%s";
my $VarFromUCSSize  = "from_ucs_size_%s";
my $VarFromUCSSpeed = "from_ucs_speed_%s";
my $VarBICCS        = "_iconv_ccs_%s";
# Text block that visually separates tables.
my $Separator = '=' x 70;

# ##############################################################################
#
# SCRIPT ENTRY POINT
#
# ##############################################################################


# Parse command-line options, check them and set correspondent global variables
ProcessOptions();

# Initialize global variables that depend on CCS name.
# Every template is listed exactly once: running sprintf a second time over an
# already expanded string would misinterpret any '%' in the CCS name.
$_ = sprintf $_, $CCSName foreach +($VarToUCSSpeed,
                                    $VarToUCSSize,
                                    $VarFromUCSSpeed,
                                    $VarFromUCSSize,
                                    $VarBICCS);
$_ = sprintf $_, "\U$CCSName" foreach +($GuardToUCS,
                                        $GuardFromUCS,
                                        $MacroCCSName);

# Open input and output files
Err "Can't open \"$InFile\" file for reading: $!.\n", 1
unless open(INFILE, '<', $InFile);
Err "Can't open \"$OutFile\" file for writing: $!.\n", 1
unless open(OUTFILE, '>', $OutFile);

# ==============================================================================
# EXTRACT CODES MAP FROM INPUT FILE
# ==============================================================================

for (my $ln = 1; my $l = <INFILE>; $ln += 1)
{
    # Skip comment and empty lines, remove ending CR symbol
    next if $l =~ /^#.*$/ or $l =~ /^\s*$/;
    $l =~ s/^(.*)\n$/$1/, $l =~ s/^(.*)\r$/$1/;

    # Remove comment and extra spaces
    $l =~ s/(.*)\s*#.*/$1/;
    $l =~ s/\s+/ /g;
    $l =~ s/(.*)\s*$/$1/;

    # Split line into individual codes
    my @codes = split / /, $l;

    # Skip line if there is no needed columns
    unless (defined $codes[$CCSCol])
    {
        print("Warning (line $ln): no CCS column, skip.\n") if $Verbose;
        next;
    }
    unless (defined $codes[$UCSCol])
    {
        print("Warning (line $ln): no UCS column, skip.\n") if $Verbose;
        next;
    }

    # Extract codes strings from needed columns
    my ($ccs, $ucs) = ($codes[$CCSCol], $codes[$UCSCol]);
    my $patt = qr/(0[xX])?[0-9a-fA-F]{1,8}/; # HEX digit regexp pattern.

    # Check that CCS and UCS code strings has right format.
    unless ($ccs =~ m/^$patt$/)
    {
        print("Warning (line $ln): $ccs CCS code isn't recognized, skip.\n")
            if $Verbose;
        next;
    }
    unless ($ucs =~ m/^($patt(,|\+))*$patt$/)
    {
        print("Warning (line $ln): $ucs UCS code isn't recognized, skip.\n")
            if $Verbose;
        next;
    }

    # Convert code to numeric format (assume hex).
    $ccs = hex $ccs;

    if ($ucs =~ m/,/ or $ucs =~ m/\+/)
    {
        # Mark CCS codes with "one to many" mappings as lost.
        # $ccs is already numeric here, so it is printed as is; passing it
        # through hex() again would report a different code.
        printf "Warning (line $ln): only one to one mapping is supported, "
             . "mark 0x%.4X CCS code as lost.\n", $ccs if $Verbose;
        $ucs = $TmpLost;
    }
    else
    {
        # Convert code to numeric format
        $ucs = hex $ucs;

        # Check that UCS code isn't longer than 16 bits.
        if ($ucs > 0xFFFF)
        {
            printf("Warning (line $ln): UCS code should fit 16 bits, "
                 . "mark 0x%.4X CCS code as lost.\n", $ccs) if $Verbose;
            $ucs = $TmpLost;
        }
    }

    # If CCS value > 0xFFFF user should specify plane number.
    if ($ccs > 0xFFFF && !defined $Plane)
    {
        print("Warning (line $ln): $ccs is > 16 bit, plane number should be specified,"
            . " skip this mapping.\n") if $Verbose;
        next;
    }

    if (defined $Plane)
    {
        next if (($ccs & 0xFFFF0000) >> 16) != hex $Plane; # Skip alien plane.
        $ccs &= 0xFFFF;
    }

    # Check that reserved codes aren't used.
    if ($ccs == $InvCode or $ucs == $InvCode)
    {
        print("Warning (line $ln): $InvCode is reserved to mark invalid codes and "
            . "shouldn't be used in mappings, skip.\n") if $Verbose;
        next;
    }

    # Save mapping in UCSCCS and CCSUCS hash arrays.
    # The first mapping seen for a code wins; lost codes never get a reverse
    # (UCS -> CCS) entry.
    $UCSCCS{$ucs} = $ccs if $ucs != $TmpLost && !defined $UCSCCS{$ucs};
    $CCSUCS{$ccs} = $ucs if !defined $CCSUCS{$ccs};

    $Bits = 16 if $ccs > 0xFF;
}

if (not %CCSUCS)
{
    # Report the input file (not the script name) as the place that was searched.
    Err "Error: there is no plane $Plane in \"$InFile\".\n" if defined $Plane;
    Err "Error: mapping wasn't found.\n";
}


# ==============================================================================
# GENERATE TABLE DATA
# ==============================================================================

if ($Bits == 8)
{
    $FFMap = $CCSUCS{0xFF};
    $FFMap = $InvCode if !defined $FFMap;
}

if ($Bits == 8)
{
    Generate8bitToUCS() unless $NoTo;
}
else
{
    GenerateSpeed("to_ucs") unless $NoTo || $NoSpeed;
    Generate16bitSize("to_ucs") unless $NoTo || $NoSize;
}

GenerateSpeed("from_ucs") unless $NoFrom || $NoSpeed;
Generate16bitSize("from_ucs") unless $NoFrom || $NoSize;

# ==============================================================================
# OUTPUT ARRAYS
# ==============================================================================

if ($Source)
{
    # OUTPUT SOURCE
    print OUTFILE
"/*
 * This file was generated automatically - don't edit it.
 * File contains iconv CCS tables for $CCSName encoding.
 */

#include \"ccsbi.h\"

#if defined ($GuardToUCS) \\
 || defined ($GuardFromUCS)

#include <_ansi.h>
#include <sys/types.h>
#include <sys/param.h>
#include \"ccs.h\"
#include \"ccsnames.h\"

";

    if ($Bits == 8)
    {
        print OUTFILE
"#if (BYTE_ORDER == LITTLE_ENDIAN)
# define W(word) (word) & 0xFF, (word) >> 8
#elif (BYTE_ORDER == BIG_ENDIAN)
# define W(word) (word) >> 8, (word) & 0xFF
#else
# error \"Unknown byte order.\"
#endif

";
    }

    unless ($NoTo)
    {
        if ($Bits == 8)
        {
            Output8bitToUCS();
        }
        else
        {
            OutputSpeed("to_ucs") unless $NoSpeed;
            OutputSize("to_ucs") unless $NoSize;
        }
    }
    unless ($NoFrom)
    {
        if ($Bits == 8)
        {
            Output8bitFromUCS();
        }
        else
        {
            OutputSpeed("from_ucs") unless $NoSpeed;
            OutputSize("from_ucs") unless $NoSize;
        }
    }

    # OUTPUT TABLE DESCRIPTION STRUCTURE
    print OUTFILE
"/*
 * $CCSName CCS description table.
 * $Separator
 */
_CONST $TypeBICCS
$VarBICCS =
{
\t$MacroVer1Table, /* Table version */
\t$MacroCCSName, /* CCS name */
";
    if ($Bits == 8)
    {
        print OUTFILE
"\t$Macro8bitTable, /* Table bits */
\t0, /* Not Used */
#if defined ($GuardFromUCS)
\t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table */
#else
\t(__uint16_t *)NULL,
#endif
\t0, /* Not Used */
#if defined ($GuardToUCS)
\t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table */
#else
\t(__uint16_t *)NULL,
#endif
};\n";
    }
    else
    {
        # The comment emitted next to the size-optimized "to UCS" pointer
        # names it as size-optimized, matching the table it refers to.
        print OUTFILE
"\t$Macro16bitTable, /* Table bits */
#if defined ($GuardFromUCS) \\
 && ($GuardSize)
\t$MacroSizeTable,
\t(__uint16_t *)&$VarFromUCSSize, /* UCS -> $CCSName table size-optimized table */
#elif defined ($GuardFromUCS) \\
 && !($GuardSize)
\t$MacroSpeedTable,
\t(__uint16_t *)&$VarFromUCSSpeed, /* UCS -> $CCSName table speed-optimized table */
#else
\t$MacroSpeedTable,
\t(__uint16_t *)NULL,
#endif
#if defined ($GuardToUCS) \\
 && ($GuardSize)
\t$MacroSizeTable,
\t(__uint16_t *)&$VarToUCSSize /* $CCSName -> UCS table size-optimized table */
#elif defined ($GuardToUCS) \\
 && !($GuardSize)
\t$MacroSpeedTable,
\t(__uint16_t *)&$VarToUCSSpeed /* $CCSName -> UCS table speed-optimized table */
#else
\t$MacroSpeedTable,
\t(__uint16_t *)NULL,
#endif
};\n";
    }
    print OUTFILE "\n#endif /* $GuardToUCS) || ... */\n\n";
}
else
{
    # The .cct file is raw binary data: disable any newline translation.
    binmode OUTFILE;

    # OUTPUT BINARY TABLES DESCRIPTION STRUCTURE (ALWAYS BIG ENDIAN)
    print OUTFILE pack "n", 1;
    print OUTFILE pack "n", $Bits;
    my $len = length $CCSName;
    print OUTFILE pack "N", $len;
    print OUTFILE pack "a$len", $CCSName;

    # Pad the header so that the table directory starts on a 4-byte boundary.
    my $pos = $HdrBinary + $len;
    if ($pos & 3)
    {
        my $l = 4 - ($pos & 3);
        print OUTFILE pack "a$l", 'XXX';
        $pos += $l;
    }

    # Table directory: 16 32-bit words, i.e. 8 (offset, size) pairs.
    $pos += 16*4;

    my @tables;
    for (my $i = 0; $i < 16; $i++)
    {
        $tables[$i] = 0;
    }

    $tables[0] = $pos, $tables[1] = $FromSpeedBytes, $pos += $FromSpeedBytes
        unless $NoFrom || $NoSpeed || $NoBE;
    $tables[2] = $pos, $tables[3] = $FromSpeedBytes, $pos += $FromSpeedBytes
        unless $NoFrom || $NoSpeed || $NoLE;
    if ($Bits == 16)
    {
        $tables[4] = $pos, $tables[5] = $FromSizeBytes, $pos += $FromSizeBytes
            unless $NoFrom || $NoSize || $NoBE;
        $tables[6] = $pos, $tables[7] = $FromSizeBytes, $pos += $FromSizeBytes
            unless $NoFrom || $NoSize || $NoLE;
    }
    $tables[8] = $pos, $tables[9] = $ToSpeedBytes, $pos += $ToSpeedBytes
        unless $NoTo || $NoSpeed || $NoBE;
    $tables[10] = $pos, $tables[11] = $ToSpeedBytes, $pos += $ToSpeedBytes
        unless $NoTo || $NoSpeed || $NoLE;
    if ($Bits == 16)
    {
        $tables[12] = $pos, $tables[13] = $ToSizeBytes, $pos += $ToSizeBytes
            unless $NoTo || $NoSize || $NoBE;
        $tables[14] = $pos, $tables[15] = $ToSizeBytes, $pos += $ToSizeBytes
            unless $NoTo || $NoSize || $NoLE;
    }

    print OUTFILE pack("N", $_) foreach @tables;

    print "Total bytes for output: $pos.\n" if $Verbose;

    # OUTPUT BINARY TABLES
    unless ($NoFrom)
    {
        if ($Bits == 8)
        {
            Output8bitFromUCS("n") unless $NoBE;
            Output8bitFromUCS("v") unless $NoLE;
        }
        else
        {
            unless ($NoSpeed)
            {
                OutputSpeed("from_ucs", "n") unless $NoBE;
                OutputSpeed("from_ucs", "v") unless $NoLE;
            }
            unless ($NoSize)
            {
                OutputSize("from_ucs", "n") unless $NoBE;
                OutputSize("from_ucs", "v") unless $NoLE;
            }
        }
    }
    unless ($NoTo)
    {
        if ($Bits == 8)
        {
            Output8bitToUCS("n") unless $NoBE;
            Output8bitToUCS("v") unless $NoLE;
        }
        else
        {
            unless ($NoSpeed)
            {
                OutputSpeed("to_ucs", "n") unless $NoBE;
                OutputSpeed("to_ucs", "v") unless $NoLE;
            }
            unless ($NoSize)
            {
                OutputSize("to_ucs", "n") unless $NoBE;
                OutputSize("to_ucs", "v") unless $NoLE;
            }
        }
    }
}

close INFILE;
# Buffered write errors only surface when the handle is closed.
Err "Can't close \"$OutFile\" file: $!.\n", 1
unless close(OUTFILE);
exit 0;


# ##############################################################################
#
# SUPPLEMENTARY FUNCTIONS
#
# ##############################################################################


# =============================================================================
#
# Generate 8bit "to_ucs" table. Store table data in @ToSpeedTbl array
# (256 entries, one per CCS code; codes without a mapping get $InvCode).
# Store table size in $ToSpeedBytes scalar.
#
# =============================================================================
sub Generate8bitToUCS()
{
    for (my $i = 0; $i <= 255; $i++)
    {
        $ToSpeedTbl[$i] = defined $CCSUCS{$i} ? $CCSUCS{$i} : $InvCode;
    }
    $ToSpeedBytes = 256*2;
}


# =============================================================================
#
# Generate speed-optimized table.
#
# Parameter 1:
#   "to_ucs"   - generate "to_ucs" table, store table data in @ToSpeedTbl
#                array, store table size in $ToSpeedBytes scalar.
#   "from_ucs" - generate "from_ucs" table, store table data in @FromSpeedTbl
#                array, store table size in $FromSpeedBytes scalar.
#
# Data is written to @ToSpeedTbl or @FromSpeedTbl (@map) table and has the
# following format:
#   $table[0] - 256-element array (control block);
#   $table[1 .. $#table] - 256-element arrays (data blocks).
#
# =============================================================================
sub GenerateSpeed($)
{
    my ($map, $tbl, $bytes);

    # Select the mapping to read and the table/size globals to fill.
    if ($_[0] eq "to_ucs")
    {
        ($map, $tbl, $bytes) = (\%CCSUCS, \@ToSpeedTbl, \$ToSpeedBytes);
    }
    elsif ($_[0] eq "from_ucs")
    {
        ($map, $tbl, $bytes) = (\%UCSCCS, \@FromSpeedTbl, \$FromSpeedBytes);
    }
    else
    {
        Err "Internal script error in GenerateSpeed()\n";
    }

    # Identify unused blocks: a block is the set of 256 codes that share the
    # same high byte, and it is busy if at least one of its codes is mapped.
    my @busy_blocks;
    foreach my $code (keys %$map)
    {
        $busy_blocks[$code >> 8] = 1;
    }

    # GENERATE FIRST 256-ELEMENT CONTROL BLOCK
    # Every busy block receives the index of its data block in the resulting
    # array; unused blocks stay undefined.
    my $idx = $Bits == 16 ? 0 : 256 + $Hdr8bitFromUCS;
    foreach my $blk (0 .. 0xFF)
    {
        if ($busy_blocks[$blk])
        {
            $idx += 256;
            $tbl->[0]->[$blk] = $idx;
        }
        else
        {
            $tbl->[0]->[$blk] = undef;
        }
    }

    # GENERATE DATA BLOCKS
    # Data block for high byte N is stored in table element N + 1.
    my $busy_count = 0;
    foreach my $blk (0 .. 0xFF)
    {
        next unless $busy_blocks[$blk];
        $busy_count += 1;
        foreach my $low (0 .. 0xFF)
        {
            $tbl->[$blk + 1]->[$low] = $map->{($blk << 8) | $low};
        }
    }

    # Table size: 256 entries per data block (two bytes each for 16-bit
    # tables), the 8-bit header if any, plus 512 bytes of control block.
    $$bytes = 256 * $busy_count;
    $$bytes *= 2 if $Bits == 16;
    $$bytes += $Hdr8bitFromUCS if $Bits == 8;
    $$bytes += 512;
}


# =============================================================================
#
# Generate 16bit size-optimized table.
#
# Parameter 1:
#   "to_ucs"   - generate "to_ucs" table, store table data in @ToSizeTbl
#                array, store table size in $ToSizeBytes scalar.
#   "from_ucs" - generate "from_ucs" table, store table data in @FromSizeTbl
#                array, store table size in $FromSizeBytes scalar.
#
# Data is written to @ToSizeTbl or @FromSizeTbl (@map) table and has the
# following format:
#   $table[0] - number of ranges;
#   $table[1] - number of unranged codes;
#   $table[2] - unranged codes index in resulting array;
#   $table[3]->[0 .. $table[0]] - array of arrays of ranges:
#     $table[3]->[x]->[0] - first code;
#     $table[3]->[x]->[1] - last code;
#     $table[3]->[x]->[2] - range index in resulting array;
#   $table[4]->[0 .. $table[0]] - array of arrays of ranges content;
#   $table[5]->[0 ..
#              $table[1]] - array of arrays of unranged codes;
#     $table[5]->[x]->[0] - CCS code;
#     $table[5]->[x]->[1] - UCS code;
#
# =============================================================================
sub Generate16bitSize($)
{
    my $map;
    my $tbl;
    my $bytes;

    if ($_[0] eq "to_ucs")
    {
        $map = \%CCSUCS;
        $tbl = \@ToSizeTbl;
        $bytes = \$ToSizeBytes;
    }
    elsif ($_[0] eq "from_ucs")
    {
        $map = \%UCSCCS;
        $tbl = \@FromSizeTbl;
        $bytes = \$FromSizeBytes;
    }
    else
    {
        Err "Internal script error Generate16bitSize()\n";
    }

    # CREATE LIST OF RANGES.
    # A range is a run of consecutive codes; "redo" re-examines the code that
    # broke the current run so that it starts the next one.
    my @codes = sort {$a <=> $b} keys %$map;
    my @ranges; # Code ranges
    my @range;  # Current working range
    foreach (@codes)
    {
        if (not @range or $_ - 1 == $range[$#range])
        {
            push @range, $_;
        }
        else
        {
            my @tmp = @range;
            push @ranges, \@tmp;
            undef @range;
            redo;
        }
    }
    # Add Last range too
    if (@range)
    {
        my @tmp = @range;
        push @ranges, \@tmp;
    }

    # OPTIMIZE LIST OF RANGES.
    # Neighbouring ranges are joined (the gap is filled with undef) whenever
    # the joined range needs no more array entries than the two separate ones.
    my $r = 0; # Working range number
    while (1)
    {
        # ">=" also stops the loop when the mapping is empty ($#ranges == -1).
        last if ($r >= $#ranges);

        my @r1 = @{$ranges[$r]};
        my @r2 = @{$ranges[$r + 1]};

        # Calculate how many array entries two ranges need:
        # 2 for a single unranged code, 4 for two unranged codes, otherwise
        # the range length plus 3 entries of range description.
        my ($s1, $s2);

        if ($#r1 == 0)
        { $s1 = 2; }
        elsif ($#r1 == 1)
        { $s1 = 4; }
        else
        { $s1 = $#r1 + 1 + 3; }

        if ($#r2 == 0)
        { $s2 = 2; }
        elsif ($#r2 == 1)
        { $s2 = 4; }
        else
        { $s2 = $#r2 + 1 + 3; }

        my $two = $s1 + $s2;

        # Calculate how many array entries will be needed if we join them
        my $one = $r2[$#r2] - $r1[0] + 1 + 3;

        $r += 1, next if ($one > $two);

        # Join ranges
        my @r; # New range.
        push @r, $_ foreach (@r1);
        for (my $i = $r1[$#r1]+1; $i < $r2[0]; $i++)
        {
            push @r, undef;
        }
        push @r, $_ foreach (@r2);
        $ranges[$r] = \@r;
        splice @ranges, $r+1, 1;
    }

    # SEPARATE RANGED AND UNRANGED CODES. SPLIT 2-CODES RANGES ON 2 UNRANGED.
    my @unranged;
    foreach (@ranges)
    {
        if ($#$_ == 0)
        {
            push @unranged, $$_[0];
            undef $_;
        }
        elsif ($#$_ == 1)
        {
            push @unranged, $$_[0];
            push @unranged, $$_[1];
            undef $_;
        }
    }

    # DELETE UNUSED ELEMENTS
    @ranges = grep { defined $_ } @ranges;

    # CALCULATE UNRANGED CODES ARRAY INDEX
    my $idx = 3 + ($#ranges + 1)*3;
    $idx += $#$_ + 1 foreach @ranges;

    # COMPOSE TABLE
    $tbl->[0] = $#ranges + 1;   # Number of ranges
    $tbl->[1] = $#unranged + 1; # Number of unranged codes
    $tbl->[2] = $idx;           # Array index of unranged codes
    # Make sure the three lists exist even when they end up empty.
    $tbl->[3] = [];
    $tbl->[4] = [];
    $tbl->[5] = [];

    # Generate ranges list
    $idx = 3 + ($#ranges + 1)*3; # First range data index
    $$bytes = $idx*2;
    my $num = 0;
    foreach (@ranges)
    {
        $tbl->[3]->[$num]->[0] = $_->[0];
        $tbl->[3]->[$num]->[1] = $_->[$#$_];
        $tbl->[3]->[$num]->[2] = $idx;
        $idx += $#$_ + 1;
        $num += 1;
    }

    # Generate ranges content
    $num = 0;
    foreach (@ranges)
    {
        for (my $i = 0; $i <= $#$_; $i++)
        {
            $tbl->[4]->[$num]->[$i] = defined $_->[$i] ? $map->{$_->[$i]} : undef;
        }
        $num += 1;
        $$bytes += ($#$_ + 1)*2;
    }

    # Generate unranged codes list
    $num = 0;
    foreach (@unranged)
    {
        $tbl->[5]->[$num]->[0] = $_;
        $tbl->[5]->[$num]->[1] = $map->{$_};
        $num += 1;
    }

    $$bytes += ($#unranged + 1)*4;
}


# =============================================================================
#
# Output 8bit "to UCS" table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub Output8bitToUCS(;$)
{
    my $endian = $_[0];
    my $br = 0;

    # This routine writes the CCS -> UCS table, and the message says so.
    printf "Output%s 8-bit %s -> UCS table (%d bytes).\n",
           defined $endian ? ($endian eq 'n' ?
                              " Big Endian" : " Little Endian") : "",
           $CCSName, $ToSpeedBytes if $Verbose;

    if ($Source)
    {
        # Output heading information. Plain print: the text is not a format.
        print OUTFILE
"/*
 * 8-bit $CCSName -> UCS table ($ToSpeedBytes bytes).
 * $Separator
 */
#if defined ($GuardToUCS)

static _CONST __uint16_t
${VarToUCSSpeed}\[] =
{\n\t";

        # Eight entries per output line.
        foreach my $code (@ToSpeedTbl)
        {
            $br += 1;
            if ($code == $InvCode)
            {
                print OUTFILE "$MacroInvCode,";
            }
            elsif ($code == $TmpLost)
            {
                print OUTFILE "$MacroLostCode,";
            }
            else
            {
                printf OUTFILE "0x%.4X,", $code;
            }
            print(OUTFILE "\n\t"), $br = 0 unless $br % 8;
        }
        print OUTFILE "\n};\n\n#endif /* $GuardToUCS */\n\n";
    }
    else
    {
        foreach my $code (@ToSpeedTbl)
        {
            print OUTFILE pack($endian, $code == $TmpLost ? $LostCode : $code);
        }
    }
}


# =============================================================================
#
# Output 8bit "from UCS" table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
#
# =============================================================================
sub Output8bitFromUCS(;$)
{
    my $endian = $_[0];

    # This routine writes the UCS -> CCS table, and the message says so.
    printf "Output%s 8-bit UCS -> %s table (%d bytes).\n",
           defined $endian ? ($endian eq 'n' ?
                              " Big Endian" : " Little Endian") : "",
           $CCSName, $FromSpeedBytes if $Verbose;

    if ($Source)
    {
        print OUTFILE
"/*
 * 8-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes).
 * $Separator
 */

#if defined ($GuardFromUCS)

static _CONST unsigned char
${VarFromUCSSpeed}\[] =
{
";
    }

    # SAVE 0xFF MAPPING.
    if ($Source)
    {
        printf OUTFILE "\tW(0x%.4X), /* Real 0xFF mapping. 0xFF is used "
                     . "to mark invalid codes */\n", $FFMap;
    }
    else
    {
        print OUTFILE pack($endian, $FFMap);
    }

    # OUTPUT HEADING BLOCK (ALWAYS 16 BIT)
    if ($Source)
    {
        print OUTFILE "\t/* Heading Block */";
        # Four W() entries per output line.
        for (my $i = 0; $i < 256; $i++)
        {
            print OUTFILE "\n\t" unless $i % 4;
            if (defined $FromSpeedTbl[0]->[$i])
            {
                printf OUTFILE "W(0x%.4X),", $FromSpeedTbl[0]->[$i];
            }
            else
            {
                print OUTFILE "W($MacroInvBlock),";
            }
        }
    }
    else
    {
        print OUTFILE pack($endian, defined $_ ? $_ : $InvBlock)
            foreach @{$FromSpeedTbl[0]};
    }

    # OUTPUT DATA BLOCKS (ONE BYTE PER CODE)
    if ($Source)
    {
        my $index = 512 + $Hdr8bitFromUCS;
        for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
        {
            next unless defined $FromSpeedTbl[$blk];
            printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index;
            $index += 256;
            # Eight entries per output line.
            for (my $i = 0; $i < 256; $i++)
            {
                print OUTFILE "\n\t" unless $i % 8;
                my $code = $FromSpeedTbl[$blk]->[$i];
                if (!defined $code)
                {
                    printf OUTFILE "0x%.2X,", $InvCode8bit;
                }
                else
                {
                    printf OUTFILE "0x%.2X,", $code == $TmpLost ? $LostCode : $code;
                }
            }
        }
        print OUTFILE "\n};\n\n#endif /* $GuardFromUCS */\n\n";
    }
    else
    {
        for (my $blk = 1; $blk <= $#FromSpeedTbl; $blk++)
        {
            next unless defined $FromSpeedTbl[$blk];
            for (my $i = 0; $i < 256; $i++)
            {
                my $code = $FromSpeedTbl[$blk]->[$i];
                # Packed bytes are data, not a format string: use print.
                if (!defined $code)
                {
                    print OUTFILE pack('C', $InvCode8bit);
                }
                else
                {
                    print OUTFILE pack('C', $code == $TmpLost ? $LostCode : $code);
                }
            }
        }
    }
}


# =============================================================================
#
# Output 16bit Speed-optimized table. Output table's source code if $Source
# and table's binary data if !$Source.
#
# Parameter 1:
#   "to_ucs"   - Output "to_ucs" table.
#   "from_ucs" - Output "from_ucs" table.
# Parameter 2: Not used when sources are output. Output BE binary if 'n' and
#              LE binary if 'v'.
-# -# ============================================================================= -sub OutputSpeed($;$) -{ - my $endian = $_[1]; - my $tbl; - my ($direction, $optimiz, $e, $bytes); - $optimiz = $Bits == 16 ? " speed-optimized" : ""; - $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : ""; - if ($_[0] eq "to_ucs") - { - $tbl = \@ToSpeedTbl; - $direction = " $CCSName -> UCS"; - $bytes = $ToSpeedBytes; - - if ($Source) - { - print OUTFILE -"/* - * 16-bit $CCSName -> UCS speed-optimized table ($ToSpeedBytes bytes). - * $Separator - */ -#if defined ($GuardToUCS) \\ - && !($GuardSize) - -static _CONST __uint16_t -${VarToUCSSpeed}\[] = -{ -"; - } - } - elsif ($_[0] eq "from_ucs") - { - $tbl = \@FromSpeedTbl; - $direction = " UCS -> $CCSName"; - $bytes = $FromSpeedBytes; - - if ($Source) - { - print OUTFILE -"/* - * 16-bit UCS -> $CCSName speed-optimized table ($FromSpeedBytes bytes). - * $Separator - */ - -#if defined ($GuardFromUCS) \\ - && !($GuardSize) - -static _CONST __uint16_t -${VarFromUCSSpeed}\[] = -{ -"; - } - } - else - { - Err "Internal script error Output16bitSpeed()\n"; - } - - printf "Output%s 16-bit%s%s table (%d bytes).\n", - $e, $direction, $optimiz, $bytes if $Verbose; - - # OUTPUT HEADING BLOCK (ALWAYS 16 BIT) - if ($Source) - { - my $count = 0; - print OUTFILE "\t/* Heading Block */"; - for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8) - { - print OUTFILE "\n\t" unless $br; - if (defined $tbl->[0]->[$i]) - { - printf OUTFILE "0x%.4X,", $tbl->[0]->[$i]; - } - else - { - print OUTFILE "$MacroInvBlock,"; - } - } - } - else - { - print OUTFILE pack($endian, defined $_ ? 
$_ : $InvBlock) - foreach @{$tbl->[0]}; - } - - # OUTPUT OTHER BLOCKS - if ($Source) - { - my $index = 256; - for (my $blk = 1; $blk <= $#$tbl; $blk++) - { - next unless defined $tbl->[$blk]; - printf OUTFILE "\n\t/* Block $blk, Array index 0x%.4X */", $index; - $index += 256; - for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8) - { - print OUTFILE "\n\t" unless $br; - my $code = $tbl->[$blk]->[$i]; - print OUTFILE defined $code ? - ($code == $TmpLost ? $MacroLostCode : sprintf "0x%.4X", $code) - : $MacroInvCode, ","; - } - } - } - else - { - for (my $blk = 1; $blk <= $#$tbl; $blk++) - { - next unless defined $tbl->[$blk]; - for (my $i = 0, my $br = 0; $i < 256; $br = ++$i % 8) - { - my $code = $tbl->[$blk]->[$i]; - print OUTFILE pack($endian, - defined $code ? ($code == $TmpLost ? $LostCode : $code) : $InvCode); - } - } - } - - if ($Source) - { - if ($_[0] eq "to_ucs") - { - print OUTFILE -" -}; - -#endif /* $GuardToUCS && !$GuardSize */ - -"; - } - else - { - print OUTFILE -" -}; - -#endif /* $GuardFromUCS && !$GuardSize */ - -"; - } - } -} - -# ============================================================================= -# -# Output 16bit Size-optimized table. Output table's source code if $Source -# and table's binary data if !$Source. -# -# Parameter 1: -# "to_ucs" - Output "to_ucs" table. -# "from_ucs" - Output "from_ucs" table. -# Parameter 2: Not used when sources are output. Output BE binary if 'n' and -# LE binary if 'v'. -# -# ============================================================================= -sub OutputSize($;$) -{ - my $endian = $_[1]; - my $tbl; - my ($direction, $optimiz, $e, $bytes); - $optimiz = $Bits == 16 ? " size-optimized" : ""; - $e = $endian ? ($endian eq 'n' ? " Big Endian" : " Little Endian") : ""; - if ($_[0] eq "to_ucs") - { - $tbl = \@ToSizeTbl; - $direction = " $CCSName -> UCS"; - $bytes = $ToSizeBytes; - - if ($Source) - { - print OUTFILE -"/* - * 16-bit $CCSName -> UCS size-optimized table ($ToSizeBytes bytes). 
- * $Separator - */ -#if defined ($GuardToUCS) \\ - && ($GuardSize) - -static _CONST __uint16_t -${VarToUCSSize}\[] = -{ -"; - } - } - elsif ($_[0] eq "from_ucs") - { - $tbl = \@FromSizeTbl; - $direction = " UCS -> $CCSName"; - $bytes = $FromSizeBytes; - if ($Source) - { - print OUTFILE -"/* - * 16-bit UCS -> $CCSName size-optimized table ($FromSizeBytes bytes). - * $Separator - */ - -#if defined ($GuardFromUCS) \\ - && ($GuardSize) - -static _CONST __uint16_t -${VarFromUCSSize}\[] = -{ -"; - } - } - else - { - Err "Internal script error Output16bitSize()\n"; - } - - printf "Output%s 16-bit%s%s table (%d bytes).\n", - $e, $direction, $optimiz, $bytes if $Verbose; - - # OUTPUT FIRST 3 ELEMENTS - if ($Source) - { - printf OUTFILE "\t0x%.4X, /* Ranges number */\n", $tbl->[0]; - printf OUTFILE "\t0x%.4X, /* Unranged codes number */\n", $tbl->[1]; - printf OUTFILE "\t0x%.4X, /* First unranged code index */\n", $tbl->[2]; - } - else - { - printf OUTFILE pack $endian, $tbl->[0]; - printf OUTFILE pack $endian, $tbl->[1]; - printf OUTFILE pack $endian, $tbl->[2]; - } - - my $idx = 0; - # OUTPUT RANGES - if ($Source) - { - print OUTFILE "\t/* Ranges list: first code, last Code, array index. 
*/\n"; - for (my $range = 0; $range <= $#{$tbl->[3]}; $range++) - { - printf OUTFILE "\t/* Array index: 0x%.4X */ 0x%.4X, 0x%.4X, 0x%.4X,\n", - $idx += 3, - $tbl->[3]->[$range]->[0], - $tbl->[3]->[$range]->[1], - $tbl->[3]->[$range]->[2]; - } - } - else - { - for (my $range = 0; $range <= $#{$tbl->[3]}; $range++) - { - print OUTFILE pack($endian, $tbl->[3]->[$range]->[0]), - pack($endian, $tbl->[3]->[$range]->[1]), - pack($endian, $tbl->[3]->[$range]->[2]); - } - } - $idx += 3; - - # OUTPUT RANGES CONTENT - if ($Source) - { - print OUTFILE "\t/* Ranges content */"; - for (my $range = 0; $range <= $#{$tbl->[3]}; $range++) - { - printf OUTFILE "\n\t/* Range 0x%.4X - 0x%.4X, array index: 0x%.4X */", - $tbl->[3]->[$range]->[0], $tbl->[3]->[$range]->[1], $idx; - $idx += $tbl->[3]->[$range]->[1] - $tbl->[3]->[$range]->[0] + 1; - for (my $elt = 0, my $br = 0; - $elt <= $#{$tbl->[4]->[$range]}; - $br = ++$elt % 8) - { - print OUTFILE "\n\t" unless $br; - if (defined $tbl->[4]->[$range]->[$elt]) - { - if ($tbl->[4]->[$range]->[$elt] != $TmpLost) - { - printf OUTFILE "0x%.4X,", $tbl->[4]->[$range]->[$elt]; - } - else - { - print OUTFILE "$MacroLostCode,"; - } - } - else - { - print OUTFILE "$MacroInvCode,"; - } - } - } - } - else - { - for (my $range = 0; $range <= $#{$tbl->[3]}; $range++) - { - for (my $elt = 0; $elt <= $#{$tbl->[4]->[$range]}; $elt++) - { - if (defined $tbl->[4]->[$range]->[$elt]) - { - if ($tbl->[4]->[$range]->[$elt] != $TmpLost) - { - print OUTFILE pack $endian, $tbl->[4]->[$range]->[$elt]; - } - else - { - print OUTFILE pack $endian, $LostCode; - } - } - else - { - print OUTFILE pack $endian, $InvCode; - } - } - } - } - - # OUTPUT UNRANGED CODES - if ($Source) - { - printf OUTFILE "\n\t/* Unranged codes (%d codes) */", $#{$tbl->[4]} + 1; - for (my $i = 0; $i <= $#{$tbl->[5]}; $i++) - { - printf OUTFILE "\n\t/* Array index: 0x%.4X */ 0x%.4X,0x%.4X,", - $idx, $tbl->[5]->[$i]->[0], $tbl->[5]->[$i]->[1]; - } - } - else - { - for (my $i = 0; $i <= 
$#{$tbl->[5]}; $i++)
    # NOTE(review): everything down to the first closing brace in column 0 is
    # the unchanged end of the preceding subroutine, whose beginning lies above
    # this block.
    {
      print OUTFILE pack($endian, $tbl->[5]->[$i]->[0]),
                    pack($endian, $tbl->[5]->[$i]->[1]);
    }
  }

  if ($Source)
  {
    if ($_[0] eq "to_ucs")
    {
      print OUTFILE
"
};

#endif /* $GuardToUCS && $GuardSize */

";
    }
    else
    {
      print OUTFILE
"
};

#endif /* $GuardFromUCS && $GuardSize */

";
    }
  }
}


# =============================================================================
#
# Parse command line options
#
# Reads the options from @ARGV with getopts(), prints the usage text and exits
# with status 0 when the help option is given, copies the option values into
# the file-level option variables, derives the output file name and the CCS
# name from the input file name when they are not given, converts the 1-based
# column numbers to 0-based ones and rejects contradictory option combinations
# by calling Err().
#
# =============================================================================
sub ProcessOptions()
{
  my $help_opt    = 'h'; # Print help option
  my $input_opt   = 'i'; # Input file name option
  my $output_opt  = 'o'; # Output file name option
  my $source_opt  = 'S'; # Generate C source file option
  my $enc_opt     = 'N'; # Encoding name
  my $plane_opt   = 'p'; # Plane number
  my $verbose_opt = 'v'; # Verbose output
  my $ccscol_opt  = 'x'; # Encoding's column number
  my $ucscol_opt  = 'y'; # UCS column number
  my $nosize_opt  = 'l'; # Don't generate size-optimized tables
  my $nospeed_opt = 'b'; # Don't generate speed-optimized tables
  my $nobe_opt    = 'B'; # Don't generate big-endian tables
  my $nole_opt    = 'L'; # Don't generate little-endian tables
  my $noto_opt    = 't'; # Don't generate "to_ucs" table
  my $nofrom_opt  = 'f'; # Don't generate "from_ucs" table

  my %args; # Command line arguments found by getopts()

  # Options followed by ':' take an argument.
  my $getopts_string =
    "$help_opt$source_opt$enc_opt:$verbose_opt$input_opt:$output_opt:$plane_opt:"
  . "$nosize_opt$nospeed_opt$nobe_opt$nole_opt$noto_opt$nofrom_opt$ccscol_opt:"
  . "$ucscol_opt:";

  # getopts() reports the offending option itself and does not set $!, so $!
  # is deliberately not printed here.
  getopts($getopts_string, \%args)
    or Err("Error: invalid command line options. Use -$help_opt for help.\n", 1);

  # Print usage rules and exit.
  if ($args{$help_opt})
  {
    print <<END;
Usage:
     -$help_opt - this help message;
     -$input_opt - input file name (required);
     -$output_opt - output file name;
     -$enc_opt - CCS or encoding name;
     -$plane_opt - plane number (high 16 bits) to use (in hex);
     -$source_opt - generate C source file;
     -$nospeed_opt - don't generate speed-optimized tables (binary files only);
     -$nosize_opt - don't generate size-optimized tables (binary files only);
     -$nobe_opt - don't generate Big Endian tables (binary files only);
     -$nole_opt - don't generate Little Endian tables (binary files only);
     -$noto_opt - don't generate "to_ucs" table;
     -$nofrom_opt - don't generate "from_ucs" table;
     -$ccscol_opt - encoding's column number;
     -$ucscol_opt - UCS column number;
     -$verbose_opt - verbose output.

If output file name isn't specified, <infile>.c (for sources) or
<infile>.cct (for binaries) is assumed.
If encoding name isn't specified <infile> is assumed.
<infile> is normalized (small letters, "-" are substituted by "_") input file
name base (no extension). For example, for Koi8-r.txt input file, <infile>
is koi8_r.
END
    exit 0;
  }

  $Verbose = $args{$verbose_opt};
  $Source  = $args{$source_opt};
  $NoSpeed = $args{$nospeed_opt};
  $NoSize  = $args{$nosize_opt};
  $NoBE    = $args{$nobe_opt};
  $NoLE    = $args{$nole_opt};
  $NoFrom  = $args{$nofrom_opt};
  $NoTo    = $args{$noto_opt};
  $CCSCol  = $args{$ccscol_opt};
  $UCSCol  = $args{$ucscol_opt};
  $Plane   = $args{$plane_opt};
  $InFile  = $args{$input_opt};
  $OutFile = $args{$output_opt};
  $CCSName = $args{$enc_opt};

  Err("Error: input file isn't defined. Use -$help_opt for help.\n", 1)
    unless defined $InFile && length $InFile;

  # Normalized base name of a path: directory part and extension (everything
  # from the first dot on) removed, lower-cased, '-' replaced by '_'.  Unlike
  # a single substitution that requires an extension, this also works for
  # input files whose name contains no dot.
  my $normalize = sub
  {
    my ($name) = @_;
    $name =~ s{.*/}{}s;
    $name =~ s{\..*\z}{}s;
    $name = lc $name;
    $name =~ tr/-/_/;
    return $name;
  };

  unless (defined $OutFile && length $OutFile)
  {
    # Construct output file name
    $OutFile = $normalize->($InFile) . ($Source ? ".c" : ".cct");
  }

  unless (defined $CCSName && length $CCSName)
  {
    # Construct CCS name
    $CCSName = $normalize->($InFile);
  }

  Err("-$nosize_opt option can't be used with -$nospeed_opt option "
    . "simultaneously.\n", 1) if $NoSpeed && $NoSize;

  Err("-$nobe_opt option can't be used with -$nole_opt option "
    . "simultaneously.\n", 1) if $NoBE && $NoLE;

  Err("-$noto_opt option can't be used with -$nofrom_opt option "
    . "simultaneously.\n", 1) if $NoTo && $NoFrom;

  Err("-$nosize_opt, -$nospeed_opt, -$nobe_opt -$nole_opt "
    . "-$noto_opt and -$nofrom_opt "
    . "options can't be used with -$source_opt option.\n"
    . "Source code always contains both speed- and size-optimized "
    . "tables in System Endian. Use -$help_opt for help.\n", 1)
    if $Source
       && ($NoSpeed || $NoSize || $NoBE || $NoLE || $NoTo || $NoFrom);

  # Column numbers are 1-based on the command line and 0-based internally.
  # Test with defined() rather than truth so that an explicit 0 is rejected
  # instead of being mistaken for "option not given".
  if (!defined $CCSCol && !defined $UCSCol)
  {
    $CCSCol = 0;
    $UCSCol = 1;
  }
  elsif (defined $CCSCol && defined $UCSCol)
  {
    Err("Column number should be an integer > 0\n", 1)
      unless $CCSCol =~ /\A[0-9]+\z/ && $UCSCol =~ /\A[0-9]+\z/
             && $CCSCol > 0 && $UCSCol > 0;
    $CCSCol -= 1;
    $UCSCol -= 1;
  }
  else
  {
    Err("Please, define both CCS and UCS column numbers\n", 1);
  }

  if ($Verbose)
  {
    print  "Use $InFile file for input.\n",
           "Use $OutFile file for output.\n",
           "Use $CCSName as CCS name.\n";
    print  "Generate C source file.\n"                if $Source;
    print  "Generate binary file.\n"                  if !$Source;
    printf "Use plane N 0x%.4X.\n", hex $Plane        if defined $Plane;
    # print, not printf: the CCS name is user input and must not be
    # interpreted as a format string.
    print  "Use column N $CCSCol for $CCSName.\n";
    print  "Use column N $UCSCol for UCS.\n";
    print  "Don't generate size-optimized tables.\n"  if $NoSize;
    print  "Don't generate speed-optimized tables.\n" if $NoSpeed;
    print  "Don't generate big-endian tables.\n"      if $NoBE;
    print  "Don't generate little-endian tables.\n"   if $NoLE;
    print  "Don't generate \"to_ucs\" table.\n"       if $NoTo;
    print  "Don't generate \"from_ucs\" table.\n"     if $NoFrom;
  }

  return;
}


# =============================================================================
#
# Print error message, close all and exit
#
# Parameter 1: error message
# Parameter 2: don't delete output file if true
#
# Always terminates the program with exit status 1.
#
# =============================================================================
sub Err($;$)
{
  my ($message, $keep_output) = @_;

  print STDERR $message;

  # Err() may be called before either file has been opened; only close
  # handles that are really open to avoid "close() on unopened filehandle"
  # warnings under -w.
  close INFILE  if defined fileno(INFILE);
  close OUTFILE if defined fileno(OUTFILE);

  # The output file name may still be unset when option parsing fails early.
  unlink $OutFile if !$keep_output && defined $OutFile;

  exit 1;
}
\ No newline at end of file |