Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/processone/ejabberd.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'src/stringprep/uni_parse.tcl')
-rw-r--r--src/stringprep/uni_parse.tcl437
1 files changed, 0 insertions, 437 deletions
diff --git a/src/stringprep/uni_parse.tcl b/src/stringprep/uni_parse.tcl
deleted file mode 100644
index 100631b6b..000000000
--- a/src/stringprep/uni_parse.tcl
+++ /dev/null
@@ -1,437 +0,0 @@
-# uni_parse.tcl --
-#
-# This program parses the UnicodeData file and generates the
-# corresponding uni_data.c file with compressed character
-# data tables. The input to this program should be rfc3454.txt
-#
-# Copyright (c) 1998-1999 by Scriptics Corporation.
-# All rights reserved.
-#
-# Modified for ejabberd by Alexey Shchepin
-#
-# RCS: @(#) $Id$
-
-
-namespace eval uni {
- set shift 8; # number of bits of data within a page
- # This value can be adjusted to find the
- # best split to minimize table size
-
- variable pMap; # map from page to page index, each entry is
- # an index into the pages table, indexed by
- # page number
- variable pages; # map from page index to page info, each
- # entry is a list of indices into the groups
- # table, the list is indexed by the offset
- variable groups; # list of character info values, indexed by
- # group number, initialized with the
- # unassigned character group
-}
-
-proc uni::getValue {i} {
- variable casemap
- variable casemap2
- variable tablemap
-
- if {[info exists tablemap($i)]} {
- set tables $tablemap($i)
- } else {
- set tables {}
- }
-
- if {[info exists casemap2($i)]} {
- set multicase 1
- set delta $casemap2($i)
- } else {
- set multicase 0
- if {[info exists casemap($i)]} {
- set delta $casemap($i)
- } else {
- set delta 0
- }
- }
-
- if {abs($delta) > 0xFFFFF} {
- puts "delta must be less than 22 bits wide"
- exit
- }
-
- set ac 0
- set c11 0
- set c21 0
- set b1 0
- set d1 0
- set d2 0
- set xnp 0
-
- foreach tab $tables {
- switch -glob -- $tab {
- C.1.1 {set c11 1}
- C.2.1 {set c21 1}
- C.* {set ac 1}
- A.1 {set ac 1}
- B.1 {set b1 1}
- D.1 {set d1 1}
- D.2 {set d2 1}
- XNP {set xnp 1}
- }
- }
-
- set val [expr {($ac << 0) |
- ($c11 << 1) |
- ($c21 << 2) |
- ($b1 << 3) |
- ($d1 << 4) |
- ($d2 << 5) |
- ($xnp << 6) |
- ($multicase << 7) |
- ($delta << 11)}]
-
- return $val
-}
-
-proc uni::getGroup {value} {
- variable groups
-
- set gIndex [lsearch -exact $groups $value]
- if {$gIndex == -1} {
- set gIndex [llength $groups]
- lappend groups $value
- }
- return $gIndex
-}
-
-proc uni::addPage {info} {
- variable pMap
- variable pages
- variable pages_map
-
- if {[info exists pages_map($info)]} {
- lappend pMap $pages_map($info)
- } else {
- set pIndex [llength $pages]
- lappend pages $info
- set pages_map($info) $pIndex
- lappend pMap $pIndex
- }
- return
-}
-
-
-proc uni::load_tables {data} {
- variable casemap
- variable casemap2
- variable multicasemap
- variable tablemap
-
- set multicasemap {}
- set table ""
-
- foreach line [split $data \n] {
- if {$table == ""} {
- if {[regexp { ----- Start Table (.*) -----} $line temp table]} {
- #puts "Start table '$table'"
- }
- } else {
- if {[regexp { ----- End Table (.*) -----} $line temp table1]} {
- set table ""
- } else {
- if {$table == "B.1"} {
- if {[regexp {^ ([[:xdigit:]]+); ;} $line \
- temp val]} {
- scan $val %x val
- if {$val <= 0x10ffff} {
- lappend tablemap($val) $table
- }
- }
- } elseif {$table == "B.2"} {
- if {[regexp {^ ([[:xdigit:]]+); ([[:xdigit:]]+);} $line \
- temp from to]} {
- scan $from %x from
- scan $to %x to
- if {$from <= 0x10ffff && $to <= 0x10ffff} {
- set casemap($from) [expr {$to - $from}]
- }
- } elseif {[regexp {^ ([[:xdigit:]]+); ([[:xdigit:]]+) ([[:xdigit:]]+);} $line \
- temp from to1 to2]} {
- scan $from %x from
- scan $to1 %x to1
- scan $to2 %x to2
- if {$from <= 0x10ffff && \
- $to1 <= 0x10ffff && $to2 <= 0x10ffff} {
- set casemap2($from) [llength $multicasemap]
- lappend multicasemap [list $to1 $to2]
- }
- } elseif {[regexp {^ ([[:xdigit:]]+); ([[:xdigit:]]+) ([[:xdigit:]]+) ([[:xdigit:]]+);} $line \
- temp from to1 to2 to3]} {
- scan $from %x from
- scan $to1 %x to1
- scan $to2 %x to2
- scan $to3 %x to3
- if {$from <= 0x10ffff && \
- $to1 <= 0x10ffff && $to2 <= 0x10ffff && \
- $to3 <= 0x10ffff} {
- set casemap2($from) [llength $multicasemap]
- lappend multicasemap [list $to1 $to2 $to3]
- }
- } else {
- #puts "missed: $line"
- }
-
- } elseif {$table != "B.3"} {
- if {[regexp {^ ([[:xdigit:]]+)-([[:xdigit:]]+)} $line \
- temp from to]} {
- scan $from %x from
- scan $to %x to
- for {set i $from} {$i <= $to && $i <= 0x10ffff} {incr i} {
- lappend tablemap($i) $table
- }
- } elseif {[regexp {^ ([[:xdigit:]]+)} $line \
- temp val]} {
- scan $val %x val
- if {$val <= 0x10ffff} {
- lappend tablemap($val) $table
- }
- }
- }
- }
- }
- }
-
- # XMPP nodeprep prohibited
- foreach val {22 26 27 2f 3a 3c 3e 40} {
- scan $val %x val
- lappend tablemap($val) XNP
- }
-}
-
-proc uni::buildTables {} {
- variable shift
-
- variable casemap
- variable tablemap
-
- variable pMap {}
- variable pages {}
- variable groups {}
- set info {} ;# temporary page info
-
- set mask [expr {(1 << $shift) - 1}]
-
- set next 0
-
- for {set i 0} {$i <= 0x10ffff} {incr i} {
- set gIndex [getGroup [getValue $i]]
-
- # Split character index into offset and page number
- set offset [expr {$i & $mask}]
- set page [expr {($i >> $shift)}]
-
- # Add the group index to the info for the current page
- lappend info $gIndex
-
- # If this is the last entry in the page, add the page
- if {$offset == $mask} {
- addPage $info
- set info {}
- }
- }
- return
-}
-
-proc uni::main {} {
- global argc argv0 argv
- variable pMap
- variable pages
- variable groups
- variable shift
- variable multicasemap
-
- if {$argc != 2} {
- puts stderr "\nusage: $argv0 <datafile> <outdir>\n"
- exit 1
- }
- set f [open [lindex $argv 0] r]
- set data [read $f]
- close $f
-
- load_tables $data
- buildTables
- puts "X = [llength $pMap] Y= [llength $pages] A= [llength $groups]"
- set size [expr {[llength $pMap] + [llength $pages]*(1<<$shift)}]
- puts "shift = $shift, space = $size"
-
- set f [open [file join [lindex $argv 1] uni_data.c] w]
- fconfigure $f -translation lf
- puts $f "/*
- * uni_data.c --
- *
- * Declarations of Unicode character information tables. This file is
- * automatically generated by the uni_parse.tcl script. Do not
- * modify this file by hand.
- *
- * Copyright (c) 1998 by Scriptics Corporation.
- * All rights reserved.
- *
- * Modified for ejabberd by Alexey Shchepin
- *
- * RCS: @(#) \$Id\$
- */
-
-/*
- * A 16-bit Unicode character is split into two parts in order to index
- * into the following tables. The lower OFFSET_BITS comprise an offset
- * into a page of characters. The upper bits comprise the page number.
- */
-
-#define OFFSET_BITS $shift
-
-/*
- * The pageMap is indexed by page number and returns an alternate page number
- * that identifies a unique page of characters. Many Unicode characters map
- * to the same alternate page number.
- */
-
-static unsigned char pageMap\[\] = {"
- set line " "
- set last [expr {[llength $pMap] - 1}]
- for {set i 0} {$i <= $last} {incr i} {
- append line [lindex $pMap $i]
- if {$i != $last} {
- append line ", "
- }
- if {[string length $line] > 70} {
- puts $f $line
- set line " "
- }
- }
- puts $f $line
- puts $f "};
-
-/*
- * The groupMap is indexed by combining the alternate page number with
- * the page offset and returns a group number that identifies a unique
- * set of character attributes.
- */
-
-static unsigned short int groupMap\[\] = {"
- set line " "
- set lasti [expr {[llength $pages] - 1}]
- for {set i 0} {$i <= $lasti} {incr i} {
- set page [lindex $pages $i]
- set lastj [expr {[llength $page] - 1}]
- for {set j 0} {$j <= $lastj} {incr j} {
- append line [lindex $page $j]
- if {$j != $lastj || $i != $lasti} {
- append line ", "
- }
- if {[string length $line] > 70} {
- puts $f $line
- set line " "
- }
- }
- }
- puts $f $line
- puts $f "};
-
-/*
- * Each group represents a unique set of character attributes. The attributes
- * are encoded into a 32-bit value as follows:
- *
- * Bit 0 A.1 | C.1.2 | C.2.2 | C.3 -- C.9
- *
- * Bit 1 C.1.1
- *
- * Bit 2 C.2.1
- *
- * Bit 3 B.1
- *
- * Bit 4 D.1
- *
- * Bit 5 D.2
- *
- * Bit 6 XNP
- *
- * Bit 7 Case maps to several characters
- *
- * Bits 8-10 Reserved for future use.
- *
- * Bits 11-31 Case delta: delta for case conversions. This should be the
- * highest field so we can easily sign extend.
- */
-
-static int groups\[\] = {"
- set line " "
- set last [expr {[llength $groups] - 1}]
- for {set i 0} {$i <= $last} {incr i} {
- set val [lindex $groups $i]
-
- append line [format "%d" $val]
- if {$i != $last} {
- append line ", "
- }
- if {[string length $line] > 65} {
- puts $f $line
- set line " "
- }
- }
- puts $f $line
- puts $f "};
-
-/*
- * Table for characters that lowercased to multiple ones
- */
-
-static int multiCaseTable\[\]\[4\] = {"
- set last [expr {[llength $multicasemap] - 1}]
- for {set i 0} {$i <= $last} {incr i} {
- set val [lindex $multicasemap $i]
-
- set line " "
- append line [format "{%d, %s}" [llength $val] [join $val ", "]]
- if {$i != $last} {
- append line ", "
- }
- puts $f $line
- }
- puts $f "};
-
-/*
- * The following constants are used to determine the category of a
- * Unicode character.
- */
-
-#define ACMask (1 << 0)
-#define C11Mask (1 << 1)
-#define C21Mask (1 << 2)
-#define B1Mask (1 << 3)
-#define D1Mask (1 << 4)
-#define D2Mask (1 << 5)
-#define XNPMask (1 << 6)
-#define MCMask (1 << 7)
-
-/*
- * The following macros extract the fields of the character info. The
- * GetDelta() macro is complicated because we can't rely on the C compiler
- * to do sign extension on right shifts.
- */
-
-#define GetCaseType(info) (((info) & 0xE0) >> 5)
-#define GetCategory(info) ((info) & 0x1F)
-#define GetDelta(info) (((info) > 0) ? ((info) >> 11) : (~(~((info)) >> 11)))
-#define GetMC(info) (multiCaseTable\[GetDelta(info)\])
-
-/*
- * This macro extracts the information about a character from the
- * Unicode character tables.
- */
-
-#define GetUniCharInfo(ch) (groups\[groupMap\[(pageMap\[(((int)(ch)) & 0x1fffff) >> OFFSET_BITS\] << OFFSET_BITS) | ((ch) & ((1 << OFFSET_BITS)-1))\]\])
-"
-
- close $f
-}
-
-uni::main
-
-return