Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/npm/cli.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/iconv-lite/encodings/dbcs-codec.js')
-rw-r--r-- node_modules/iconv-lite/encodings/dbcs-codec.js | 144
1 file changed, 93 insertions, 51 deletions
diff --git a/node_modules/iconv-lite/encodings/dbcs-codec.js b/node_modules/iconv-lite/encodings/dbcs-codec.js
index 1fe3e1601..fa8391703 100644
--- a/node_modules/iconv-lite/encodings/dbcs-codec.js
+++ b/node_modules/iconv-lite/encodings/dbcs-codec.js
@@ -49,6 +49,48 @@ function DBCSCodec(codecOptions, iconv) {
for (var i = 0; i < mappingTable.length; i++)
this._addDecodeChunk(mappingTable[i]);
+ // Load & create GB18030 tables when needed.
+ if (typeof codecOptions.gb18030 === 'function') {
+ this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.
+
+ // Add GB18030 common decode nodes.
+ var commonThirdByteNodeIdx = this.decodeTables.length;
+ this.decodeTables.push(UNASSIGNED_NODE.slice(0));
+
+ var commonFourthByteNodeIdx = this.decodeTables.length;
+ this.decodeTables.push(UNASSIGNED_NODE.slice(0));
+
+ // Fill out the tree
+ var firstByteNode = this.decodeTables[0];
+ for (var i = 0x81; i <= 0xFE; i++) {
+ var secondByteNode = this.decodeTables[NODE_START - firstByteNode[i]];
+ for (var j = 0x30; j <= 0x39; j++) {
+ if (secondByteNode[j] === UNASSIGNED) {
+ secondByteNode[j] = NODE_START - commonThirdByteNodeIdx;
+ } else if (secondByteNode[j] > NODE_START) {
+ throw new Error("gb18030 decode tables conflict at byte 2");
+ }
+
+ var thirdByteNode = this.decodeTables[NODE_START - secondByteNode[j]];
+ for (var k = 0x81; k <= 0xFE; k++) {
+ if (thirdByteNode[k] === UNASSIGNED) {
+ thirdByteNode[k] = NODE_START - commonFourthByteNodeIdx;
+ } else if (thirdByteNode[k] === NODE_START - commonFourthByteNodeIdx) {
+ continue;
+ } else if (thirdByteNode[k] > NODE_START) {
+ throw new Error("gb18030 decode tables conflict at byte 3");
+ }
+
+ var fourthByteNode = this.decodeTables[NODE_START - thirdByteNode[k]];
+ for (var l = 0x30; l <= 0x39; l++) {
+ if (fourthByteNode[l] === UNASSIGNED)
+ fourthByteNode[l] = GB18030_CODE;
+ }
+ }
+ }
+ }
+ }
+
this.defaultCharUnicode = iconv.defaultCharUnicode;
@@ -92,30 +134,6 @@ function DBCSCodec(codecOptions, iconv) {
this.defCharSB = this.encodeTable[0][iconv.defaultCharSingleByte.charCodeAt(0)];
if (this.defCharSB === UNASSIGNED) this.defCharSB = this.encodeTable[0]['?'];
if (this.defCharSB === UNASSIGNED) this.defCharSB = "?".charCodeAt(0);
-
-
- // Load & create GB18030 tables when needed.
- if (typeof codecOptions.gb18030 === 'function') {
- this.gb18030 = codecOptions.gb18030(); // Load GB18030 ranges.
-
- // Add GB18030 decode tables.
- var thirdByteNodeIdx = this.decodeTables.length;
- var thirdByteNode = this.decodeTables[thirdByteNodeIdx] = UNASSIGNED_NODE.slice(0);
-
- var fourthByteNodeIdx = this.decodeTables.length;
- var fourthByteNode = this.decodeTables[fourthByteNodeIdx] = UNASSIGNED_NODE.slice(0);
-
- for (var i = 0x81; i <= 0xFE; i++) {
- var secondByteNodeIdx = NODE_START - this.decodeTables[0][i];
- var secondByteNode = this.decodeTables[secondByteNodeIdx];
- for (var j = 0x30; j <= 0x39; j++)
- secondByteNode[j] = NODE_START - thirdByteNodeIdx;
- }
- for (var i = 0x81; i <= 0xFE; i++)
- thirdByteNode[i] = NODE_START - fourthByteNodeIdx;
- for (var i = 0x30; i <= 0x39; i++)
- fourthByteNode[i] = GB18030_CODE
- }
}
DBCSCodec.prototype.encoder = DBCSEncoder;
@@ -124,7 +142,7 @@ DBCSCodec.prototype.decoder = DBCSDecoder;
// Decoder helpers
DBCSCodec.prototype._getDecodeTrieNode = function(addr) {
var bytes = [];
- for (; addr > 0; addr >>= 8)
+ for (; addr > 0; addr >>>= 8)
bytes.push(addr & 0xFF);
if (bytes.length == 0)
bytes.push(0);
@@ -249,19 +267,32 @@ DBCSCodec.prototype._setEncodeSequence = function(seq, dbcsCode) {
DBCSCodec.prototype._fillEncodeTable = function(nodeIdx, prefix, skipEncodeChars) {
var node = this.decodeTables[nodeIdx];
+ var hasValues = false;
+ var subNodeEmpty = {};
for (var i = 0; i < 0x100; i++) {
var uCode = node[i];
var mbCode = prefix + i;
if (skipEncodeChars[mbCode])
continue;
- if (uCode >= 0)
+ if (uCode >= 0) {
this._setEncodeChar(uCode, mbCode);
- else if (uCode <= NODE_START)
- this._fillEncodeTable(NODE_START - uCode, mbCode << 8, skipEncodeChars);
- else if (uCode <= SEQ_START)
+ hasValues = true;
+ } else if (uCode <= NODE_START) {
+ var subNodeIdx = NODE_START - uCode;
+ if (!subNodeEmpty[subNodeIdx]) { // Skip empty subtrees (they are too large in gb18030).
+ var newPrefix = (mbCode << 8) >>> 0; // NOTE: '>>> 0' keeps 32-bit num positive.
+ if (this._fillEncodeTable(subNodeIdx, newPrefix, skipEncodeChars))
+ hasValues = true;
+ else
+ subNodeEmpty[subNodeIdx] = true;
+ }
+ } else if (uCode <= SEQ_START) {
this._setEncodeSequence(this.decodeTableSeq[SEQ_START - uCode], mbCode);
+ hasValues = true;
+ }
}
+ return hasValues;
}
@@ -388,10 +419,15 @@ DBCSEncoder.prototype.write = function(str) {
newBuf[j++] = dbcsCode >> 8; // high byte
newBuf[j++] = dbcsCode & 0xFF; // low byte
}
- else {
+ else if (dbcsCode < 0x1000000) {
newBuf[j++] = dbcsCode >> 16;
newBuf[j++] = (dbcsCode >> 8) & 0xFF;
newBuf[j++] = dbcsCode & 0xFF;
+ } else {
+ newBuf[j++] = dbcsCode >>> 24;
+ newBuf[j++] = (dbcsCode >>> 16) & 0xFF;
+ newBuf[j++] = (dbcsCode >>> 8) & 0xFF;
+ newBuf[j++] = dbcsCode & 0xFF;
}
}
@@ -440,7 +476,7 @@ DBCSEncoder.prototype.findIdx = findIdx;
function DBCSDecoder(options, codec) {
// Decoder state
this.nodeIdx = 0;
- this.prevBuf = Buffer.alloc(0);
+ this.prevBytes = [];
// Static data
this.decodeTables = codec.decodeTables;
@@ -452,15 +488,12 @@ function DBCSDecoder(options, codec) {
DBCSDecoder.prototype.write = function(buf) {
var newBuf = Buffer.alloc(buf.length*2),
nodeIdx = this.nodeIdx,
- prevBuf = this.prevBuf, prevBufOffset = this.prevBuf.length,
- seqStart = -this.prevBuf.length, // idx of the start of current parsed sequence.
+ prevBytes = this.prevBytes, prevOffset = this.prevBytes.length,
+ seqStart = -this.prevBytes.length, // idx of the start of current parsed sequence.
uCode;
- if (prevBufOffset > 0) // Make prev buf overlap a little to make it easier to slice later.
- prevBuf = Buffer.concat([prevBuf, buf.slice(0, 10)]);
-
for (var i = 0, j = 0; i < buf.length; i++) {
- var curByte = (i >= 0) ? buf[i] : prevBuf[i + prevBufOffset];
+ var curByte = (i >= 0) ? buf[i] : prevBytes[i + prevOffset];
// Lookup in current trie node.
var uCode = this.decodeTables[nodeIdx][curByte];
@@ -470,13 +503,18 @@ DBCSDecoder.prototype.write = function(buf) {
}
else if (uCode === UNASSIGNED) { // Unknown char.
// TODO: Callback with seq.
- //var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
- i = seqStart; // Try to parse again, after skipping first byte of the sequence ('i' will be incremented by 'for' cycle).
uCode = this.defaultCharUnicode.charCodeAt(0);
+ i = seqStart; // Skip one byte ('i' will be incremented by the for loop) and try to parse again.
}
else if (uCode === GB18030_CODE) {
- var curSeq = (seqStart >= 0) ? buf.slice(seqStart, i+1) : prevBuf.slice(seqStart + prevBufOffset, i+1 + prevBufOffset);
- var ptr = (curSeq[0]-0x81)*12600 + (curSeq[1]-0x30)*1260 + (curSeq[2]-0x81)*10 + (curSeq[3]-0x30);
+ if (i >= 3) {
+ var ptr = (buf[i-3]-0x81)*12600 + (buf[i-2]-0x30)*1260 + (buf[i-1]-0x81)*10 + (curByte-0x30);
+ } else {
+ var ptr = (prevBytes[i-3+prevOffset]-0x81)*12600 +
+ (((i-2 >= 0) ? buf[i-2] : prevBytes[i-2+prevOffset])-0x30)*1260 +
+ (((i-1 >= 0) ? buf[i-1] : prevBytes[i-1+prevOffset])-0x81)*10 +
+ (curByte-0x30);
+ }
var idx = findIdx(this.gb18030.gbChars, ptr);
uCode = this.gb18030.uChars[idx] + ptr - this.gb18030.gbChars[idx];
}
@@ -497,13 +535,13 @@ DBCSDecoder.prototype.write = function(buf) {
throw new Error("iconv-lite internal error: invalid decoding table value " + uCode + " at " + nodeIdx + "/" + curByte);
// Write the character to buffer, handling higher planes using surrogate pair.
- if (uCode > 0xFFFF) {
+ if (uCode >= 0x10000) {
uCode -= 0x10000;
- var uCodeLead = 0xD800 + Math.floor(uCode / 0x400);
+ var uCodeLead = 0xD800 | (uCode >> 10);
newBuf[j++] = uCodeLead & 0xFF;
newBuf[j++] = uCodeLead >> 8;
- uCode = 0xDC00 + uCode % 0x400;
+ uCode = 0xDC00 | (uCode & 0x3FF);
}
newBuf[j++] = uCode & 0xFF;
newBuf[j++] = uCode >> 8;
@@ -513,7 +551,10 @@ DBCSDecoder.prototype.write = function(buf) {
}
this.nodeIdx = nodeIdx;
- this.prevBuf = (seqStart >= 0) ? buf.slice(seqStart) : prevBuf.slice(seqStart + prevBufOffset);
+ this.prevBytes = (seqStart >= 0)
+ ? Array.prototype.slice.call(buf, seqStart)
+ : prevBytes.slice(seqStart + prevOffset).concat(Array.prototype.slice.call(buf));
+
return newBuf.slice(0, j).toString('ucs2');
}
@@ -521,18 +562,19 @@ DBCSDecoder.prototype.end = function() {
var ret = '';
// Try to parse all remaining chars.
- while (this.prevBuf.length > 0) {
+ while (this.prevBytes.length > 0) {
// Skip 1 character in the buffer.
ret += this.defaultCharUnicode;
- var buf = this.prevBuf.slice(1);
+ var bytesArr = this.prevBytes.slice(1);
// Parse remaining as usual.
- this.prevBuf = Buffer.alloc(0);
+ this.prevBytes = [];
this.nodeIdx = 0;
- if (buf.length > 0)
- ret += this.write(buf);
+ if (bytesArr.length > 0)
+ ret += this.write(bytesArr);
}
+ this.prevBytes = [];
this.nodeIdx = 0;
return ret;
}
@@ -544,7 +586,7 @@ function findIdx(table, val) {
var l = 0, r = table.length;
while (l < r-1) { // always table[l] <= val < table[r]
- var mid = l + Math.floor((r-l+1)/2);
+ var mid = l + ((r-l+1) >> 1);
if (table[mid] <= val)
l = mid;
else