Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/npm/cli.git - Unnamed repository; edit this file 'description' to name the repository.
summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
Diffstat (limited to 'node_modules/iconv-lite/encodings/utf16.js')
-rw-r--r-- node_modules/iconv-lite/encodings/utf16.js | 96
1 file changed, 58 insertions, 38 deletions
diff --git a/node_modules/iconv-lite/encodings/utf16.js b/node_modules/iconv-lite/encodings/utf16.js
index 54765aeee..97d066925 100644
--- a/node_modules/iconv-lite/encodings/utf16.js
+++ b/node_modules/iconv-lite/encodings/utf16.js
@@ -61,6 +61,7 @@ Utf16BEDecoder.prototype.write = function(buf) {
}
Utf16BEDecoder.prototype.end = function() {
+ this.overflowByte = -1;
}
@@ -103,8 +104,8 @@ Utf16Encoder.prototype.end = function() {
function Utf16Decoder(options, codec) {
this.decoder = null;
- this.initialBytes = [];
- this.initialBytesLen = 0;
+ this.initialBufs = [];
+ this.initialBufsLen = 0;
this.options = options || {};
this.iconv = codec.iconv;
@@ -113,17 +114,22 @@ function Utf16Decoder(options, codec) {
Utf16Decoder.prototype.write = function(buf) {
if (!this.decoder) {
// Codec is not chosen yet. Accumulate initial bytes.
- this.initialBytes.push(buf);
- this.initialBytesLen += buf.length;
+ this.initialBufs.push(buf);
+ this.initialBufsLen += buf.length;
- if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
+ if (this.initialBufsLen < 16) // We need more bytes to use space heuristic (see below)
return '';
// We have enough bytes -> detect endianness.
- var buf = Buffer.concat(this.initialBytes),
- encoding = detectEncoding(buf, this.options.defaultEncoding);
+ var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
- this.initialBytes.length = this.initialBytesLen = 0;
+
+ var resStr = '';
+ for (var i = 0; i < this.initialBufs.length; i++)
+ resStr += this.decoder.write(this.initialBufs[i]);
+
+ this.initialBufs.length = this.initialBufsLen = 0;
+ return resStr;
}
return this.decoder.write(buf);
@@ -131,47 +137,61 @@ Utf16Decoder.prototype.write = function(buf) {
Utf16Decoder.prototype.end = function() {
if (!this.decoder) {
- var buf = Buffer.concat(this.initialBytes),
- encoding = detectEncoding(buf, this.options.defaultEncoding);
+ var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding);
this.decoder = this.iconv.getDecoder(encoding, this.options);
- var res = this.decoder.write(buf),
- trail = this.decoder.end();
+ var resStr = '';
+ for (var i = 0; i < this.initialBufs.length; i++)
+ resStr += this.decoder.write(this.initialBufs[i]);
- return trail ? (res + trail) : res;
+ var trail = this.decoder.end();
+ if (trail)
+ resStr += trail;
+
+ this.initialBufs.length = this.initialBufsLen = 0;
+ return resStr;
}
return this.decoder.end();
}
-function detectEncoding(buf, defaultEncoding) {
- var enc = defaultEncoding || 'utf-16le';
-
- if (buf.length >= 2) {
- // Check BOM.
- if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
- enc = 'utf-16be';
- else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
- enc = 'utf-16le';
- else {
- // No BOM found. Try to deduce encoding from initial content.
- // Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
- // So, we count ASCII as if it was LE or BE, and decide from that.
- var asciiCharsLE = 0, asciiCharsBE = 0, // Counts of chars in both positions
- _len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
-
- for (var i = 0; i < _len; i += 2) {
- if (buf[i] === 0 && buf[i+1] !== 0) asciiCharsBE++;
- if (buf[i] !== 0 && buf[i+1] === 0) asciiCharsLE++;
+function detectEncoding(bufs, defaultEncoding) {
+ var b = [];
+ var charsProcessed = 0;
+ var asciiCharsLE = 0, asciiCharsBE = 0; // Number of ASCII chars when decoded as LE or BE.
+
+ outer_loop:
+ for (var i = 0; i < bufs.length; i++) {
+ var buf = bufs[i];
+ for (var j = 0; j < buf.length; j++) {
+ b.push(buf[j]);
+ if (b.length === 2) {
+ if (charsProcessed === 0) {
+ // Check BOM first.
+ if (b[0] === 0xFF && b[1] === 0xFE) return 'utf-16le';
+ if (b[0] === 0xFE && b[1] === 0xFF) return 'utf-16be';
+ }
+
+ if (b[0] === 0 && b[1] !== 0) asciiCharsBE++;
+ if (b[0] !== 0 && b[1] === 0) asciiCharsLE++;
+
+ b.length = 0;
+ charsProcessed++;
+
+ if (charsProcessed >= 100) {
+ break outer_loop;
+ }
}
-
- if (asciiCharsBE > asciiCharsLE)
- enc = 'utf-16be';
- else if (asciiCharsBE < asciiCharsLE)
- enc = 'utf-16le';
}
}
- return enc;
+ // Make decisions.
+ // Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
+ // So, we count ASCII as if it was LE or BE, and decide from that.
+ if (asciiCharsBE > asciiCharsLE) return 'utf-16be';
+ if (asciiCharsBE < asciiCharsLE) return 'utf-16le';
+
+ // Couldn't decide (likely all zeros or not enough data).
+ return defaultEncoding || 'utf-16le';
}