Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nodejs/node.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Felix Geisendörfer <felix@debuggable.com> 2014-01-20 12:47:19 +0400
committer Timothy J Fontaine <tjfontaine@gmail.com> 2014-06-07 02:07:29 +0400
commit 0da4c671659cfbae12def127b2e94690b9d9b5e1 (patch)
tree 14b81692db27486685b07f69303bff39daa790e6 /src
parent 881ac26f27f4ac9585d66c8d8a67d5b246a23d1b (diff)
string_bytes: Guarantee valid utf-8 output
Previously, V8's WriteUtf8 function would produce invalid UTF-8 output when encountering unmatched surrogate code units [1]. The new REPLACE_INVALID_UTF8 option fixes that by replacing invalid code points with the Unicode replacement character.

[1]: JS strings are defined as arrays of 16-bit unsigned integers. There is no Unicode enforcement, so one can easily end up with invalid Unicode code unit sequences inside a string.
Diffstat (limited to 'src')
-rw-r--r-- src/node.cc 7
-rw-r--r-- src/string_bytes.cc 2
-rw-r--r-- src/string_bytes.h 2
3 files changed, 10 insertions, 1 deletion
diff --git a/src/node.cc b/src/node.cc
index 8257604d526..5cb202fa2e4 100644
--- a/src/node.cc
+++ b/src/node.cc
@@ -176,6 +176,8 @@ static uv_async_t dispatch_debug_messages_async;
// Declared in node_internals.h
Isolate* node_isolate = NULL;
+int WRITE_UTF8_FLAGS = v8::String::HINT_MANY_WRITES_EXPECTED |
+ v8::String::NO_NULL_TERMINATION;
static void Spin(uv_idle_t* handle, int status) {
assert((uv_idle_t*) handle == &tick_spinner);
@@ -3042,6 +3044,11 @@ static char **copy_argv(int argc, char **argv) {
}
int Start(int argc, char *argv[]) {
+ const char* replaceInvalid = getenv("NODE_INVALID_UTF8");
+
+ if (replaceInvalid == NULL)
+ WRITE_UTF8_FLAGS |= String::REPLACE_INVALID_UTF8;
+
// Hack aroung with the argv pointer. Used for process.title = "blah".
argv = uv_setup_args(argc, argv);
diff --git a/src/string_bytes.cc b/src/string_bytes.cc
index e4a34fee0e2..a7bab3895f9 100644
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -199,7 +199,7 @@ size_t StringBytes::Write(char* buf,
break;
case UTF8:
- len = str->WriteUtf8(buf, buflen, chars_written, flags);
+ len = str->WriteUtf8(buf, buflen, chars_written, WRITE_UTF8_FLAGS);
break;
case UCS2:
diff --git a/src/string_bytes.h b/src/string_bytes.h
index 8071a494ae9..31f04bbe4b3 100644
--- a/src/string_bytes.h
+++ b/src/string_bytes.h
@@ -29,6 +29,8 @@
namespace node {
+extern int WRITE_UTF8_FLAGS;
+
using v8::Handle;
using v8::Local;
using v8::String;