diff options
author | Felix Geisendörfer <felix@debuggable.com> | 2014-01-20 12:47:19 +0400 |
---|---|---|
committer | Timothy J Fontaine <tjfontaine@gmail.com> | 2014-06-07 02:07:29 +0400 |
commit | 0da4c671659cfbae12def127b2e94690b9d9b5e1 (patch) | |
tree | 14b81692db27486685b07f69303bff39daa790e6 /src | |
parent | 881ac26f27f4ac9585d66c8d8a67d5b246a23d1b (diff) |
string_bytes: Guarantee valid utf-8 output
Previously V8's WriteUtf8 function would produce invalid UTF-8 output
when encountering unmatched surrogate code units [1]. The new
REPLACE_INVALID_UTF8 option fixes that by replacing invalid code points
with the Unicode replacement character (U+FFFD).
[1]: JS strings are defined as arrays of 16-bit unsigned integers. There
is no Unicode enforcement, so one can easily end up with invalid Unicode
code unit sequences inside a string.
Diffstat (limited to 'src')
-rw-r--r-- | src/node.cc | 7 | ||||
-rw-r--r-- | src/string_bytes.cc | 2 | ||||
-rw-r--r-- | src/string_bytes.h | 2 |
3 files changed, 10 insertions, 1 deletions
diff --git a/src/node.cc b/src/node.cc index 8257604d526..5cb202fa2e4 100644 --- a/src/node.cc +++ b/src/node.cc @@ -176,6 +176,8 @@ static uv_async_t dispatch_debug_messages_async; // Declared in node_internals.h Isolate* node_isolate = NULL; +int WRITE_UTF8_FLAGS = v8::String::HINT_MANY_WRITES_EXPECTED | + v8::String::NO_NULL_TERMINATION; static void Spin(uv_idle_t* handle, int status) { assert((uv_idle_t*) handle == &tick_spinner); @@ -3042,6 +3044,11 @@ static char **copy_argv(int argc, char **argv) { } int Start(int argc, char *argv[]) { + const char* replaceInvalid = getenv("NODE_INVALID_UTF8"); + + if (replaceInvalid == NULL) + WRITE_UTF8_FLAGS |= String::REPLACE_INVALID_UTF8; + // Hack aroung with the argv pointer. Used for process.title = "blah". argv = uv_setup_args(argc, argv); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index e4a34fee0e2..a7bab3895f9 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -199,7 +199,7 @@ size_t StringBytes::Write(char* buf, break; case UTF8: - len = str->WriteUtf8(buf, buflen, chars_written, flags); + len = str->WriteUtf8(buf, buflen, chars_written, WRITE_UTF8_FLAGS); break; case UCS2: diff --git a/src/string_bytes.h b/src/string_bytes.h index 8071a494ae9..31f04bbe4b3 100644 --- a/src/string_bytes.h +++ b/src/string_bytes.h @@ -29,6 +29,8 @@ namespace node { +extern int WRITE_UTF8_FLAGS; + using v8::Handle; using v8::Local; using v8::String; |