#include "node_url.h"
#include "base_object-inl.h"
#include "node_errors.h"
#include "node_external_reference.h"
#include "node_i18n.h"
#include "util-inl.h"

// NOTE(review): this copy of the file had five bare `#include` directives
// whose header names were missing. The names below are a best-effort
// restoration -- confirm against version control.
#include <cmath>
#include <cstdio>
#include <numeric>
#include <string>
#include <vector>

namespace node {

using errors::TryCatchScope;

using url::table_data::hex;
using url::table_data::C0_CONTROL_ENCODE_SET;
using url::table_data::FRAGMENT_ENCODE_SET;
using url::table_data::PATH_ENCODE_SET;
using url::table_data::USERINFO_ENCODE_SET;
using url::table_data::QUERY_ENCODE_SET_NONSPECIAL;
using url::table_data::QUERY_ENCODE_SET_SPECIAL;

using v8::Array;
using v8::Context;
using v8::Function;
using v8::FunctionCallbackInfo;
using v8::HandleScope;
using v8::Int32;
using v8::Integer;
using v8::Isolate;
using v8::Local;
using v8::MaybeLocal;
using v8::NewStringType;
using v8::Null;
using v8::Object;
using v8::String;
using v8::Undefined;
using v8::Value;

// Builds a V8 string from the bytes of `str`, interpreted as UTF-8.
// The result is unwrapped with ToLocalChecked(), so a failed allocation is
// treated as fatal rather than reported to the caller.
Local<String> Utf8String(Isolate* isolate, const std::string& str) {
  return String::NewFromUtf8(isolate,
                             str.data(),
                             NewStringType::kNormal,
                             str.length()).ToLocalChecked();
}

namespace url {
namespace {

// https://url.spec.whatwg.org/#eof-code-point
// Sentinel character value standing in for the spec's EOF code point.
constexpr char kEOL = -1;

// https://url.spec.whatwg.org/#concept-host
//
// Holds the outcome of parsing one host string. A freshly constructed object
// is in the H_FAILED state; a successful Parse*() call switches it to the
// matching host type and fills in `value_`.
class URLHost {
 public:
  URLHost() = default;
  ~URLHost();

  // `value_` is a union that may contain a live std::string, so copying was
  // already implicitly deleted. Spell that out so the ownership rules are
  // visible at the declaration.
  URLHost(const URLHost&) = delete;
  URLHost& operator=(const URLHost&) = delete;

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no parse has succeeded (or after Reset()).
  bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage for whichever representation `type_` selects. The union's own
  // destructor is deliberately empty: the string member is destroyed by
  // URLHost::Reset(), which knows which member is active.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the active one and returns the
  // object to the H_FAILED state.
  void Reset() {
    using string = std::string;
    switch (type_) {
      case HostType::H_DOMAIN:
      case HostType::H_OPAQUE:
        value_.domain_or_opaque.~string();
        break;
      default:
        break;
    }
    type_ = HostType::H_FAILED;
  }

  // Setting the string members of the union with = is brittle because
  // it relies on them being initialized to a state that requires no
  // destruction of old data.
  // For a long time, that worked well enough because ParseIPv6Host() happens
  // to zero-fill `value_`, but that really is relying on standard library
  // internals too much.
  // These helpers are the easiest solution but we might want to consider
  // just not forcing strings into a union.
  void SetOpaque(std::string&& string) {
    Reset();
    type_ = HostType::H_OPAQUE;
    // Placement-new: after Reset() no string is alive in the union.
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  void SetDomain(std::string&& string) {
    Reset();
    type_ = HostType::H_DOMAIN;
    // Placement-new: after Reset() no string is alive in the union.
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }
};

// Releases the string member, if any, via Reset().
URLHost::~URLHost() {
  Reset();
}

// X-macro listing the positional callback arguments, in order.
#define ARGS(XX)                                                              \
  XX(ARG_FLAGS)                                                               \
  XX(ARG_PROTOCOL)                                                            \
  XX(ARG_USERNAME)                                                            \
  XX(ARG_PASSWORD)                                                            \
  XX(ARG_HOST)                                                                \
  XX(ARG_PORT)                                                                \
  XX(ARG_PATH)                                                                \
  XX(ARG_QUERY)                                                               \
  XX(ARG_FRAGMENT)                                                            \
  XX(ARG_COUNT)  // This one has to be last.

enum url_cb_args { #define XX(name) name, ARGS(XX) #undef XX }; #define TWO_CHAR_STRING_TEST(bits, name, expr) \ template \ bool name(const T ch1, const T ch2) { \ static_assert(sizeof(ch1) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return (expr); \ } \ template \ bool name(const std::basic_string& str) { \ static_assert(sizeof(str[0]) >= (bits) / 8, \ "Character must be wider than " #bits " bits"); \ return str.length() >= 2 && name(str[0], str[1]); \ } // https://infra.spec.whatwg.org/#ascii-tab-or-newline CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r')) // https://infra.spec.whatwg.org/#c0-control-or-space CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' ')) // https://infra.spec.whatwg.org/#ascii-digit CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9')) // https://infra.spec.whatwg.org/#ascii-hex-digit CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f'))) // https://infra.spec.whatwg.org/#ascii-alpha CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z'))) // https://infra.spec.whatwg.org/#ascii-alphanumeric CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch))) // https://infra.spec.whatwg.org/#ascii-lowercase template T ASCIILowercase(T ch) { return IsASCIIAlpha(ch) ? (ch | 0x20) : ch; } // https://url.spec.whatwg.org/#forbidden-host-code-point CHAR_TEST(8, IsForbiddenHostCodePoint, ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' || ch == ' ' || ch == '#' || ch == '%' || ch == '/' || ch == ':' || ch == '?' 
|| ch == '@' || ch == '[' || ch == '<' || ch == '>' || ch == '\\' || ch == ']' || ch == '^' || ch == '|') // https://url.spec.whatwg.org/#windows-drive-letter TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter, (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|'))) // https://url.spec.whatwg.org/#normalized-windows-drive-letter TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter, (IsASCIIAlpha(ch1) && ch2 == ':')) #undef TWO_CHAR_STRING_TEST bool BitAt(const uint8_t a[], const uint8_t i) { return !!(a[i >> 3] & (1 << (i & 7))); } // Appends ch to str. If ch position in encode_set is set, the ch will // be percent-encoded then appended. void AppendOrEscape(std::string* str, const unsigned char ch, const uint8_t encode_set[]) { if (BitAt(encode_set, ch)) *str += hex + ch * 4; // "%XX\0" has a length of 4 else *str += ch; } unsigned hex2bin(const char ch) { if (ch >= '0' && ch <= '9') return ch - '0'; if (ch >= 'A' && ch <= 'F') return 10 + (ch - 'A'); if (ch >= 'a' && ch <= 'f') return 10 + (ch - 'a'); UNREACHABLE(); } std::string PercentDecode(const char* input, size_t len) { std::string dest; if (len == 0) return dest; dest.reserve(len); const char* pointer = input; const char* end = input + len; while (pointer < end) { const char ch = pointer[0]; size_t remaining = end - pointer - 1; if (ch != '%' || remaining < 2 || (ch == '%' && (!IsASCIIHexDigit(pointer[1]) || !IsASCIIHexDigit(pointer[2])))) { dest += ch; pointer++; continue; } else { unsigned a = hex2bin(pointer[1]); unsigned b = hex2bin(pointer[2]); char c = static_cast(a * 16 + b); dest += c; pointer += 3; } } return dest; } #define SPECIALS(XX) \ XX(ftp, 21, "ftp:") \ XX(file, -1, "file:") \ XX(http, 80, "http:") \ XX(https, 443, "https:") \ XX(ws, 80, "ws:") \ XX(wss, 443, "wss:") bool IsSpecial(const std::string& scheme) { #define V(_, __, name) if (scheme == name) return true; SPECIALS(V); #undef V return false; } Local GetSpecial(Environment* env, const std::string& scheme) { #define V(key, _, name) if (scheme 
== name) \ return env->url_special_##key##_string(); SPECIALS(V) #undef V UNREACHABLE(); } int NormalizePort(const std::string& scheme, int p) { #define V(_, port, name) if (scheme == name && p == port) return -1; SPECIALS(V); #undef V return p; } // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter bool StartsWithWindowsDriveLetter(const char* p, const char* end) { size_t length = end - p; return length >= 2 && IsWindowsDriveLetter(p[0], p[1]) && (length == 2 || p[2] == '/' || p[2] == '\\' || p[2] == '?' || p[2] == '#'); } #if defined(NODE_HAVE_I18N_SUPPORT) bool ToUnicode(const std::string& input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0) return false; output->assign(*buf, buf.length()); return true; } bool ToASCII(const std::string& input, std::string* output) { MaybeStackBuffer buf; if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0) return false; if (buf.length() == 0) return false; output->assign(*buf, buf.length()); return true; } #else // !defined(NODE_HAVE_I18N_SUPPORT) // Intentional non-ops if ICU is not present. 
bool ToUnicode(const std::string& input, std::string* output) { *output = input; return true; } bool ToASCII(const std::string& input, std::string* output) { *output = input; return true; } #endif // !defined(NODE_HAVE_I18N_SUPPORT) #define NS_IN6ADDRSZ 16 void URLHost::ParseIPv6Host(const char* input, size_t length) { CHECK_EQ(type_, HostType::H_FAILED); unsigned char buf[sizeof(struct in6_addr)]; MaybeStackBuffer ipv6(length + 1); *(*ipv6 + length) = 0; memset(buf, 0, sizeof(buf)); memcpy(*ipv6, input, sizeof(const char) * length); int ret = uv_inet_pton(AF_INET6, *ipv6, buf); if (ret != 0) { return; } // Ref: https://sourceware.org/git/?p=glibc.git;a=blob;f=resolv/inet_ntop.c;h=c4d38c0f951013e51a4fc6eaa8a9b82e146abe5a;hb=HEAD#l119 for (int i = 0; i < NS_IN6ADDRSZ; i += 2) { value_.ipv6[i >> 1] = (buf[i] << 8) | buf[i + 1]; } type_ = HostType::H_IPV6; } int64_t ParseNumber(const char* start, const char* end) { unsigned R = 10; if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') { start += 2; R = 16; } if (end - start == 0) { return 0; } else if (R == 10 && end - start > 1 && start[0] == '0') { start++; R = 8; } const char* p = start; while (p < end) { const char ch = p[0]; switch (R) { case 8: if (ch < '0' || ch > '7') return -1; break; case 10: if (!IsASCIIDigit(ch)) return -1; break; case 16: if (!IsASCIIHexDigit(ch)) return -1; break; } p++; } return strtoll(start, nullptr, R); } void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) { CHECK_EQ(type_, HostType::H_FAILED); *is_ipv4 = false; const char* pointer = input; const char* mark = input; const char* end = pointer + length; int parts = 0; uint32_t val = 0; uint64_t numbers[4]; int tooBigNumbers = 0; if (length == 0) return; while (pointer <= end) { const char ch = pointer < end ? pointer[0] : kEOL; int64_t remaining = end - pointer - 1; if (ch == '.' 
|| ch == kEOL) { if (++parts > static_cast(arraysize(numbers))) return; if (pointer == mark) return; int64_t n = ParseNumber(mark, pointer); if (n < 0) return; if (n > 255) { tooBigNumbers++; } numbers[parts - 1] = n; mark = pointer + 1; if (ch == '.' && remaining == 0) break; } pointer++; } CHECK_GT(parts, 0); *is_ipv4 = true; // If any but the last item in numbers is greater than 255, return failure. // If the last item in numbers is greater than or equal to // 256^(5 - the number of items in numbers), return failure. if (tooBigNumbers > 1 || (tooBigNumbers == 1 && numbers[parts - 1] <= 255) || numbers[parts - 1] >= pow(256, static_cast(5 - parts))) { return; } type_ = HostType::H_IPV4; val = static_cast(numbers[parts - 1]); for (int n = 0; n < parts - 1; n++) { double b = 3 - n; val += static_cast(numbers[n]) * static_cast(pow(256, b)); } value_.ipv4 = val; } void URLHost::ParseOpaqueHost(const char* input, size_t length) { CHECK_EQ(type_, HostType::H_FAILED); std::string output; output.reserve(length); for (size_t i = 0; i < length; i++) { const char ch = input[i]; if (ch != '%' && IsForbiddenHostCodePoint(ch)) { return; } else { AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET); } } SetOpaque(std::move(output)); } void URLHost::ParseHost(const char* input, size_t length, bool is_special, bool unicode) { CHECK_EQ(type_, HostType::H_FAILED); const char* pointer = input; if (length == 0) return; if (pointer[0] == '[') { if (pointer[length - 1] != ']') return; return ParseIPv6Host(++pointer, length - 2); } if (!is_special) return ParseOpaqueHost(input, length); // First, we have to percent decode std::string decoded = PercentDecode(input, length); // Then we have to punycode toASCII if (!ToASCII(decoded, &decoded)) return; // If any of the following characters are still present, we have to fail for (size_t n = 0; n < decoded.size(); n++) { const char ch = decoded[n]; if (IsForbiddenHostCodePoint(ch)) { return; } } // Check to see if it's an IPv4 IP address bool 
is_ipv4; ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4); if (is_ipv4) return; // If the unicode flag is set, run the result through punycode ToUnicode if (unicode && !ToUnicode(decoded, &decoded)) return; // It's not an IPv4 or IPv6 address, it must be a domain SetDomain(std::move(decoded)); } // Locates the longest sequence of 0 segments in an IPv6 address // in order to use the :: compression when serializing template T* FindLongestZeroSequence(T* values, size_t len) { T* start = values; T* end = start + len; T* result = nullptr; T* current = nullptr; unsigned counter = 0, longest = 1; while (start < end) { if (*start == 0) { if (current == nullptr) current = start; counter++; } else { if (counter > longest) { longest = counter; result = current; } counter = 0; current = nullptr; } start++; } if (counter > longest) result = current; return result; } std::string URLHost::ToStringMove() { std::string return_value; switch (type_) { case HostType::H_DOMAIN: case HostType::H_OPAQUE: return_value = std::move(value_.domain_or_opaque); break; default: return_value = ToString(); break; } Reset(); return return_value; } std::string URLHost::ToString() const { std::string dest; switch (type_) { case HostType::H_DOMAIN: case HostType::H_OPAQUE: return value_.domain_or_opaque; case HostType::H_IPV4: { dest.reserve(15); uint32_t value = value_.ipv4; for (int n = 0; n < 4; n++) { dest.insert(0, std::to_string(value % 256)); if (n < 3) dest.insert(0, 1, '.'); value /= 256; } break; } case HostType::H_IPV6: { dest.reserve(41); dest += '['; const uint16_t* start = &value_.ipv6[0]; const uint16_t* compress_pointer = FindLongestZeroSequence(start, 8); bool ignore0 = false; for (int n = 0; n <= 7; n++) { const uint16_t* piece = &value_.ipv6[n]; if (ignore0 && *piece == 0) continue; else if (ignore0) ignore0 = false; if (compress_pointer == piece) { dest += n == 0 ? 
"::" : ":"; ignore0 = true; continue; } char buf[5]; snprintf(buf, sizeof(buf), "%x", *piece); dest += buf; if (n < 7) dest += ':'; } dest += ']'; break; } case HostType::H_FAILED: break; } return dest; } bool ParseHost(const std::string& input, std::string* output, bool is_special, bool unicode = false) { if (input.empty()) { output->clear(); return true; } URLHost host; host.ParseHost(input.c_str(), input.length(), is_special, unicode); if (host.ParsingFailed()) return false; *output = host.ToStringMove(); return true; } std::vector FromJSStringArray(Environment* env, Local array) { std::vector vec; if (array->Length() > 0) vec.reserve(array->Length()); for (size_t n = 0; n < array->Length(); n++) { Local val = array->Get(env->context(), n).ToLocalChecked(); if (val->IsString()) { Utf8Value value(env->isolate(), val.As()); vec.emplace_back(*value, value.length()); } } return vec; } url_data HarvestBase(Environment* env, Local