1 files changed, 422 insertions, 68 deletions
diff --git a/deps/icu-small/source/common/uloc_tag.cpp b/deps/icu-small/source/common/uloc_tag.cpp
index f8337ec0247..8120331c4b9 100644
--- a/deps/icu-small/source/common/uloc_tag.cpp
+++ b/deps/icu-small/source/common/uloc_tag.cpp
@@ -12,11 +12,13 @@
 #include "unicode/putil.h"
 #include "unicode/uloc.h"
 #include "ustr_imp.h"
+#include "charstr.h"
 #include "cmemory.h"
 #include "cstring.h"
 #include "putilimp.h"
 #include "uinvchar.h"
 #include "ulocimp.h"
+#include "uvector.h"
 #include "uassert.h"
 
 
@@ -77,19 +79,34 @@ static const char LOCALE_TYPE_YES[] = "yes";
 
 #define LANG_UND_LEN 3
 
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ This table has 2 parts. The part for grandfathered tags is generated by the
+ following script from the IANA language tag registry.
+
+ curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
+ egrep -A 7 'Type: grandfathered' | \
+ egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
+ awk -n '/Tag/ {printf("    \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
+ tr 'A-Z' 'a-z'
+
+
+ The 2nd part is made of five ICU-specific entries. They're kept for
+ the backward compatibility for now, even though there are no preferred
+ values. They may have to be removed for the strict BCP 47 compliance.
+
+*/
 static const char* const GRANDFATHERED[] = {
 /*  grandfathered   preferred */
     "art-lojban",   "jbo",
-    "cel-gaulish",  "xtg-x-cel-gaulish",
-    "en-GB-oed",    "en-GB-x-oed",
+    "en-gb-oed",    "en-gb-oxendict",
     "i-ami",        "ami",
     "i-bnn",        "bnn",
-    "i-default",    "en-x-i-default",
-    "i-enochian",   "und-x-i-enochian",
     "i-hak",        "hak",
     "i-klingon",    "tlh",
     "i-lux",        "lb",
-    "i-mingo",      "see-x-i-mingo",
     "i-navajo",     "nv",
     "i-pwn",        "pwn",
     "i-tao",        "tao",
@@ -102,17 +119,175 @@ static const char* const GRANDFATHERED[] = {
     "sgn-ch-de",    "sgg",
     "zh-guoyu",     "cmn",
     "zh-hakka",     "hak",
-    "zh-min",       "nan-x-zh-min",
     "zh-min-nan",   "nan",
     "zh-xiang",     "hsn",
-    NULL,           NULL
+
+    // Grandfathered tags with no preferred value in the IANA
+    // registry. Kept for now for the backward compatibility
+    // because ICU has mapped them this way.
+    "cel-gaulish",  "xtg-x-cel-gaulish",
+    "i-default",    "en-x-i-default",
+    "i-enochian",   "und-x-i-enochian",
+    "i-mingo",      "see-x-i-mingo",
+    "zh-min",       "nan-x-zh-min",
 };
 
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ The table lists redundant tags with preferred value in the IANA language tag registry.
+ It's generated with the following command:
+
+ curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
+ grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
+ awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
+ tr 'A-Z' 'a-z'
+
+ In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
+ a variant tag 'hepburn-heploc' has the preferred subtag, 'alalc97'.
+*/
+
+static const char* const REDUNDANT[] = {
+//  redundant       preferred   (flat pairs: entry i is the tag, entry i + 1 its replacement)
+    "sgn-br",       "bzs",
+    "sgn-co",       "csn",
+    "sgn-de",       "gsg",
+    "sgn-dk",       "dsl",
+    "sgn-es",       "ssp",
+    "sgn-fr",       "fsl",
+    "sgn-gb",       "bfi",
+    "sgn-gr",       "gss",
+    "sgn-ie",       "isg",
+    "sgn-it",       "ise",
+    "sgn-jp",       "jsl",
+    "sgn-mx",       "mfs",
+    "sgn-ni",       "ncs",
+    "sgn-nl",       "dse",
+    "sgn-no",       "nsl",
+    "sgn-pt",       "psr",
+    "sgn-se",       "swl",
+    "sgn-us",       "ase",
+    "sgn-za",       "sfs",
+    "zh-cmn",       "cmn",
+    "zh-cmn-hans",  "cmn-hans",
+    "zh-cmn-hant",  "cmn-hant",
+    "zh-gan",       "gan",
+    "zh-wuu",       "wuu",
+    "zh-yue",       "yue",
+
+    // variant tag with preferred value
+    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
+};
+
+/*
+  Updated on 2018-09-12 from
+  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+  grep 'Type: language' -A 7 language-subtag-registry  | egrep 'Subtag|Prefe' | \
+  grep -B1 'Preferred' | grep -v '^--' | \
+  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
+
+  Make sure that 2-letter language subtags come before 3-letter subtags.
+*/
 static const char DEPRECATEDLANGS[][4] = {
 /*  deprecated  new */
+    "in",       "id",
     "iw",       "he",
     "ji",       "yi",
-    "in",       "id"
+    "jw",       "jv",
+    "mo",       "ro",
+    "aam",       "aas",
+    "adp",       "dz",
+    "aue",       "ktz",
+    "ayx",       "nun",
+    "bgm",       "bcg",
+    "bjd",       "drl",
+    "ccq",       "rki",
+    "cjr",       "mom",
+    "cka",       "cmr",
+    "cmk",       "xch",
+    "coy",       "pij",
+    "cqu",       "quh",
+    "drh",       "khk",
+    "drw",       "prs",
+    "gav",       "dev",
+    "gfx",       "vaj",
+    "ggn",       "gvr",
+    "gti",       "nyc",
+    "guv",       "duz",
+    "hrr",       "jal",
+    "ibi",       "opa",
+    "ilw",       "gal",
+    "jeg",       "oyb",
+    "kgc",       "tdf",
+    "kgh",       "kml",
+    "koj",       "kwv",
+    "krm",       "bmf",
+    "ktr",       "dtp",
+    "kvs",       "gdj",
+    "kwq",       "yam",
+    "kxe",       "tvd",
+    "kzj",       "dtp",
+    "kzt",       "dtp",
+    "lii",       "raq",
+    "lmm",       "rmx",
+    "meg",       "cir",
+    "mst",       "mry",
+    "mwj",       "vaj",
+    "myt",       "mry",
+    "nad",       "xny",
+    "ncp",       "kdz",
+    "nnx",       "ngv",
+    "nts",       "pij",
+    "oun",       "vaj",
+    "pcr",       "adx",
+    "pmc",       "huw",
+    "pmu",       "phr",
+    "ppa",       "bfy",
+    "ppr",       "lcq",
+    "pry",       "prt",
+    "puz",       "pub",
+    "sca",       "hle",
+    "skk",       "oyb",
+    "tdu",       "dtp",
+    "thc",       "tpo",
+    "thx",       "oyb",
+    "tie",       "ras",
+    "tkk",       "twm",
+    "tlw",       "weo",
+    "tmp",       "tyj",
+    "tne",       "kak",
+    "tnf",       "prs",
+    "tsf",       "taj",
+    "uok",       "ema",
+    "xba",       "cax",
+    "xia",       "acn",
+    "xkh",       "waw",
+    "xsj",       "suj",
+    "ybd",       "rki",
+    "yma",       "lrr",
+    "ymt",       "mtm",
+    "yos",       "zom",
+    "yuu",       "yug",
+};
+
+/*
+  Updated on 2018-04-24 from
+
+  curl  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
+  grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
+  grep -B1 'Preferred' | \
+  awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
+*/
+static const char DEPRECATEDREGIONS[][3] = {
+/*  deprecated  new  (flat pairs; each entry is 2 letters plus the terminating NUL) */
+    "BU",       "MM",
+    "DD",       "DE",
+    "FX",       "FR",
+    "TP",       "TL",
+    "YD",       "YE",
+    "ZR",       "CD",
 };
 
 /*
@@ -172,6 +347,46 @@ static const char*
 ultag_getGrandfathered(const ULanguageTag* langtag);
 #endif
 
+namespace {
+
+// Helper class to memory manage CharString objects handed out by create().
+// Only ever stack-allocated, does not need to inherit UMemory.
+class CharStringPool {
+public:
+    CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
+    ~CharStringPool() = default;
+
+    CharStringPool(const CharStringPool&) = delete;  // non-copyable
+    CharStringPool& operator=(const CharStringPool&) = delete;  // non-assignable
+
+    icu::CharString* create() {  // returns a new CharString registered in pool, or nullptr on failure
+        if (U_FAILURE(status)) {  // status is never reset, so an earlier failure is sticky
+            return nullptr;
+        }
+        icu::CharString* const obj = new icu::CharString;
+        if (obj == nullptr) {  // NOTE(review): assumes CharString's operator new returns nullptr on failure -- confirm
+            status = U_MEMORY_ALLOCATION_ERROR;
+            return nullptr;
+        }
+        pool.addElement(obj, status);
+        if (U_FAILURE(status)) {
+            delete obj;  // NOTE(review): assumes a failed addElement() did not store obj -- confirm
+            return nullptr;
+        }
+        return obj;
+    }
+
+private:
+    static void U_CALLCONV deleter(void* obj) {  // element deleter handed to pool's constructor
+        delete static_cast<icu::CharString*>(obj);
+    }
+
+    UErrorCode status;  // declared before pool: it is passed to pool's constructor
+    icu::UVector pool;
+};
+
+}  // namespace
+
 /*
 * -------------------------------------------------
 *
@@ -675,6 +890,11 @@ _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac
     } else {
         /* resolve deprecated */
         for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
+            // 2-letter deprecated subtags are listed before 3-letter
+            // ones in DEPRECATEDLANGS[]. Get out of loop on coming
+            // across the 1st 3-letter subtag, if the input is a 2-letter code,
+            // to avoid continuing to try when there's no match.
+            if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
             if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
                 uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
                 len = (int32_t)uprv_strlen(buf);
@@ -721,7 +941,6 @@ _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
                 *(appendAt + reslen) = SEP;
             }
             reslen++;
-
             if (reslen < capacity) {
                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
             }
@@ -763,6 +982,14 @@ _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
                 *(appendAt + reslen) = SEP;
             }
             reslen++;
+           /* resolve deprecated */
+            for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
+                if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
+                    uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
+                    len = (int32_t)uprv_strlen(buf);
+                    break;
+                }
+            }
 
             if (reslen < capacity) {
                 uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
@@ -900,7 +1127,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
 
 static int32_t
 _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
-    char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
     char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
     int32_t attrBufLength = 0;
     UEnumeration *keywordEnum = NULL;
@@ -920,22 +1146,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
         AttributeListEntry *firstAttr = NULL;
         AttributeListEntry *attr;
         char *attrValue;
-        char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
-        char *pExtBuf = extBuf;
-        int32_t extBufCapacity = sizeof(extBuf);
+        CharStringPool extBufPool;
         const char *bcpKey=nullptr, *bcpValue=nullptr;
         UErrorCode tmpStatus = U_ZERO_ERROR;
         int32_t keylen;
         UBool isBcpUExt;
 
         while (TRUE) {
+            icu::CharString buf;
             key = uenum_next(keywordEnum, NULL, status);
             if (key == NULL) {
                 break;
             }
-            len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
-            /* buf must be null-terminated */
-            if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+            char* buffer;
+            int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
+
+            for (;;) {
+                buffer = buf.getAppendBuffer(
+                        /*minCapacity=*/resultCapacity,
+                        /*desiredCapacityHint=*/resultCapacity,
+                        resultCapacity,
+                        tmpStatus);
+
+                if (U_FAILURE(tmpStatus)) {
+                    break;
+                }
+
+                len = uloc_getKeywordValue(
+                        localeID, key, buffer, resultCapacity, &tmpStatus);
+
+                if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+                    break;
+                }
+
+                resultCapacity = len;
+                tmpStatus = U_ZERO_ERROR;
+            }
+
+            if (U_FAILURE(tmpStatus)) {
+                if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
+                    break;
+                }
                 if (strict) {
                     *status = U_ILLEGAL_ARGUMENT_ERROR;
                     break;
@@ -945,6 +1197,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                 continue;
             }
 
+            buf.append(buffer, len, tmpStatus);
+            if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+                tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
+            }
+
             keylen = (int32_t)uprv_strlen(key);
             isBcpUExt = (keylen > 1);
 
@@ -1007,7 +1264,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                 }
 
                 /* we've checked buf is null-terminated above */
-                bcpValue = uloc_toUnicodeLocaleType(key, buf);
+                bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
                 if (bcpValue == NULL) {
                     if (strict) {
                         *status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -1015,33 +1272,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                     }
                     continue;
                 }
-                if (bcpValue == buf) {
+                if (bcpValue == buf.data()) {
                     /*
                     When uloc_toUnicodeLocaleType(key, buf) returns the
                     input value as is, the value is well-formed, but has
                     no known mapping. This implementation normalizes the
-                    the value to lower case
+                    value to lower case
                     */
+                    icu::CharString* extBuf = extBufPool.create();
+                    if (extBuf == nullptr) {
+                        *status = U_MEMORY_ALLOCATION_ERROR;
+                        break;
+                    }
                     int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
-                    if (bcpValueLen < extBufCapacity) {
-                        uprv_strcpy(pExtBuf, bcpValue);
-                        T_CString_toLowerCase(pExtBuf);
+                    int32_t resultCapacity;
+                    char* pExtBuf = extBuf->getAppendBuffer(
+                            /*minCapacity=*/bcpValueLen,
+                            /*desiredCapacityHint=*/bcpValueLen,
+                            resultCapacity,
+                            tmpStatus);
+                    if (U_FAILURE(tmpStatus)) {
+                        *status = tmpStatus;
+                        break;
+                    }
 
-                        bcpValue = pExtBuf;
+                    uprv_strcpy(pExtBuf, bcpValue);
+                    T_CString_toLowerCase(pExtBuf);
 
-                        pExtBuf += (bcpValueLen + 1);
-                        extBufCapacity -= (bcpValueLen + 1);
-                    } else {
-                        if (strict) {
-                            *status = U_ILLEGAL_ARGUMENT_ERROR;
-                            break;
-                        }
-                        continue;
+                    extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
+                    if (U_FAILURE(tmpStatus)) {
+                        *status = tmpStatus;
+                        break;
                     }
+
+                    bcpValue = extBuf->data();
                 }
             } else {
                 if (*key == PRIVATEUSE) {
-                    if (!_isPrivateuseValueSubtags(buf, len)) {
+                    if (!_isPrivateuseValueSubtags(buf.data(), len)) {
                         if (strict) {
                             *status = U_ILLEGAL_ARGUMENT_ERROR;
                             break;
@@ -1049,7 +1317,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                         continue;
                     }
                 } else {
-                    if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
+                    if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
                         if (strict) {
                             *status = U_ILLEGAL_ARGUMENT_ERROR;
                             break;
@@ -1058,20 +1326,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
                     }
                 }
                 bcpKey = key;
-                if ((len + 1) < extBufCapacity) {
-                    uprv_memcpy(pExtBuf, buf, len);
-                    bcpValue = pExtBuf;
-
-                    pExtBuf += len;
-
-                    *pExtBuf = 0;
-                    pExtBuf++;
-
-                    extBufCapacity -= (len + 1);
-                } else {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
+                icu::CharString* extBuf = extBufPool.create();
+                if (extBuf == nullptr) {
+                    *status = U_MEMORY_ALLOCATION_ERROR;
                     break;
                 }
+                extBuf->append(buf.data(), len, tmpStatus);
+                if (U_FAILURE(tmpStatus)) {
+                    *status = tmpStatus;
+                    break;
+                }
+                bcpValue = extBuf->data();
             }
 
             /* create ExtensionListEntry */
@@ -1242,6 +1507,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
             attrBufIdx += (len + 1);
         } else {
             *status = U_ILLEGAL_ARGUMENT_ERROR;
+            uprv_free(attr);
             goto cleanup;
         }
 
@@ -1460,9 +1726,9 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
                     kwd->value = pType;
 
                     if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
+                        // A duplicate keyword is allowed; only the first
+                        // is honored.
                         uprv_free(kwd);
-                        goto cleanup;
                     }
                 }
 
@@ -1836,7 +2102,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     }
 
     /* check if the tag is grandfathered */
-    for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
+    for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
         if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
             int32_t newTagLength;
 
@@ -1858,6 +2124,37 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
         }
     }
 
+    size_t parsedLenDelta = 0;
+    if (grandfatheredLen == 0) {
+        for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
+            const char* redundantTag = REDUNDANT[i];
+            size_t redundantTagLen = uprv_strlen(redundantTag);
+            // The preferred tag for a redundant tag is always shorter than redundant
+            // tag. A redundant tag may or may not be followed by other subtags.
+            // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
+            if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
+                const char* redundantTagEnd = tagBuf + redundantTagLen;
+                if (*redundantTagEnd  == '\0' || *redundantTagEnd == SEP) {
+                    const char* preferredTag = REDUNDANT[i + 1];
+                    size_t preferredTagLen = uprv_strlen(preferredTag);
+                    uprv_strncpy(t->buf, preferredTag, preferredTagLen);
+                    if (*redundantTagEnd == SEP) {
+                        uprv_memmove(tagBuf + preferredTagLen,
+                                     redundantTagEnd,
+                                     tagLen - redundantTagLen + 1);
+                    } else {
+                        tagBuf[preferredTagLen] = '\0';
+                    }
+                    // parsedLen should be the length of the input
+                    // before redundantTag is replaced by preferredTag.
+                    // Save the delta to add it back later.
+                    parsedLenDelta = redundantTagLen - preferredTagLen;
+                    break;
+                }
+            }
+        }
+    }
+
     /*
      * langtag      =   language
      *                  ["-" script]
@@ -1898,10 +2195,13 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
         if (next & LANG) {
             if (_isLanguageSubtag(pSubtag, subtagLen)) {
                 *pSep = 0;  /* terminate */
+                // TODO: move deprecated language code handling here.
                 t->language = T_CString_toLowerCase(pSubtag);
 
                 pLastGoodPosition = pSep;
-                next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+                next = SCRT | REGN | VART | EXTS | PRIV;
+                if (subtagLen <= 3)
+                  next |= EXTL;
                 continue;
             }
         }
@@ -1942,6 +2242,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
         if (next & REGN) {
             if (_isRegionSubtag(pSubtag, subtagLen)) {
                 *pSep = 0;
+                // TODO: move deprecated region code handling here.
                 t->region = T_CString_toUpperCase(pSubtag);
 
                 pLastGoodPosition = pSep;
@@ -2035,7 +2336,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
             }
         }
         if (next & PRIV) {
-            if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
+            if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
                 char *pPrivuseVal;
 
                 if (pExtension != NULL) {
@@ -2138,7 +2439,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
     }
 
     if (parsedLen != NULL) {
-        *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
+        *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
+            (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
     }
 
     return t;
@@ -2335,31 +2637,66 @@ uloc_toLanguageTag(const char* localeID,
                    int32_t langtagCapacity,
                    UBool strict,
                    UErrorCode* status) {
-    /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
-    char canonical[256];
-    int32_t reslen = 0;
+    icu::CharString canonical;
+    int32_t reslen;
     UErrorCode tmpStatus = U_ZERO_ERROR;
     UBool hadPosix = FALSE;
     const char* pKeywordStart;
 
     /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "".  See #6835 */
-    canonical[0] = 0;
-    if (uprv_strlen(localeID) > 0) {
-        uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
-        if (tmpStatus != U_ZERO_ERROR) {
+    int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
+    if (resultCapacity > 0) {
+        char* buffer;
+
+        for (;;) {
+            buffer = canonical.getAppendBuffer(
+                    /*minCapacity=*/resultCapacity,
+                    /*desiredCapacityHint=*/resultCapacity,
+                    resultCapacity,
+                    tmpStatus);
+
+            if (U_FAILURE(tmpStatus)) {
+                *status = tmpStatus;
+                return 0;
+            }
+
+            reslen =
+                uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
+
+            if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+                break;
+            }
+
+            resultCapacity = reslen;
+            tmpStatus = U_ZERO_ERROR;
+        }
+
+        if (U_FAILURE(tmpStatus)) {
             *status = U_ILLEGAL_ARGUMENT_ERROR;
             return 0;
         }
+
+        canonical.append(buffer, reslen, tmpStatus);
+        if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+            tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
+        }
+
+        if (U_FAILURE(tmpStatus)) {
+            *status = tmpStatus;
+            return 0;
+        }
     }
 
+    reslen = 0;
+
     /* For handling special case - private use only tag */
-    pKeywordStart = locale_getKeywordsStart(canonical);
-    if (pKeywordStart == canonical) {
+    pKeywordStart = locale_getKeywordsStart(canonical.data());
+    if (pKeywordStart == canonical.data()) {
         UEnumeration *kwdEnum;
         int kwdCnt = 0;
         UBool done = FALSE;
 
-        kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
+        kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
         if (kwdEnum != NULL) {
             kwdCnt = uenum_count(kwdEnum, &tmpStatus);
             if (kwdCnt == 1) {
@@ -2397,12 +2734,12 @@ uloc_toLanguageTag(const char* localeID,
         }
     }
 
-    reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
-    reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
-    reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
-    reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
-    reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
-    reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
+    reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
+    reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
+    reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
+    reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
+    reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
+    reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
 
     return reslen;
 }
@@ -2414,6 +2751,23 @@ uloc_forLanguageTag(const char* langtag,
                     int32_t localeIDCapacity,
                     int32_t* parsedLength,
                     UErrorCode* status) {
+    return ulocimp_forLanguageTag(
+            langtag,
+            -1,
+            localeID,
+            localeIDCapacity,
+            parsedLength,
+            status);
+}
+
+
+U_CAPI int32_t U_EXPORT2
+ulocimp_forLanguageTag(const char* langtag,
+                       int32_t tagLen,
+                       char* localeID,
+                       int32_t localeIDCapacity,
+                       int32_t* parsedLength,
+                       UErrorCode* status) {
     ULanguageTag *lt;
     int32_t reslen = 0;
     const char *subtag, *p;
@@ -2421,7 +2775,7 @@ uloc_forLanguageTag(const char* langtag,
     int32_t i, n;
     UBool noRegion = TRUE;
 
-    lt = ultag_parse(langtag, -1, parsedLength, status);
+    lt = ultag_parse(langtag, tagLen, parsedLength, status);
     if (U_FAILURE(*status)) {
         return 0;
     }