Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nodejs/node.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common/uloc_tag.cpp')
-rw-r--r--deps/icu-small/source/common/uloc_tag.cpp490
1 files changed, 422 insertions, 68 deletions
diff --git a/deps/icu-small/source/common/uloc_tag.cpp b/deps/icu-small/source/common/uloc_tag.cpp
index f8337ec0247..8120331c4b9 100644
--- a/deps/icu-small/source/common/uloc_tag.cpp
+++ b/deps/icu-small/source/common/uloc_tag.cpp
@@ -12,11 +12,13 @@
#include "unicode/putil.h"
#include "unicode/uloc.h"
#include "ustr_imp.h"
+#include "charstr.h"
#include "cmemory.h"
#include "cstring.h"
#include "putilimp.h"
#include "uinvchar.h"
#include "ulocimp.h"
+#include "uvector.h"
#include "uassert.h"
@@ -77,19 +79,34 @@ static const char LOCALE_TYPE_YES[] = "yes";
#define LANG_UND_LEN 3
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ This table has 2 parts. The part for grandfathered tags is generated by the
+ following script from the IANA language tag registry.
+
+ curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
+ egrep -A 7 'Type: grandfathered' | \
+ egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
+ awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
+ tr 'A-Z' 'a-z'
+
+
+ The 2nd part is made of five ICU-specific entries. They're kept for
+ backward compatibility for now, even though they have no preferred
+ values. They may have to be removed for strict BCP 47 compliance.
+
+*/
static const char* const GRANDFATHERED[] = {
/* grandfathered preferred */
"art-lojban", "jbo",
- "cel-gaulish", "xtg-x-cel-gaulish",
- "en-GB-oed", "en-GB-x-oed",
+ "en-gb-oed", "en-gb-oxendict",
"i-ami", "ami",
"i-bnn", "bnn",
- "i-default", "en-x-i-default",
- "i-enochian", "und-x-i-enochian",
"i-hak", "hak",
"i-klingon", "tlh",
"i-lux", "lb",
- "i-mingo", "see-x-i-mingo",
"i-navajo", "nv",
"i-pwn", "pwn",
"i-tao", "tao",
@@ -102,17 +119,175 @@ static const char* const GRANDFATHERED[] = {
"sgn-ch-de", "sgg",
"zh-guoyu", "cmn",
"zh-hakka", "hak",
- "zh-min", "nan-x-zh-min",
"zh-min-nan", "nan",
"zh-xiang", "hsn",
- NULL, NULL
+
+ // Grandfathered tags with no preferred value in the IANA
+ // registry. Kept for now for the backward compatibility
+ // because ICU has mapped them this way.
+ "cel-gaulish", "xtg-x-cel-gaulish",
+ "i-default", "en-x-i-default",
+ "i-enochian", "und-x-i-enochian",
+ "i-mingo", "see-x-i-mingo",
+ "zh-min", "nan-x-zh-min",
};
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ The table lists redundant tags with a preferred value in the IANA language tag registry.
+ It's generated with the following command:
+
+ curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
+ grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
+ awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
+ tr 'A-Z' 'a-z'
+
+ In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
+ the variant tag 'hepburn-heploc' has the preferred subtag 'alalc97'.
+*/
+
+static const char* const REDUNDANT[] = {
+// redundant preferred
+ "sgn-br", "bzs",
+ "sgn-co", "csn",
+ "sgn-de", "gsg",
+ "sgn-dk", "dsl",
+ "sgn-es", "ssp",
+ "sgn-fr", "fsl",
+ "sgn-gb", "bfi",
+ "sgn-gr", "gss",
+ "sgn-ie", "isg",
+ "sgn-it", "ise",
+ "sgn-jp", "jsl",
+ "sgn-mx", "mfs",
+ "sgn-ni", "ncs",
+ "sgn-nl", "dse",
+ "sgn-no", "nsl",
+ "sgn-pt", "psr",
+ "sgn-se", "swl",
+ "sgn-us", "ase",
+ "sgn-za", "sfs",
+ "zh-cmn", "cmn",
+ "zh-cmn-hans", "cmn-hans",
+ "zh-cmn-hant", "cmn-hant",
+ "zh-gan", "gan",
+ "zh-wuu", "wuu",
+ "zh-yue", "yue",
+
+ // variant tag with preferred value
+ "ja-latn-hepburn-heploc", "ja-latn-alalc97",
+};
+
+/*
+ Updated on 2018-09-12 from
+ https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
+
+ grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
+ grep -B1 'Preferred' | grep -v '^--' | \
+ awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
+
+ Make sure that 2-letter language subtags come before 3-letter subtags.
+*/
static const char DEPRECATEDLANGS[][4] = {
/* deprecated new */
+ "in", "id",
"iw", "he",
"ji", "yi",
- "in", "id"
+ "jw", "jv",
+ "mo", "ro",
+ "aam", "aas",
+ "adp", "dz",
+ "aue", "ktz",
+ "ayx", "nun",
+ "bgm", "bcg",
+ "bjd", "drl",
+ "ccq", "rki",
+ "cjr", "mom",
+ "cka", "cmr",
+ "cmk", "xch",
+ "coy", "pij",
+ "cqu", "quh",
+ "drh", "khk",
+ "drw", "prs",
+ "gav", "dev",
+ "gfx", "vaj",
+ "ggn", "gvr",
+ "gti", "nyc",
+ "guv", "duz",
+ "hrr", "jal",
+ "ibi", "opa",
+ "ilw", "gal",
+ "jeg", "oyb",
+ "kgc", "tdf",
+ "kgh", "kml",
+ "koj", "kwv",
+ "krm", "bmf",
+ "ktr", "dtp",
+ "kvs", "gdj",
+ "kwq", "yam",
+ "kxe", "tvd",
+ "kzj", "dtp",
+ "kzt", "dtp",
+ "lii", "raq",
+ "lmm", "rmx",
+ "meg", "cir",
+ "mst", "mry",
+ "mwj", "vaj",
+ "myt", "mry",
+ "nad", "xny",
+ "ncp", "kdz",
+ "nnx", "ngv",
+ "nts", "pij",
+ "oun", "vaj",
+ "pcr", "adx",
+ "pmc", "huw",
+ "pmu", "phr",
+ "ppa", "bfy",
+ "ppr", "lcq",
+ "pry", "prt",
+ "puz", "pub",
+ "sca", "hle",
+ "skk", "oyb",
+ "tdu", "dtp",
+ "thc", "tpo",
+ "thx", "oyb",
+ "tie", "ras",
+ "tkk", "twm",
+ "tlw", "weo",
+ "tmp", "tyj",
+ "tne", "kak",
+ "tnf", "prs",
+ "tsf", "taj",
+ "uok", "ema",
+ "xba", "cax",
+ "xia", "acn",
+ "xkh", "waw",
+ "xsj", "suj",
+ "ybd", "rki",
+ "yma", "lrr",
+ "ymt", "mtm",
+ "yos", "zom",
+ "yuu", "yug",
+};
+
+/*
+ Updated on 2018-04-24 from
+
+ curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
+ grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
+ grep -B1 'Preferred' | \
+ awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
+*/
+static const char DEPRECATEDREGIONS[][3] = {
+/* deprecated new */
+ "BU", "MM",
+ "DD", "DE",
+ "FX", "FR",
+ "TP", "TL",
+ "YD", "YE",
+ "ZR", "CD",
};
/*
@@ -172,6 +347,46 @@ static const char*
ultag_getGrandfathered(const ULanguageTag* langtag);
#endif
+namespace {
+
+// Helper class to memory manage CharString objects: create() registers each new object in an internal UVector.
+// Only ever stack-allocated, does not need to inherit UMemory.
+class CharStringPool {
+public:
+ CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}  // status is declared before pool, so it is initialized before being passed to pool's constructor
+ ~CharStringPool() = default;  // NOTE(review): presumably pool's destructor runs deleter on every stored element -- confirm against UVector
+
+ CharStringPool(const CharStringPool&) = delete;  // copying is disabled
+ CharStringPool& operator=(const CharStringPool&) = delete;
+
+ icu::CharString* create() {  // returns a newly allocated CharString that has been added to pool, or nullptr on failure
+ if (U_FAILURE(status)) {  // a recorded failure is sticky: every later call returns nullptr
+ return nullptr;
+ }
+ icu::CharString* const obj = new icu::CharString;
+ if (obj == nullptr) {  // allocation failed; remember it in status
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return nullptr;
+ }
+ pool.addElement(obj, status);
+ if (U_FAILURE(status)) {  // obj was not registered, so it is freed here instead of being returned
+ delete obj;
+ return nullptr;
+ }
+ return obj;
+ }
+
+private:
+ static void U_CALLCONV deleter(void* obj) {  // element deleter handed to pool in the constructor
+ delete static_cast<icu::CharString*>(obj);
+ }
+
+ UErrorCode status;  // first error seen by the constructor or create(); checked at the top of create()
+ icu::UVector pool;  // holds the CharString pointers returned by create()
+};
+
+} // namespace
+
/*
* -------------------------------------------------
*
@@ -675,6 +890,11 @@ _appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capac
} else {
/* resolve deprecated */
for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) {
+ // 2-letter deprecated subtags are listed before 3-letter
+ // ones in DEPRECATEDLANGS[]. Get out of the loop on coming
+ // across the 1st 3-letter subtag if the input is a 2-letter code,
+ // to avoid continuing to try when there's no match.
+ if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break;
if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) {
uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]);
len = (int32_t)uprv_strlen(buf);
@@ -721,7 +941,6 @@ _appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
*(appendAt + reslen) = SEP;
}
reslen++;
-
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
}
@@ -763,6 +982,14 @@ _appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacit
*(appendAt + reslen) = SEP;
}
reslen++;
+ /* resolve deprecated */
+ for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) {
+ if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) {
+ uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]);
+ len = (int32_t)uprv_strlen(buf);
+ break;
+ }
+ }
if (reslen < capacity) {
uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen));
@@ -900,7 +1127,6 @@ _appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
static int32_t
_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
- char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
int32_t attrBufLength = 0;
UEnumeration *keywordEnum = NULL;
@@ -920,22 +1146,48 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
AttributeListEntry *firstAttr = NULL;
AttributeListEntry *attr;
char *attrValue;
- char extBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
- char *pExtBuf = extBuf;
- int32_t extBufCapacity = sizeof(extBuf);
+ CharStringPool extBufPool;
const char *bcpKey=nullptr, *bcpValue=nullptr;
UErrorCode tmpStatus = U_ZERO_ERROR;
int32_t keylen;
UBool isBcpUExt;
while (TRUE) {
+ icu::CharString buf;
key = uenum_next(keywordEnum, NULL, status);
if (key == NULL) {
break;
}
- len = uloc_getKeywordValue(localeID, key, buf, sizeof(buf), &tmpStatus);
- /* buf must be null-terminated */
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ char* buffer;
+ int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
+
+ for (;;) {
+ buffer = buf.getAppendBuffer(
+ /*minCapacity=*/resultCapacity,
+ /*desiredCapacityHint=*/resultCapacity,
+ resultCapacity,
+ tmpStatus);
+
+ if (U_FAILURE(tmpStatus)) {
+ break;
+ }
+
+ len = uloc_getKeywordValue(
+ localeID, key, buffer, resultCapacity, &tmpStatus);
+
+ if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+
+ resultCapacity = len;
+ tmpStatus = U_ZERO_ERROR;
+ }
+
+ if (U_FAILURE(tmpStatus)) {
+ if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -945,6 +1197,11 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
+ buf.append(buffer, len, tmpStatus);
+ if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
+ }
+
keylen = (int32_t)uprv_strlen(key);
isBcpUExt = (keylen > 1);
@@ -1007,7 +1264,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
/* we've checked buf is null-terminated above */
- bcpValue = uloc_toUnicodeLocaleType(key, buf);
+ bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
if (bcpValue == NULL) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
@@ -1015,33 +1272,44 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
continue;
}
- if (bcpValue == buf) {
+ if (bcpValue == buf.data()) {
/*
When uloc_toUnicodeLocaleType(key, buf) returns the
input value as is, the value is well-formed, but has
no known mapping. This implementation normalizes the
- the value to lower case
+ value to lower case
*/
+ icu::CharString* extBuf = extBufPool.create();
+ if (extBuf == nullptr) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ break;
+ }
int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
- if (bcpValueLen < extBufCapacity) {
- uprv_strcpy(pExtBuf, bcpValue);
- T_CString_toLowerCase(pExtBuf);
+ int32_t resultCapacity;
+ char* pExtBuf = extBuf->getAppendBuffer(
+ /*minCapacity=*/bcpValueLen,
+ /*desiredCapacityHint=*/bcpValueLen,
+ resultCapacity,
+ tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ break;
+ }
- bcpValue = pExtBuf;
+ uprv_strcpy(pExtBuf, bcpValue);
+ T_CString_toLowerCase(pExtBuf);
- pExtBuf += (bcpValueLen + 1);
- extBufCapacity -= (bcpValueLen + 1);
- } else {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- }
- continue;
+ extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ break;
}
+
+ bcpValue = extBuf->data();
}
} else {
if (*key == PRIVATEUSE) {
- if (!_isPrivateuseValueSubtags(buf, len)) {
+ if (!_isPrivateuseValueSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1049,7 +1317,7 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
continue;
}
} else {
- if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf, len)) {
+ if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
if (strict) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
break;
@@ -1058,20 +1326,17 @@ _appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capac
}
}
bcpKey = key;
- if ((len + 1) < extBufCapacity) {
- uprv_memcpy(pExtBuf, buf, len);
- bcpValue = pExtBuf;
-
- pExtBuf += len;
-
- *pExtBuf = 0;
- pExtBuf++;
-
- extBufCapacity -= (len + 1);
- } else {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
+ icu::CharString* extBuf = extBufPool.create();
+ if (extBuf == nullptr) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
break;
}
+ extBuf->append(buf.data(), len, tmpStatus);
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ break;
+ }
+ bcpValue = extBuf->data();
}
/* create ExtensionListEntry */
@@ -1242,6 +1507,7 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
attrBufIdx += (len + 1);
} else {
*status = U_ILLEGAL_ARGUMENT_ERROR;
+ uprv_free(attr);
goto cleanup;
}
@@ -1460,9 +1726,9 @@ _appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendT
kwd->value = pType;
if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
+ // A duplicate keyword is allowed; only the first
+ // is honored.
uprv_free(kwd);
- goto cleanup;
}
}
@@ -1836,7 +2102,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
/* check if the tag is grandfathered */
- for (i = 0; GRANDFATHERED[i] != NULL; i += 2) {
+ for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
int32_t newTagLength;
@@ -1858,6 +2124,37 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
}
+ size_t parsedLenDelta = 0;
+ if (grandfatheredLen == 0) {
+ for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
+ const char* redundantTag = REDUNDANT[i];
+ size_t redundantTagLen = uprv_strlen(redundantTag);
+ // The preferred tag for a redundant tag is always shorter than redundant
+ // tag. A redundant tag may or may not be followed by other subtags.
+ // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
+ if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
+ const char* redundantTagEnd = tagBuf + redundantTagLen;
+ if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
+ const char* preferredTag = REDUNDANT[i + 1];
+ size_t preferredTagLen = uprv_strlen(preferredTag);
+ uprv_strncpy(t->buf, preferredTag, preferredTagLen);
+ if (*redundantTagEnd == SEP) {
+ uprv_memmove(tagBuf + preferredTagLen,
+ redundantTagEnd,
+ tagLen - redundantTagLen + 1);
+ } else {
+ tagBuf[preferredTagLen] = '\0';
+ }
+ // parsedLen should be the length of the input
+ // before redundantTag is replaced by preferredTag.
+ // Save the delta to add it back later.
+ parsedLenDelta = redundantTagLen - preferredTagLen;
+ break;
+ }
+ }
+ }
+ }
+
/*
* langtag = language
* ["-" script]
@@ -1898,10 +2195,13 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
if (next & LANG) {
if (_isLanguageSubtag(pSubtag, subtagLen)) {
*pSep = 0; /* terminate */
+ // TODO: move deprecated language code handling here.
t->language = T_CString_toLowerCase(pSubtag);
pLastGoodPosition = pSep;
- next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
+ next = SCRT | REGN | VART | EXTS | PRIV;
+ if (subtagLen <= 3)
+ next |= EXTL;
continue;
}
}
@@ -1942,6 +2242,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
if (next & REGN) {
if (_isRegionSubtag(pSubtag, subtagLen)) {
*pSep = 0;
+ // TODO: move deprecated region code handling here.
t->region = T_CString_toUpperCase(pSubtag);
pLastGoodPosition = pSep;
@@ -2035,7 +2336,7 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
}
if (next & PRIV) {
- if (uprv_tolower(*pSubtag) == PRIVATEUSE) {
+ if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
char *pPrivuseVal;
if (pExtension != NULL) {
@@ -2138,7 +2439,8 @@ ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* sta
}
if (parsedLen != NULL) {
- *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : (int32_t)(pLastGoodPosition - t->buf);
+ *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
+ (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
}
return t;
@@ -2335,31 +2637,66 @@ uloc_toLanguageTag(const char* localeID,
int32_t langtagCapacity,
UBool strict,
UErrorCode* status) {
- /* char canonical[ULOC_FULLNAME_CAPACITY]; */ /* See #6822 */
- char canonical[256];
- int32_t reslen = 0;
+ icu::CharString canonical;
+ int32_t reslen;
UErrorCode tmpStatus = U_ZERO_ERROR;
UBool hadPosix = FALSE;
const char* pKeywordStart;
/* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
- canonical[0] = 0;
- if (uprv_strlen(localeID) > 0) {
- uloc_canonicalize(localeID, canonical, sizeof(canonical), &tmpStatus);
- if (tmpStatus != U_ZERO_ERROR) {
+ int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
+ if (resultCapacity > 0) {
+ char* buffer;
+
+ for (;;) {
+ buffer = canonical.getAppendBuffer(
+ /*minCapacity=*/resultCapacity,
+ /*desiredCapacityHint=*/resultCapacity,
+ resultCapacity,
+ tmpStatus);
+
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ return 0;
+ }
+
+ reslen =
+ uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
+
+ if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
+ break;
+ }
+
+ resultCapacity = reslen;
+ tmpStatus = U_ZERO_ERROR;
+ }
+
+ if (U_FAILURE(tmpStatus)) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return 0;
}
+
+ canonical.append(buffer, reslen, tmpStatus);
+ if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
+ tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
+ }
+
+ if (U_FAILURE(tmpStatus)) {
+ *status = tmpStatus;
+ return 0;
+ }
}
+ reslen = 0;
+
/* For handling special case - private use only tag */
- pKeywordStart = locale_getKeywordsStart(canonical);
- if (pKeywordStart == canonical) {
+ pKeywordStart = locale_getKeywordsStart(canonical.data());
+ if (pKeywordStart == canonical.data()) {
UEnumeration *kwdEnum;
int kwdCnt = 0;
UBool done = FALSE;
- kwdEnum = uloc_openKeywords((const char*)canonical, &tmpStatus);
+ kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
if (kwdEnum != NULL) {
kwdCnt = uenum_count(kwdEnum, &tmpStatus);
if (kwdCnt == 1) {
@@ -2397,12 +2734,12 @@ uloc_toLanguageTag(const char* localeID,
}
}
- reslen += _appendLanguageToLanguageTag(canonical, langtag, langtagCapacity, strict, status);
- reslen += _appendScriptToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
- reslen += _appendRegionToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, status);
- reslen += _appendVariantsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
- reslen += _appendKeywordsToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
- reslen += _appendPrivateuseToLanguageTag(canonical, langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
+ reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
+ reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
+ reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
+ reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
+ reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
+ reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
return reslen;
}
@@ -2414,6 +2751,23 @@ uloc_forLanguageTag(const char* langtag,
int32_t localeIDCapacity,
int32_t* parsedLength,
UErrorCode* status) {
+ return ulocimp_forLanguageTag(
+ langtag,
+ -1,
+ localeID,
+ localeIDCapacity,
+ parsedLength,
+ status);
+}
+
+
+U_CAPI int32_t U_EXPORT2
+ulocimp_forLanguageTag(const char* langtag,
+ int32_t tagLen,
+ char* localeID,
+ int32_t localeIDCapacity,
+ int32_t* parsedLength,
+ UErrorCode* status) {
ULanguageTag *lt;
int32_t reslen = 0;
const char *subtag, *p;
@@ -2421,7 +2775,7 @@ uloc_forLanguageTag(const char* langtag,
int32_t i, n;
UBool noRegion = TRUE;
- lt = ultag_parse(langtag, -1, parsedLength, status);
+ lt = ultag_parse(langtag, tagLen, parsedLength, status);
if (U_FAILURE(*status)) {
return 0;
}