Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/nanopb/nanopb.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Pavol Rusnak <pavol@rusnak.io> 2019-12-17 01:44:50 +0300
committer: Pavol Rusnak <pavol@rusnak.io> 2019-12-17 16:36:29 +0300
commit: c00d4c1979c1d0dd481091c148f951e466eab9dc (patch)
tree: ed0941940c5d763001bbdf0fab96953af5eb7ece /pb_decode.c
parent: 28706965251c26d5f9dab6defdca2e384cbf4da7 (diff)
Introduce new compile time flag: PB_VALIDATE_UTF8
Check whether incoming strings are valid UTF-8 sequences. Slows down the string processing slightly and slightly increases code size.
Diffstat (limited to 'pb_decode.c')
-rw-r--r-- pb_decode.c 58
1 file changed, 58 insertions, 0 deletions
diff --git a/pb_decode.c b/pb_decode.c
index 62a7d97..68351b5 100644
--- a/pb_decode.c
+++ b/pb_decode.c
@@ -1475,6 +1475,53 @@ static bool checkreturn pb_dec_bytes(pb_istream_t *stream, const pb_field_iter_t
return pb_read(stream, dest->bytes, (size_t)size);
}
+#ifdef PB_VALIDATE_UTF8
+
+/* adapted from https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c
+ *
+ * Check that the zero-terminated byte string at s is well-formed UTF-8.
+ *
+ * Returns true when every sequence up to the first zero byte is accepted,
+ * and false at the first sequence that is rejected. Rejected input is:
+ *   - a lead byte without the required 10xxxxxx continuation bytes,
+ *   - a continuation byte or a byte in 0xf8..0xff in lead position,
+ *   - overlong encodings, UTF-16 surrogates, U+FFFE / U+FFFF,
+ *   - code points above U+10FFFF.
+ *
+ * s must be zero-terminated. In each multi-byte branch the continuation
+ * byte tests come first in the || chain, so a terminator sitting where a
+ * continuation byte is expected fails that test and short-circuits the
+ * rest; no byte after the terminator is read.
+ *
+ * NOTE(review): scanning stops at the first zero byte, so any bytes that
+ * follow an embedded NUL in the caller's buffer are not examined here.
+ */
+
+static bool pb_validate_utf8(const uint8_t *s)
+{
+ while (*s) { /* stop at the terminating zero byte */
+ if (*s < 0x80)
+ /* 0xxxxxxx
+  * single-byte (ASCII) character, always accepted */
+ s++;
+ else if ((s[0] & 0xe0) == 0xc0) {
+ /* 110XXXXx 10xxxxxx
+  * two-byte sequence */
+ if ((s[1] & 0xc0) != 0x80 || /* second byte must be 10xxxxxx */
+ (s[0] & 0xfe) == 0xc0) /* overlong? lead byte 0xc0/0xc1 encodes a value below U+0080 */
+ return false;
+ else
+ s += 2;
+ } else if ((s[0] & 0xf0) == 0xe0) {
+ /* 1110XXXX 10Xxxxxx 10xxxxxx
+  * three-byte sequence */
+ if ((s[1] & 0xc0) != 0x80 || /* both trailing bytes must be 10xxxxxx */
+ (s[2] & 0xc0) != 0x80 ||
+ (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? value below U+0800 */
+ (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? U+D800..U+DFFF */
+ (s[0] == 0xef && s[1] == 0xbf &&
+ (s[2] & 0xfe) == 0xbe)) /* U+FFFE or U+FFFF? */
+ return false;
+ else
+ s += 3;
+ } else if ((s[0] & 0xf8) == 0xf0) {
+ /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx
+  * four-byte sequence */
+ if ((s[1] & 0xc0) != 0x80 || /* all three trailing bytes must be 10xxxxxx */
+ (s[2] & 0xc0) != 0x80 ||
+ (s[3] & 0xc0) != 0x80 ||
+ (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? value below U+10000 */
+ (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) /* > U+10FFFF? */
+ return false;
+ else
+ s += 4;
+ } else /* stray continuation byte (0x80..0xbf) or byte in 0xf8..0xff */
+ return false;
+ }
+
+ return true; /* reached the terminator without rejecting anything */
+}
+
+#endif
+
static bool checkreturn pb_dec_string(pb_istream_t *stream, const pb_field_iter_t *field)
{
uint32_t size;
@@ -1507,7 +1554,18 @@ static bool checkreturn pb_dec_string(pb_istream_t *stream, const pb_field_iter_
}
dest[size] = 0;
+
+#ifdef PB_VALIDATE_UTF8
+ if (!pb_read(stream, dest, (size_t)size))
+ return false;
+
+ if (!pb_validate_utf8((const uint8_t *)dest))
+ PB_RETURN_ERROR(stream, "invalid utf8");
+
+ return true;
+#else
return pb_read(stream, dest, (size_t)size);
+#endif
}
static bool checkreturn pb_dec_submessage(pb_istream_t *stream, const pb_field_iter_t *field)