diff options
author | Pavol Rusnak <pavol@rusnak.io> | 2019-12-17 01:44:50 +0300 |
---|---|---|
committer | Pavol Rusnak <pavol@rusnak.io> | 2019-12-17 16:36:29 +0300 |
commit | c00d4c1979c1d0dd481091c148f951e466eab9dc (patch) | |
tree | ed0941940c5d763001bbdf0fab96953af5eb7ece /pb_decode.c | |
parent | 28706965251c26d5f9dab6defdca2e384cbf4da7 (diff) |
Introduce new compile-time flag: PB_VALIDATE_UTF8
Check whether incoming strings are valid UTF-8 sequences. This slightly
slows down string processing and slightly increases code size.
Diffstat (limited to 'pb_decode.c')
-rw-r--r-- | pb_decode.c | 58 |
1 files changed, 58 insertions, 0 deletions
/*
 * Patch context: diff --git a/pb_decode.c b/pb_decode.c
 * (index 62a7d97..68351b5), hunk @@ -1475,6 +1475,53 @@.
 * The block below is inserted after the end of pb_dec_bytes(), whose last
 * statement is `return pb_read(stream, dest->bytes, (size_t)size);`, and
 * before pb_dec_string().
 */
#ifdef PB_VALIDATE_UTF8

/* adapted from https://www.cl.cam.ac.uk/~mgk25/ucs/utf8_check.c */

/* Returns true when b has the form 10xxxxxx, i.e. is a UTF-8 continuation
 * byte. The terminating NUL is never a continuation byte, which is what
 * stops the validator from reading past the end of a truncated sequence. */
static bool pb_utf8_is_continuation(uint8_t b)
{
    return (b & 0xc0) == 0x80;
}

/* Checks that the NUL-terminated byte string s is well-formed UTF-8.
 *
 * Returns true if every sequence up to the first NUL byte is well-formed,
 * false otherwise. Rejected inputs:
 *   - stray continuation bytes and invalid lead bytes (0xf8..0xff),
 *   - truncated multi-byte sequences,
 *   - overlong encodings,
 *   - UTF-16 surrogates (U+D800..U+DFFF),
 *   - code points above U+10FFFF.
 *
 * The noncharacters U+FFFE and U+FFFF are accepted: they are well-formed
 * UTF-8, and rejecting them would refuse strings that other encoders
 * legitimately produce.
 *
 * Scanning stops at the first NUL byte, so any bytes that follow an embedded
 * NUL inside a longer buffer are not examined.
 *
 * In each branch the continuation-byte tests come first and rely on ||
 * short-circuiting: a later byte is only read once the previous one has
 * been shown to be non-NUL.
 */
static bool pb_validate_utf8(const uint8_t *s)
{
    while (*s != 0)
    {
        if (s[0] < 0x80)
        {
            /* 0xxxxxxx */
            s += 1;
        }
        else if ((s[0] & 0xe0) == 0xc0)
        {
            /* 110XXXXx 10xxxxxx */
            if (!pb_utf8_is_continuation(s[1]) ||
                (s[0] & 0xfe) == 0xc0)                        /* overlong? */
            {
                return false;
            }
            s += 2;
        }
        else if ((s[0] & 0xf0) == 0xe0)
        {
            /* 1110XXXX 10Xxxxxx 10xxxxxx */
            if (!pb_utf8_is_continuation(s[1]) ||
                !pb_utf8_is_continuation(s[2]) ||
                (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) ||    /* overlong? */
                (s[0] == 0xed && (s[1] & 0xe0) == 0xa0))      /* surrogate? */
            {
                return false;
            }
            s += 3;
        }
        else if ((s[0] & 0xf8) == 0xf0)
        {
            /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */
            if (!pb_utf8_is_continuation(s[1]) ||
                !pb_utf8_is_continuation(s[2]) ||
                !pb_utf8_is_continuation(s[3]) ||
                (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) ||    /* overlong? */
                (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) /* > U+10FFFF? */
            {
                return false;
            }
            s += 4;
        }
        else
        {
            /* Stray continuation byte or invalid lead byte. */
            return false;
        }
    }

    return true;
}

#endif

/*
 * Call site added by the same patch in pb_dec_string()
 * (hunk @@ -1507,7 +1554,18 @@), reproduced for reference only. It replaces
 * the unconditional `return pb_read(stream, dest, (size_t)size);` that
 * followed `dest[size] = 0;`:
 *
 *     dest[size] = 0;
 *
 * #ifdef PB_VALIDATE_UTF8
 *     if (!pb_read(stream, dest, (size_t)size))
 *         return false;
 *
 *     if (!pb_validate_utf8((const uint8_t *)dest))
 *         PB_RETURN_ERROR(stream, "invalid utf8");
 *
 *     return true;
 * #else
 *     return pb_read(stream, dest, (size_t)size);
 * #endif
 *
 * The next definition in the file is pb_dec_submessage().
 */