Replace base64 decoding.

This code has caused a long history of problems. This change rewrites it completely with something that is, hopefully, much simpler and more robust, and adds more testing. Change-Id: Ibeef51f9386afd95d5b73316e451eb3a2d7ec4e0 Reviewed-on: https://boringssl-review.googlesource.com/8033 Reviewed-by: Adam Langley <agl@google.com>
author: Adam Langley <alangley@gmail.com> 2016-05-20 20:51:48 +0300
committer: Adam Langley <agl@google.com> 2016-05-26 20:59:10 +0300
commit: d09175ffe335d9be6846b4ac5e9e622d96213a00 (patch)
tree: f4b68d43bbedb052902c3f0c87d581066ef4d519 /crypto/base64
parent: 1cb405d96b11db5767446766d76516534067bbd1 (diff)
2 files changed, 543 insertions, 320 deletions
diff --git a/crypto/base64/base64.c b/crypto/base64/base64.c
index 61f79cda..0763a3e4 100644
--- a/crypto/base64/base64.c
+++ b/crypto/base64/base64.c
@@ -60,61 +60,42 @@
 #include <limits.h>
 #include <string.h>
 
+#include <openssl/type_check.h>
+
+
+/* Encoding. */
 
 static const unsigned char data_bin2ascii[65] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
 #define conv_bin2ascii(a) (data_bin2ascii[(a) & 0x3f])
 
-/* 64 char lines
- * pad input with 0
- * left over chars are set to =
- * 1 byte  => xx==
- * 2 bytes => xxx=
- * 3 bytes => xxxx
- */
-#define BIN_PER_LINE    (64/4*3)
-#define CHUNKS_PER_LINE (64/4)
-#define CHAR_PER_LINE   (64+1)
-
-/* 0xF0 is a EOLN
- * 0xF1 is ignore but next needs to be 0xF0 (for \r\n processing).
- * 0xF2 is EOF
- * 0xE0 is ignore at start of line.
- * 0xFF is error */
-
-#define B64_EOLN 0xF0
-#define B64_CR 0xF1
-#define B64_EOF 0xF2
-#define B64_WS 0xE0
-#define B64_ERROR 0xFF
-#define B64_NOT_BASE64(a) (((a) | 0x13) == 0xF3)
-
-static const uint8_t data_ascii2bin[128] = {
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0, 0xF0, 0xFF,
-    0xFF, 0xF1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0, 0xFF, 0xFF, 0xFF,
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xF2, 0xFF, 0x3F,
-    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0xFF, 0xFF,
-    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
-    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12,
-    0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-    0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24,
-    0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
-    0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-};
+OPENSSL_COMPILE_ASSERT(sizeof(((EVP_ENCODE_CTX *)(NULL))->data) % 3 == 0,
+                       data_length_must_be_multiple_of_base64_chunk_size);
 
-static uint8_t conv_ascii2bin(uint8_t a) {
-  if (a >= 128) {
-    return 0xFF;
+int EVP_EncodedLength(size_t *out_len, size_t len) {
+  if (len + 2 < len) {
+    return 0;
+  }
+  len += 2;
+  len /= 3;
+
+  if (((len << 2) >> 2) != len) {
+    return 0;
+  }
+  len <<= 2;
+
+  if (len + 1 < len) {
+    return 0;
   }
-  return data_ascii2bin[a];
+  len++;
+
+  *out_len = len;
+  return 1;
 }
 
 void EVP_EncodeInit(EVP_ENCODE_CTX *ctx) {
-  ctx->length = 48;
-  ctx->num = 0;
-  ctx->line_num = 0;
+  memset(ctx, 0, sizeof(EVP_ENCODE_CTX));
 }
 
 void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
@@ -126,55 +107,72 @@ void EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
     return;
   }
 
-  assert(ctx->length <= sizeof(ctx->enc_data));
-  assert(ctx->num < ctx->length);
+  assert(ctx->data_used < sizeof(ctx->data));
 
-  if (ctx->length - ctx->num > in_len) {
-    memcpy(&ctx->enc_data[ctx->num], in, in_len);
-    ctx->num += in_len;
+  if (sizeof(ctx->data) - ctx->data_used > in_len) {
+    memcpy(&ctx->data[ctx->data_used], in, in_len);
+    ctx->data_used += in_len;
     return;
   }
 
-  if (ctx->num != 0) {
-    size_t todo = ctx->length - ctx->num;
-    memcpy(&ctx->enc_data[ctx->num], in, todo);
+  if (ctx->data_used != 0) {
+    const size_t todo = sizeof(ctx->data) - ctx->data_used;
+    memcpy(&ctx->data[ctx->data_used], in, todo);
     in += todo;
     in_len -= todo;
-    size_t encoded = EVP_EncodeBlock(out, ctx->enc_data, ctx->length);
-    ctx->num = 0;
+
+    size_t encoded = EVP_EncodeBlock(out, ctx->data, sizeof(ctx->data));
+    ctx->data_used = 0;
+
     out += encoded;
     *(out++) = '\n';
     *out = '\0';
+
     total = encoded + 1;
   }
 
-  while (in_len >= ctx->length) {
-    size_t encoded = EVP_EncodeBlock(out, in, ctx->length);
-    in += ctx->length;
-    in_len -= ctx->length;
+  while (in_len >= sizeof(ctx->data)) {
+    size_t encoded = EVP_EncodeBlock(out, in, sizeof(ctx->data));
+    in += sizeof(ctx->data);
+    in_len -= sizeof(ctx->data);
+
     out += encoded;
     *(out++) = '\n';
     *out = '\0';
+
+    if (total + encoded + 1 < total) {
+      *out_len = 0;
+      return;
+    }
+
     total += encoded + 1;
   }
 
   if (in_len != 0) {
-    memcpy(&ctx->enc_data[0], in, in_len);
+    memcpy(ctx->data, in, in_len);
+  }
+
+  ctx->data_used = in_len;
+
+  if (total > INT_MAX) {
+    /* We cannot signal an error, but we can at least avoid making *out_len
+     * negative. */
+    total = 0;
   }
-  ctx->num = in_len;
   *out_len = total;
 }
 
 void EVP_EncodeFinal(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len) {
-  unsigned ret = 0;
-
-  if (ctx->num != 0) {
-    ret = EVP_EncodeBlock(out, ctx->enc_data, ctx->num);
-    out[ret++] = '\n';
-    out[ret] = '\0';
-    ctx->num = 0;
+  if (ctx->data_used == 0) {
+    *out_len = 0;
+    return;
   }
-  *out_len = ret;
+
+  size_t encoded = EVP_EncodeBlock(out, ctx->data, ctx->data_used);
+  out[encoded++] = '\n';
+  out[encoded] = '\0';
+  ctx->data_used = 0;
+  *out_len = encoded;
 }
 
 size_t EVP_EncodeBlock(uint8_t *dst, const uint8_t *src, size_t src_len) {
@@ -209,246 +207,223 @@ size_t EVP_EncodeBlock(uint8_t *dst, const uint8_t *src, size_t src_len) {
   return ret;
 }
 
+
+/* Decoding. */
+
 int EVP_DecodedLength(size_t *out_len, size_t len) {
   if (len % 4 != 0) {
     return 0;
   }
+
   *out_len = (len / 4) * 3;
   return 1;
 }
 
-int EVP_DecodeBase64(uint8_t *out, size_t *out_len, size_t max_out,
-                     const uint8_t *in, size_t in_len) {
-  uint8_t a, b, c, d;
-  size_t pad_len = 0, len = 0, max_len, i;
-  uint32_t l;
+void EVP_DecodeInit(EVP_ENCODE_CTX *ctx) {
+  memset(ctx, 0, sizeof(EVP_ENCODE_CTX));
+}
+
+/* kBase64ASCIIToBinData maps characters (c < 128) to their base64 value, or
+ * else 0xff if they are invalid. As a special case, the padding character
+ * ('=') is mapped to zero. */
+static const uint8_t kBase64ASCIIToBinData[128] = {
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xe0, 0xff, 0xff, 0xff,
+    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3e, 0xff, 0xff, 0xff, 0x3f,
+    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0xff, 0xff,
+    0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+    0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x12,
+    0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xff, 0xff, 0xff, 0xff, 0xff,
+    0xff, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0x22, 0x23, 0x24,
+    0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, 0x30,
+    0x31, 0x32, 0x33, 0xff, 0xff, 0xff, 0xff, 0xff,
+};
+
+static uint8_t base64_ascii_to_bin(uint8_t a) {
+  if (a >= 128) {
+    return 0xFF;
+  }
 
-  if (!EVP_DecodedLength(&max_len, in_len) || max_out < max_len) {
+  return kBase64ASCIIToBinData[a];
+}
+
+/* base64_decode_quad decodes a single “quad” (i.e. four characters) of base64
+ * data and writes up to three bytes to |out|. It sets |*out_num_bytes| to the
+ * number of bytes written, which will be less than three if the quad ended
+ * with padding.  It returns one on success or zero on error. */
+static int base64_decode_quad(uint8_t *out, size_t *out_num_bytes,
+                              const uint8_t *in) {
+  const uint8_t a = base64_ascii_to_bin(in[0]);
+  const uint8_t b = base64_ascii_to_bin(in[1]);
+  const uint8_t c = base64_ascii_to_bin(in[2]);
+  const uint8_t d = base64_ascii_to_bin(in[3]);
+  if (a == 0xff || b == 0xff || c == 0xff || d == 0xff) {
     return 0;
   }
 
-  for (i = 0; i < in_len; i += 4) {
-    a = conv_ascii2bin(*(in++));
-    b = conv_ascii2bin(*(in++));
-    if (i + 4 == in_len && in[1] == '=') {
-        if (in[0] == '=') {
-          pad_len = 2;
-        } else {
-          pad_len = 1;
-        }
-    }
-    if (pad_len < 2) {
-      c = conv_ascii2bin(*(in++));
-    } else {
-      c = 0;
-    }
-    if (pad_len < 1) {
-      d = conv_ascii2bin(*(in++));
-    } else {
-      d = 0;
-    }
-    if ((a & 0x80) || (b & 0x80) || (c & 0x80) || (d & 0x80)) {
+  const uint32_t v = ((uint32_t)a) << 18 | ((uint32_t)b) << 12 |
+                     ((uint32_t)c) << 6 | (uint32_t)d;
+
+  const unsigned padding_pattern = (in[0] == '=') << 3 |
+                                   (in[1] == '=') << 2 |
+                                   (in[2] == '=') << 1 |
+                                   (in[3] == '=');
+
+  switch (padding_pattern) {
+    case 0:
+      /* The common case of no padding. */
+      *out_num_bytes = 3;
+      out[0] = v >> 16;
+      out[1] = v >> 8;
+      out[2] = v;
+      break;
+
+    case 1: /* xxx= */
+      *out_num_bytes = 2;
+      out[0] = v >> 16;
+      out[1] = v >> 8;
+      break;
+
+    case 3: /* xx== */
+      *out_num_bytes = 1;
+      out[0] = v >> 16;
+      break;
+
+    default:
       return 0;
-    }
-    l = ((((uint32_t)a) << 18L) | (((uint32_t)b) << 12L) |
-         (((uint32_t)c) << 6L) | (((uint32_t)d)));
-    *(out++) = (uint8_t)(l >> 16L) & 0xff;
-    if (pad_len < 2) {
-      *(out++) = (uint8_t)(l >> 8L) & 0xff;
-    }
-    if (pad_len < 1) {
-      *(out++) = (uint8_t)(l) & 0xff;
-    }
-    len += 3 - pad_len;
   }
-  *out_len = len;
-  return 1;
-}
 
-void EVP_DecodeInit(EVP_ENCODE_CTX *ctx) {
-  ctx->length = 30;
-  ctx->num = 0;
-  ctx->line_num = 0;
-  ctx->expect_nl = 0;
+  return 1;
 }
 
 int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len,
                      const uint8_t *in, size_t in_len) {
-  int seof = -1, eof = 0, rv = -1, v, tmp, exp_nl;
-  uint8_t *d;
-  unsigned i, n, ln, ret = 0;
-
-  n = ctx->num;
-  d = ctx->enc_data;
-  ln = ctx->line_num;
-  exp_nl = ctx->expect_nl;
-
-  /* last line of input. */
-  if (in_len == 0 || (n == 0 && conv_ascii2bin(in[0]) == B64_EOF)) {
-    rv = 0;
-    goto end;
+  *out_len = 0;
+
+  if (ctx->error_encountered) {
+    return -1;
   }
 
-  /* We parse the input data */
+  size_t bytes_out = 0, i;
   for (i = 0; i < in_len; i++) {
-    /* If the current line is > 80 characters, scream alot */
-    if (ln >= 80) {
-      rv = -1;
-      goto end;
+    const char c = in[i];
+    switch (c) {
+      case ' ':
+      case '\t':
+      case '\r':
+      case '\n':
+        continue;
     }
 
-    /* Get char and put it into the buffer */
-    tmp = *(in++);
-    v = conv_ascii2bin(tmp);
-    /* only save the good data :-) */
-    if (!B64_NOT_BASE64(v)) {
-      assert(n < sizeof(ctx->enc_data));
-      d[n++] = tmp;
-      ln++;
-    } else if (v == B64_ERROR) {
-      rv = -1;
-      goto end;
+    if (base64_ascii_to_bin(c) == 0xff || ctx->eof_seen) {
+      ctx->error_encountered = 1;
+      return -1;
     }
 
-    /* have we seen a '=' which is 'definitly' the last
-     * input line.  seof will point to the character that
-     * holds it. and eof will hold how many characters to
-     * chop off. */
-    if (tmp == '=') {
-      if (seof == -1) {
-        seof = n;
+    ctx->data[ctx->data_used++] = c;
+    if (ctx->data_used == 4) {
+      size_t num_bytes_resulting;
+      if (!base64_decode_quad(out, &num_bytes_resulting, ctx->data)) {
+        ctx->error_encountered = 1;
+        return -1;
       }
-      eof++;
-      if (eof > 2) {
-        /* There are, at most, two equals signs at the end of base64 data. */
-        rv = -1;
-        goto end;
-      }
-    }
 
-    if (v == B64_CR) {
-      ln = 0;
-      if (exp_nl) {
-        continue;
-      }
-    }
+      ctx->data_used = 0;
+      bytes_out += num_bytes_resulting;
+      out += num_bytes_resulting;
 
-    /* eoln */
-    if (v == B64_EOLN) {
-      ln = 0;
-      if (exp_nl) {
-        exp_nl = 0;
-        continue;
-      }
-    }
-    exp_nl = 0;
-
-    /* If we are at the end of input and it looks like a
-     * line, process it. */
-    if ((i + 1) == in_len && (((n & 3) == 0) || eof)) {
-      v = B64_EOF;
-      /* In case things were given us in really small
-         records (so two '=' were given in separate
-         updates), eof may contain the incorrect number
-         of ending bytes to skip, so let's redo the count */
-      eof = 0;
-      if (d[n - 1] == '=') {
-        eof++;
-      }
-      if (d[n - 2] == '=') {
-        eof++;
+      if (num_bytes_resulting < 3) {
+        ctx->eof_seen = 1;
       }
-      /* There will never be more than two '=' */
     }
+  }
 
-    if ((v == B64_EOF && (n & 3) == 0) || n >= 64) {
-      /* This is needed to work correctly on 64 byte input
-       * lines.  We process the line and then need to
-       * accept the '\n' */
-      if (v != B64_EOF && n >= 64) {
-        exp_nl = 1;
-      }
-      if (n > 0) {
-        /* TODO(davidben): Switch this to EVP_DecodeBase64. */
-        v = EVP_DecodeBlock(out, d, n);
-        n = 0;
-        if (v < 0) {
-          rv = 0;
-          goto end;
-        }
-        if (eof > v) {
-          rv = -1;
-          goto end;
-        }
-        ret += (v - eof);
-      } else {
-        eof = 1;
-        v = 0;
-      }
+  if (bytes_out > INT_MAX) {
+    ctx->error_encountered = 1;
+    *out_len = 0;
+    return -1;
+  }
+  *out_len = bytes_out;
 
-      /* This is the case where we have had a short
-       * but valid input line */
-      if (v < (int)ctx->length && eof) {
-        rv = 0;
-        goto end;
-      } else {
-        ctx->length = v;
-      }
+  if (ctx->eof_seen) {
+    return 0;
+  }
 
-      if (seof >= 0) {
-        rv = 0;
-        goto end;
-      }
-      out += v;
-    }
+  return 1;
+}
+
+int EVP_DecodeFinal(EVP_ENCODE_CTX *ctx, uint8_t *out, int *out_len) {
+  *out_len = 0;
+  if (ctx->error_encountered || ctx->data_used != 0) {
+    return -1;
   }
-  rv = 1;
-
-end:
-  *out_len = ret;
-  ctx->num = n;
-  ctx->line_num = ln;
-  ctx->expect_nl = exp_nl;
-  return rv;
+
+  return 1;
 }
 
-int EVP_DecodeFinal(EVP_ENCODE_CTX *ctx, uint8_t *out, int *outl) {
-  int i;
+int EVP_DecodeBase64(uint8_t *out, size_t *out_len, size_t max_out,
+                     const uint8_t *in, size_t in_len) {
+  *out_len = 0;
+
+  if (in_len % 4 != 0) {
+    return 0;
+  }
 
-  *outl = 0;
-  if (ctx->num != 0) {
-    /* TODO(davidben): Switch this to EVP_DecodeBase64. */
-    i = EVP_DecodeBlock(out, ctx->enc_data, ctx->num);
-    if (i < 0) {
-      return -1;
+  size_t max_len;
+  if (!EVP_DecodedLength(&max_len, in_len) ||
+      max_out < max_len) {
+    return 0;
+  }
+
+  size_t i, bytes_out = 0;
+  for (i = 0; i < in_len; i += 4) {
+    size_t num_bytes_resulting;
+
+    if (!base64_decode_quad(out, &num_bytes_resulting, &in[i])) {
+      return 0;
+    }
+
+    bytes_out += num_bytes_resulting;
+    out += num_bytes_resulting;
+    if (num_bytes_resulting != 3 && i != in_len - 4) {
+      return 0;
     }
-    ctx->num = 0;
-    *outl = i;
-    return 1;
-  } else {
-    return 1;
   }
+
+  *out_len = bytes_out;
+  return 1;
 }
 
 int EVP_DecodeBlock(uint8_t *dst, const uint8_t *src, size_t src_len) {
-  size_t dst_len;
+  /* Trim spaces and tabs from the beginning of the input. */
+  while (src_len > 0) {
+    if (src[0] != ' ' && src[0] != '\t') {
+      break;
+    }
 
-  /* trim white space from the start of the line. */
-  while (conv_ascii2bin(*src) == B64_WS && src_len > 0) {
     src++;
     src_len--;
   }
 
-  /* strip off stuff at the end of the line
-   * ascii2bin values B64_WS, B64_EOLN, B64_EOLN and B64_EOF */
-  while (src_len > 3 && B64_NOT_BASE64(conv_ascii2bin(src[src_len - 1]))) {
-    src_len--;
-  }
+  /* Trim newlines, spaces and tabs from the end of the line. */
+  while (src_len > 0) {
+    switch (src[src_len-1]) {
+      case ' ':
+      case '\t':
+      case '\r':
+      case '\n':
+        src_len--;
+        continue;
+    }
 
-  if (!EVP_DecodedLength(&dst_len, src_len) || dst_len > INT_MAX) {
-    return -1;
+    break;
   }
-  if (!EVP_DecodeBase64(dst, &dst_len, dst_len, src, src_len)) {
+
+  size_t dst_len;
+  if (!EVP_DecodedLength(&dst_len, src_len) ||
+      dst_len > INT_MAX ||
+      !EVP_DecodeBase64(dst, &dst_len, dst_len, src, src_len)) {
     return -1;
   }
 
@@ -461,21 +436,3 @@ int EVP_DecodeBlock(uint8_t *dst, const uint8_t *src, size_t src_len) {
 
   return dst_len;
 }
-
-int EVP_EncodedLength(size_t *out_len, size_t len) {
-  if (len + 2 < len) {
-    return 0;
-  }
-  len += 2;
-  len /= 3;
-  if (((len << 2) >> 2) != len) {
-    return 0;
-  }
-  len <<= 2;
-  if (len + 1 < len) {
-    return 0;
-  }
-  len++;
-  *out_len = len;
-  return 1;
-}
diff --git a/crypto/base64/base64_test.cc b/crypto/base64/base64_test.cc
index da016e66..a6087732 100644
--- a/crypto/base64/base64_test.cc
+++ b/crypto/base64/base64_test.cc
@@ -15,76 +15,203 @@
 #include <stdio.h>
 #include <string.h>
 
+#include <string>
+#include <vector>
+
 #include <openssl/base64.h>
 #include <openssl/crypto.h>
 #include <openssl/err.h>
 
 
+enum encoding_relation {
+  // canonical indicates that the encoding is the expected encoding of the
+  // input.
+  canonical,
+  // valid indicates that the encoding is /a/ valid encoding of the input, but
+  // need not be the canonical one.
+  valid,
+  // invalid indicates that the encoded data is invalid.
+  invalid,
+};
+
 struct TestVector {
+  enum encoding_relation relation;
   const char *decoded;
   const char *encoded;
 };
 
 // Test vectors from RFC 4648.
 static const TestVector kTestVectors[] = {
-  { "", "" },
-  { "f" , "Zg==" },
-  { "fo", "Zm8=" },
-  { "foo", "Zm9v" },
-  { "foob", "Zm9vYg==" },
-  { "fooba", "Zm9vYmE=" },
-  { "foobar", "Zm9vYmFy" },
+    {canonical, "", ""},
+    {canonical, "f", "Zg==\n"},
+    {canonical, "fo", "Zm8=\n"},
+    {canonical, "foo", "Zm9v\n"},
+    {canonical, "foob", "Zm9vYg==\n"},
+    {canonical, "fooba", "Zm9vYmE=\n"},
+    {canonical, "foobar", "Zm9vYmFy\n"},
+    {valid, "foobar", "Zm9vYmFy\n\n"},
+    {valid, "foobar", " Zm9vYmFy\n\n"},
+    {valid, "foobar", " Z m 9 v Y m F y\n\n"},
+    {invalid, "", "Zm9vYmFy=\n"},
+    {invalid, "", "Zm9vYmFy==\n"},
+    {invalid, "", "Zm9vYmFy===\n"},
+    {invalid, "", "Z"},
+    {invalid, "", "Z\n"},
+    {invalid, "", "ab!c"},
+    {invalid, "", "ab=c"},
+    {invalid, "", "abc"},
+
+    {canonical, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA==\n"},
+    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA\n==\n"},
+    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA=\n=\n"},
+    {invalid, "",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA=\n==\n"},
+    {canonical, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4\neHh4eHh"
+     "4eHh4eHh4\n"},
+    {canonical,
+     "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4\neHh4eHh"
+     "4eHh4eHh4eHh4eA==\n"},
+    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh\n4eHh4eHh"
+     "4eHh4eHh4eHh4eA==\n"},
+    {valid, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4e"
+     "Hh4eHh4eHh4eA==\n"},
+    {invalid, "",
+     "eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eHh4eA=="
+     "\neHh4eHh4eHh4eHh4eHh4eHh4\n"},
+
+    // A '-' has traditionally been treated as the end of the data by OpenSSL
+    // and anything following would be ignored. BoringSSL does not accept this
+    // non-standard extension.
+    {invalid, "", "Zm9vYmFy-anythinggoes"},
+    {invalid, "", "Zm9vYmFy\n-anythinggoes"},
+
+    // CVE-2015-0292
+    {invalid, "",
+     "ZW5jb2RlIG1lCg==========================================================="
+     "=======\n"},
 };
 
 static const size_t kNumTests = sizeof(kTestVectors) / sizeof(kTestVectors[0]);
 
-static bool TestEncode() {
-  for (size_t i = 0; i < kNumTests; i++) {
+// RemoveNewlines returns a copy of |in| with all '\n' characters removed.
+static std::string RemoveNewlines(const char *in) {
+  std::string ret;
+  const size_t in_len = strlen(in);
+
+  size_t i;
+  for (i = 0; i < in_len; i++) {
+    if (in[i] != '\n') {
+      ret.push_back(in[i]);
+    }
+  }
+
+  return ret;
+}
+
+static bool TestEncodeBlock() {
+  for (unsigned i = 0; i < kNumTests; i++) {
     const TestVector *t = &kTestVectors[i];
-    uint8_t out[9];
-    size_t len = EVP_EncodeBlock(out, (const uint8_t*)t->decoded,
-                                 strlen(t->decoded));
-    if (len != strlen(t->encoded) ||
-        memcmp(out, t->encoded, len) != 0) {
+    if (t->relation != canonical) {
+      continue;
+    }
+
+    const size_t decoded_len = strlen(t->decoded);
+    size_t max_encoded_len;
+    if (!EVP_EncodedLength(&max_encoded_len, decoded_len)) {
+      fprintf(stderr, "#%u: EVP_EncodedLength failed\n", i);
+      return false;
+    }
+
+    std::vector<uint8_t> out_vec(max_encoded_len);
+    uint8_t *out = out_vec.data();
+    size_t len = EVP_EncodeBlock(out, (const uint8_t *)t->decoded, decoded_len);
+
+    std::string encoded(RemoveNewlines(t->encoded));
+    if (len != encoded.size() ||
+        memcmp(out, encoded.data(), len) != 0) {
       fprintf(stderr, "encode(\"%s\") = \"%.*s\", want \"%s\"\n",
-              t->decoded, (int)len, (const char*)out, t->encoded);
+              t->decoded, (int)len, (const char*)out, encoded.c_str());
       return false;
     }
   }
+
   return true;
 }
 
-static bool TestDecode() {
-  uint8_t out[6];
+static bool TestDecodeBase64() {
   size_t len;
 
-  for (size_t i = 0; i < kNumTests; i++) {
-    // Test the normal API.
+  for (unsigned i = 0; i < kNumTests; i++) {
     const TestVector *t = &kTestVectors[i];
-    size_t expected_len = strlen(t->decoded);
-    if (!EVP_DecodeBase64(out, &len, sizeof(out),
-                          (const uint8_t*)t->encoded, strlen(t->encoded))) {
-      fprintf(stderr, "decode(\"%s\") failed\n", t->encoded);
-      return false;
+
+    if (t->relation == valid) {
+      // The non-canonical encodings will generally have odd whitespace etc
+      // that |EVP_DecodeBase64| will reject.
+      continue;
     }
-    if (len != strlen(t->decoded) ||
-        memcmp(out, t->decoded, len) != 0) {
-      fprintf(stderr, "decode(\"%s\") = \"%.*s\", want \"%s\"\n",
-              t->encoded, (int)len, (const char*)out, t->decoded);
-      return false;
+
+    const std::string encoded(RemoveNewlines(t->encoded));
+    std::vector<uint8_t> out_vec(encoded.size());
+    uint8_t *out = out_vec.data();
+
+    int ok = EVP_DecodeBase64(out, &len, out_vec.size(),
+                              (const uint8_t *)encoded.data(), encoded.size());
+
+    if (t->relation == invalid) {
+      if (ok) {
+        fprintf(stderr, "decode(\"%s\") didn't fail but should have\n",
+                encoded.c_str());
+        return false;
+      }
+    } else if (t->relation == canonical) {
+      if (!ok) {
+        fprintf(stderr, "decode(\"%s\") failed\n", encoded.c_str());
+        return false;
+      }
+
+      if (len != strlen(t->decoded) ||
+          memcmp(out, t->decoded, len) != 0) {
+        fprintf(stderr, "decode(\"%s\") = \"%.*s\", want \"%s\"\n",
+                encoded.c_str(), (int)len, (const char*)out, t->decoded);
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
+
+static bool TestDecodeBlock() {
+  for (unsigned i = 0; i < kNumTests; i++) {
+    const TestVector *t = &kTestVectors[i];
+    if (t->relation != canonical) {
+      continue;
     }
 
+    std::string encoded(RemoveNewlines(t->encoded));
+
+    std::vector<uint8_t> out_vec(encoded.size());
+    uint8_t *out = out_vec.data();
+
     // Test that the padding behavior of the deprecated API is preserved.
-    int ret = EVP_DecodeBlock(out, (const uint8_t*)t->encoded,
-                              strlen(t->encoded));
+    int ret =
+        EVP_DecodeBlock(out, (const uint8_t *)encoded.data(), encoded.size());
     if (ret < 0) {
-      fprintf(stderr, "decode(\"%s\") failed\n", t->encoded);
+      fprintf(stderr, "EVP_DecodeBlock(\"%s\") failed\n", t->encoded);
       return false;
     }
     if (ret % 3 != 0) {
       fprintf(stderr, "EVP_DecodeBlock did not ignore padding\n");
       return false;
     }
+    size_t expected_len = strlen(t->decoded);
     if (expected_len % 3 != 0) {
       ret -= 3 - (expected_len % 3);
     }
@@ -96,19 +223,155 @@ static bool TestDecode() {
     }
   }
 
-  if (EVP_DecodeBase64(out, &len, sizeof(out), (const uint8_t*)"a!bc", 4)) {
-    fprintf(stderr, "Failed to reject invalid characters in the middle.\n");
-    return false;
-  }
+  return true;
+}
+
+static bool TestEncodeDecode() {
+  for (unsigned test_num = 0; test_num < kNumTests; test_num++) {
+    const TestVector *t = &kTestVectors[test_num];
+
+    EVP_ENCODE_CTX ctx;
+    const size_t decoded_len = strlen(t->decoded);
+
+    if (t->relation == canonical) {
+      size_t max_encoded_len;
+      if (!EVP_EncodedLength(&max_encoded_len, decoded_len)) {
+        fprintf(stderr, "#%u: EVP_EncodedLength failed\n", test_num);
+        return false;
+      }
+
+      // EVP_EncodeUpdate will output new lines every 64 bytes of output so we
+      // need slightly more than |EVP_EncodedLength| returns.
+      max_encoded_len += (max_encoded_len + 63) >> 6;
+      std::vector<uint8_t> out_vec(max_encoded_len);
+      uint8_t *out = out_vec.data();
+
+      EVP_EncodeInit(&ctx);
+
+      int out_len;
+      EVP_EncodeUpdate(&ctx, out, &out_len,
+                       reinterpret_cast<const uint8_t *>(t->decoded),
+                       decoded_len);
+      size_t total = out_len;
+
+      EVP_EncodeFinal(&ctx, out + total, &out_len);
+      total += out_len;
+
+      if (total != strlen(t->encoded) || memcmp(out, t->encoded, total) != 0) {
+        fprintf(stderr, "#%u: EVP_EncodeUpdate produced different output: '%s' (%u)\n",
+                test_num, out, static_cast<unsigned>(total));
+        return false;
+      }
+    }
+
+    std::vector<uint8_t> out_vec(strlen(t->encoded));
+    uint8_t *out = out_vec.data();
+
+    EVP_DecodeInit(&ctx);
+    int out_len;
+    size_t total = 0;
+    int ret = EVP_DecodeUpdate(&ctx, out, &out_len,
+                               reinterpret_cast<const uint8_t *>(t->encoded),
+                               strlen(t->encoded));
+    if (ret != -1) {
+      total = out_len;
+      ret = EVP_DecodeFinal(&ctx, out + total, &out_len);
+      total += out_len;
+    }
+
+    switch (t->relation) {
+      case canonical:
+      case valid:
+        if (ret == -1) {
+          fprintf(stderr, "#%u: EVP_DecodeUpdate failed\n", test_num);
+          return false;
+        }
+        if (total != decoded_len || memcmp(out, t->decoded, decoded_len)) {
+          fprintf(stderr, "#%u: EVP_DecodeUpdate produced incorrect output\n",
+                  test_num);
+          return false;
+        }
+        break;
 
-  if (EVP_DecodeBase64(out, &len, sizeof(out), (const uint8_t*)"a=bc", 4)) {
-    fprintf(stderr, "Failed to reject invalid characters in the middle.\n");
-    return false;
+      case invalid:
+        if (ret != -1) {
+          fprintf(stderr, "#%u: EVP_DecodeUpdate was successful but shouldn't have been\n", test_num);
+          return false;
+        }
+        break;
+    }
   }
 
-  if (EVP_DecodeBase64(out, &len, sizeof(out), (const uint8_t*)"abc", 4)) {
-    fprintf(stderr, "Failed to reject invalid input length.\n");
-    return false;
+  return true;
+}
+
+static bool TestDecodeUpdateStreaming() {
+  for (unsigned test_num = 0; test_num < kNumTests; test_num++) {
+    const TestVector *t = &kTestVectors[test_num];
+    if (t->relation == invalid) {
+      continue;
+    }
+
+    const size_t encoded_len = strlen(t->encoded);
+
+    std::vector<uint8_t> out(encoded_len);
+
+    for (size_t chunk_size = 1; chunk_size <= encoded_len; chunk_size++) {
+      size_t out_len = 0;
+      EVP_ENCODE_CTX ctx;
+      EVP_DecodeInit(&ctx);
+
+      for (size_t i = 0; i < encoded_len;) {
+        size_t todo = encoded_len - i;
+        if (todo > chunk_size) {
+          todo = chunk_size;
+        }
+
+        int bytes_written;
+        int ret = EVP_DecodeUpdate(
+            &ctx, out.data() + out_len, &bytes_written,
+            reinterpret_cast<const uint8_t *>(t->encoded + i), todo);
+        i += todo;
+
+        switch (ret) {
+          case -1:
+            fprintf(stderr, "#%u: EVP_DecodeUpdate returned error\n", test_num);
+            return 0;
+          case 0:
+            out_len += bytes_written;
+            if (i == encoded_len ||
+                (i + 1 == encoded_len && t->encoded[i] == '\n') ||
+                /* If there was an '-' in the input (which means “EOF”) then
+                 * this loop will continue to test that |EVP_DecodeUpdate| will
+                 * ignore the remainder of the input. */
+                strchr(t->encoded, '-') != nullptr) {
+              break;
+            }
+
+            fprintf(stderr,
+                    "#%u: EVP_DecodeUpdate returned zero before end of "
+                    "encoded data\n",
+                    test_num);
+            return 0;
+          default:
+            out_len += bytes_written;
+        }
+      }
+
+      int bytes_written;
+      int ret = EVP_DecodeFinal(&ctx, out.data() + out_len, &bytes_written);
+      if (ret == -1) {
+        fprintf(stderr, "#%u: EVP_DecodeFinal returned error\n", test_num);
+        return 0;
+      }
+      out_len += bytes_written;
+
+      if (out_len != strlen(t->decoded) ||
+          memcmp(out.data(), t->decoded, out_len) != 0) {
+        fprintf(stderr, "#%u: incorrect output\n", test_num);
+        return 0;
+      }
+    }
   }
 
   return true;
@@ -117,8 +380,11 @@ static bool TestDecode() {
 int main(void) {
   CRYPTO_library_init();
 
-  if (!TestEncode() ||
-      !TestDecode()) {
+  if (!TestEncodeBlock() ||
+      !TestDecodeBase64() ||
+      !TestDecodeBlock() ||
+      !TestDecodeUpdateStreaming() ||
+      !TestEncodeDecode()) {
     return 1;
   }
author	Adam Langley <alangley@gmail.com>	2016-05-20 20:51:48 +0300
committer	Adam Langley <agl@google.com>	2016-05-26 20:59:10 +0300
commit	d09175ffe335d9be6846b4ac5e9e622d96213a00 (patch)
tree	f4b68d43bbedb052902c3f0c87d581066ef4d519 /crypto/base64
parent	1cb405d96b11db5767446766d76516534067bbd1 (diff)