12 files changed, 1905 insertions, 0 deletions
diff --git a/emb/pastilda/lib/crypto/base64.h b/emb/pastilda/lib/crypto/base64.h
new file mode 100644
index 0000000..3ffaf77
--- /dev/null
+++ b/emb/pastilda/lib/crypto/base64.h
@@ -0,0 +1,255 @@
+#ifndef BASE64_H
+#define BASE64_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <string>
+
+// The 64 symbols of the standard Base64 alphabet, indexed by 6-bit value.
+const char kBase64Alphabet[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+// Stateless Base64 encoder/decoder using kBase64Alphabet and '=' padding.
+//
+// Decoding stops at the first '=' and does not validate the alphabet: bytes
+// outside it are mapped to 255 by b64_lookup() and decode to garbage instead
+// of being rejected.
+class Base64 {
+ public:
+  // Encodes `in` into `*out`, which is resized to EncodedLength(in).
+  // Returns true when exactly that many characters were produced.
+  static bool Encode(const std::string &in, std::string *out) {
+    out->resize(EncodedLength(in));
+    if (out->empty()) return true;  // empty input encodes to empty output
+
+    return Encode(in.data(), in.size(), &(*out)[0], out->size());
+  }
+
+  // Encodes `input_length` bytes at `input` into `out`.
+  //
+  // `out_length` is the capacity of `out`; the call fails (returns false,
+  // nothing written) when it is smaller than EncodedLength(input_length).
+  // The output is NOT NUL-terminated.
+  static bool Encode(const char *input, size_t input_length, char *out, size_t out_length) {
+    const size_t encoded_length = EncodedLength(input_length);
+    if (out_length < encoded_length) return false;
+
+    char *const out_begin = out;
+    unsigned char a3[3];
+    unsigned char a4[4];
+    int i = 0;
+
+    while (input_length--) {
+      a3[i++] = *input++;
+      if (i == 3) {
+        a3_to_a4(a4, a3);
+        for (i = 0; i < 4; i++) {
+          *out++ = kBase64Alphabet[a4[i]];
+        }
+        i = 0;
+      }
+    }
+
+    if (i) {
+      // 1 or 2 leftover bytes: zero-fill the group, emit i + 1 symbols and
+      // pad the quartet with '='.
+      for (int j = i; j < 3; j++) {
+        a3[j] = '\0';
+      }
+      a3_to_a4(a4, a3);
+      for (int j = 0; j < i + 1; j++) {
+        *out++ = kBase64Alphabet[a4[j]];
+      }
+      while (i++ < 3) {
+        *out++ = '=';
+      }
+    }
+
+    return out == out_begin + encoded_length;
+  }
+
+  // Decodes `in` into `*out`, which is resized to DecodedLength(in).
+  //
+  // Returns false (with `*out` cleared) when the input carries more padding
+  // than data, and false when the number of bytes actually decoded differs
+  // from DecodedLength(in).
+  static bool Decode(const std::string &in, std::string *out) {
+    const int expected = DecodedLength(in);
+    if (expected < 0) {
+      out->clear();
+      return false;
+    }
+    out->resize(expected);
+
+    // Give the raw decoder a valid pointer even when nothing will be written.
+    char scratch = 0;
+    char *dst = out->empty() ? &scratch : &(*out)[0];
+    uint32_t written = 0;
+    return Decode(in.data(), in.size(), dst, out->size(), &written);
+  }
+
+  // Decodes `input_length` characters at `input` into `out`.
+  //
+  // `*res_length` receives DecodedLength(input, input_length), or 0 when that
+  // length is negative (malformed padding). Returns false when the length is
+  // negative, when `out_length` is too small, or when the decoded byte count
+  // does not match the expected length. Never writes more than the expected
+  // length, so excess padding cannot overrun `out`.
+  static bool Decode(const char *input, size_t input_length, char *out, size_t out_length, uint32_t *res_length) {
+    const int expected = DecodedLength(input, input_length);
+    if (expected < 0) {
+      *res_length = 0;
+      return false;
+    }
+
+    const size_t decoded_length = static_cast<size_t>(expected);
+    *res_length = decoded_length;
+
+    if (out_length < decoded_length) return false;
+
+    char *const out_limit = out + decoded_length;
+    unsigned char a3[3];
+    unsigned char a4[4];
+    int i = 0;
+
+    while (input_length--) {
+      if (*input == '=') {
+        break;
+      }
+
+      a4[i++] = *(input++);
+      if (i == 4) {
+        for (i = 0; i < 4; i++) {
+          a4[i] = b64_lookup(a4[i]);
+        }
+
+        a4_to_a3(a3, a4);
+
+        for (i = 0; i < 3; i++) {
+          if (out == out_limit) return false;  // more data than the length implies
+          *out++ = a3[i];
+        }
+
+        i = 0;
+      }
+    }
+
+    if (i) {
+      // Incomplete trailing quartet: i symbols carry i - 1 whole bytes.
+      for (int j = 0; j < i; j++) {
+        a4[j] = b64_lookup(a4[j]);
+      }
+      for (int j = i; j < 4; j++) {
+        a4[j] = 0;
+      }
+
+      a4_to_a3(a3, a4);
+
+      for (int j = 0; j < i - 1; j++) {
+        if (out == out_limit) return false;
+        *out++ = a3[j];
+      }
+    }
+
+    return out == out_limit;
+  }
+
+  // Number of bytes the `in_length` characters at `in` decode to:
+  // floor(6 * in_length / 8) minus the count of trailing '=' characters.
+  // The scan for '=' never leaves the buffer, so empty and all-padding input
+  // is safe. The result is negative when there is more padding than data.
+  static int DecodedLength(const char *in, size_t in_length) {
+    size_t num_eq = 0;
+    while (num_eq < in_length && in[in_length - 1 - num_eq] == '=') {
+      ++num_eq;
+    }
+
+    return static_cast<int>((6 * in_length) / 8) - static_cast<int>(num_eq);
+  }
+
+  // std::string convenience overload of DecodedLength().
+  static int DecodedLength(const std::string &in) {
+    return DecodedLength(in.data(), in.size());
+  }
+
+  // Length of the padded Base64 text for `length` input bytes:
+  // 4 characters for every started group of 3 bytes.
+  inline static int EncodedLength(size_t length) {
+    return (length + 2 - ((length + 2) % 3)) / 3 * 4;
+  }
+
+  // std::string convenience overload of EncodedLength().
+  inline static int EncodedLength(const std::string &in) {
+    return EncodedLength(in.length());
+  }
+
+  // Removes every trailing '=' from `*in`.
+  inline static void StripPadding(std::string *in) {
+    while (!in->empty() && *(in->rbegin()) == '=') in->resize(in->size() - 1);
+  }
+
+ private:
+  // Splits three 8-bit bytes into four 6-bit values.
+  static inline void a3_to_a4(unsigned char * a4, const unsigned char * a3) {
+    a4[0] = (a3[0] & 0xfc) >> 2;
+    a4[1] = ((a3[0] & 0x03) << 4) + ((a3[1] & 0xf0) >> 4);
+    a4[2] = ((a3[1] & 0x0f) << 2) + ((a3[2] & 0xc0) >> 6);
+    a4[3] = (a3[2] & 0x3f);
+  }
+
+  // Packs four 6-bit values back into three 8-bit bytes.
+  static inline void a4_to_a3(unsigned char * a3, const unsigned char * a4) {
+    a3[0] = (a4[0] << 2) + ((a4[1] & 0x30) >> 4);
+    a3[1] = ((a4[1] & 0xf) << 4) + ((a4[2] & 0x3c) >> 2);
+    a3[2] = ((a4[2] & 0x3) << 6) + a4[3];
+  }
+
+  // Maps an alphabet character to its 6-bit value; returns 255 for any other
+  // byte.
+  static inline unsigned char b64_lookup(unsigned char c) {
+    if(c >='A' && c <='Z') return c - 'A';
+    if(c >='a' && c <='z') return c - 'a' + 26;
+    if(c >='0' && c <='9') return c - '0' + 52;
+    if(c == '+') return 62;
+    if(c == '/') return 63;
+    return 255;
+  }
+};
+
+
+
+#endif // BASE64_H
diff --git a/emb/pastilda/lib/crypto/bitops.h b/emb/pastilda/lib/crypto/bitops.h
new file mode 100644
index 0000000..d0e6942
--- /dev/null
+++ b/emb/pastilda/lib/crypto/bitops.h
@@ -0,0 +1,294 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef BITOPS_H
+#define BITOPS_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Assorted bitwise and common operations used in ciphers. */
+
+/** Circularly rotate right x by n bits.
+ *  Intended for 0 < n < 32.  Both shift counts are reduced modulo 32, so
+ *  n == 0 (or any multiple of 32) is well defined and returns x unchanged
+ *  instead of shifting by the full word width. */
+static inline uint32_t rotr32(uint32_t x, unsigned n)
+{
+  return (x >> (n & 31)) | (x << ((32 - n) & 31));
+}
+
+/** Circularly rotate left x by n bits.
+ *  Intended for 0 < n < 32.  Both shift counts are reduced modulo 32, so
+ *  n == 0 (or any multiple of 32) is well defined and returns x unchanged
+ *  instead of shifting by the full word width. */
+static inline uint32_t rotl32(uint32_t x, unsigned n)
+{
+  return (x << (n & 31)) | (x >> ((32 - n) & 31));
+}
+
+/** Circularly rotate right x by n bits.
+ *  Intended for 0 < n < 64.  Both shift counts are reduced modulo 64, so
+ *  n == 0 (or any multiple of 64) is well defined and returns x unchanged
+ *  instead of shifting by the full word width. */
+static inline uint64_t rotr64(uint64_t x, unsigned n)
+{
+  return (x >> (n & 63)) | (x << ((64 - n) & 63));
+}
+
+/** Circularly rotate left x by n bits.
+ *  Intended for 0 < n < 64.  Both shift counts are reduced modulo 64, so
+ *  n == 0 (or any multiple of 64) is well defined and returns x unchanged
+ *  instead of shifting by the full word width. */
+static inline uint64_t rotl64(uint64_t x, unsigned n)
+{
+  return (x << (n & 63)) | (x >> ((64 - n) & 63));
+}
+
+/** Read 4 bytes from buf, as a 32-bit big endian quantity.
+ *  Each byte is widened to uint32_t before shifting so that a top byte
+ *  of 0x80 or more is not shifted into the sign bit of a promoted int. */
+static inline uint32_t read32_be(const uint8_t buf[4])
+{
+  return ((uint32_t) buf[0] << 24) |
+         ((uint32_t) buf[1] << 16) |
+         ((uint32_t) buf[2] << 8) |
+         ((uint32_t) buf[3]);
+}
+
+/** Read 4 bytes from buf, as a 32-bit little endian quantity.
+ *  Each byte is widened to uint32_t before shifting so that a top byte
+ *  of 0x80 or more is not shifted into the sign bit of a promoted int. */
+static inline uint32_t read32_le(const uint8_t buf[4])
+{
+  return ((uint32_t) buf[3] << 24) |
+         ((uint32_t) buf[2] << 16) |
+         ((uint32_t) buf[1] << 8) |
+         ((uint32_t) buf[0]);
+}
+
+/** Read 8 bytes from buf, as a 64-bit big endian quantity.
+ *  The first four bytes form the upper word, the last four the lower. */
+static inline uint64_t read64_be(const uint8_t buf[8])
+{
+  uint64_t upper = read32_be(buf);
+  uint64_t lower = read32_be(buf + 4);
+  return (upper << 32) | lower;
+}
+
+/** Read 8 bytes from buf, as a 64-bit little endian quantity.
+ *  The last four bytes form the upper word, the first four the lower. */
+static inline uint64_t read64_le(const uint8_t buf[8])
+{
+  uint64_t upper = read32_le(buf + 4);
+  uint64_t lower = read32_le(buf);
+  return (upper << 32) | lower;
+}
+
+/** Encode v as a 32-bit big endian quantity into buf
+ *  (most significant byte at buf[0]). */
+static inline void write32_be(uint32_t v, uint8_t buf[4])
+{
+  buf[0] = (uint8_t) (v >> 24);
+  buf[1] = (uint8_t) (v >> 16);
+  buf[2] = (uint8_t) (v >> 8);
+  buf[3] = (uint8_t) v;
+}
+
+/** Encode v as a 32-bit little endian quantity into buf
+ *  (least significant byte at buf[0]). */
+static inline void write32_le(uint32_t v, uint8_t buf[4])
+{
+  buf[0] = (uint8_t) v;
+  buf[1] = (uint8_t) (v >> 8);
+  buf[2] = (uint8_t) (v >> 16);
+  buf[3] = (uint8_t) (v >> 24);
+}
+
+/** Encode v as a 64-bit big endian quantity into buf
+ *  (most significant byte at buf[0]). */
+static inline void write64_be(uint64_t v, uint8_t buf[8])
+{
+  for (unsigned i = 0; i < 8; i++)
+    buf[i] = (uint8_t) (v >> (56 - 8 * i));
+}
+
+/** Encode v as a 64-bit little endian quantity into buf
+ *  (least significant byte at buf[0]). */
+static inline void write64_le(uint64_t v, uint8_t buf[8])
+{
+  for (unsigned i = 0; i < 8; i++)
+    buf[i] = (uint8_t) (v >> (8 * i));
+}
+
+/** XOR every one of the len bytes at in with the constant b8, writing
+ *  the results to out.  Bytes are handled front to back, one at a time;
+ *  out and in may alias. */
+static inline void xor_b8(uint8_t *out, const uint8_t *in, uint8_t b8, size_t len)
+{
+  size_t remaining = len;
+
+  while (remaining--)
+  {
+    *out = *in ^ b8;
+    out++;
+    in++;
+  }
+}
+
+/** XOR the len bytes at x with the len bytes at y, writing the results
+ *  to out.  Bytes are handled front to back, one at a time;
+ *  out, x and y may alias. */
+static inline void xor_bb(uint8_t *out, const uint8_t *x, const uint8_t *y, size_t len)
+{
+  size_t remaining = len;
+
+  while (remaining--)
+  {
+    *out = *x ^ *y;
+    out++;
+    x++;
+    y++;
+  }
+}
+
+/* XOR the nwords 32-bit words at x into the words at out, in place
+ * (out[i] ^= x[i]), front to back.
+ * out and x may alias. */
+static inline void xor_words(uint32_t *out, const uint32_t *x, size_t nwords)
+{
+  const uint32_t *stop = x + nwords;
+
+  for (; x != stop; x++, out++)
+    *out ^= *x;
+}
+
+/** Produce 0xffffffff if x == y, zero otherwise, without branching.
+ *  The top bit of (~delta & (delta - 1)) is set only when delta is zero;
+ *  that bit is then spread over the whole word by negation. */
+static inline uint32_t mask_u32(uint32_t x, uint32_t y)
+{
+  uint32_t delta = x ^ y;
+  uint32_t equal_bit = (~delta & (delta - 1)) >> 31;
+  return (uint32_t) 0 - equal_bit;
+}
+
+/** Produce 0xff if x == y, zero otherwise, without branching.
+ *  The comparison is done on the full 32-bit values: the equality bit is
+ *  taken from bit 31 of (~diff & (diff - 1)) before narrowing, so inputs
+ *  that differ only above bit 7 (e.g. 0x100 vs 0) correctly yield zero. */
+static inline uint8_t mask_u8(uint32_t x, uint32_t y)
+{
+  uint32_t diff = x ^ y;
+  uint32_t diff_is_zero = (~diff & (diff - 1)) >> 31;
+  return (uint8_t) ((uint32_t) 0 - diff_is_zero);
+}
+
+/** Select the ith entry from the given table of n values, in a side
+ *  channel-silent way: every entry is read, and the wanted one is merged
+ *  into the result with an equality mask rather than a branch.
+ *  Returns 0 when i is not below n. */
+static inline uint32_t select_u32(uint32_t i, volatile const uint32_t *tab, uint32_t n)
+{
+  uint32_t picked = 0;
+  uint32_t pos = 0;
+
+  while (pos < n)
+  {
+    uint32_t keep = mask_u32(i, pos);
+    picked ^= (picked ^ tab[pos]) & keep;
+    pos++;
+  }
+
+  return picked;
+}
+
+/** Select the ith entry from the given table of n byte values, in a side
+ *  channel-silent way: every entry is read, and the wanted one is merged
+ *  into the result with an equality mask rather than a branch.
+ *  Returns 0 when no index matches. */
+static inline uint8_t select_u8(uint32_t i, volatile const uint8_t *tab, uint32_t n)
+{
+  uint8_t picked = 0;
+  uint32_t pos = 0;
+
+  while (pos < n)
+  {
+    uint8_t keep = mask_u8(i, pos);
+    picked ^= (picked ^ tab[pos]) & keep;
+    pos++;
+  }
+
+  return picked;
+}
+
+/** Select the ath, bth, cth and dth entries from the given table of n values,
+ *  placing the results into a, b, c and d.
+ *  On entry *a..*d hold the indices; on exit they hold the table values
+ *  (0 for an index that matched nothing).  Each table entry is read once
+ *  per pass and merged with equality masks, not branches. */
+static inline void select_u8x4(uint8_t *a, uint8_t *b, uint8_t *c, uint8_t *d,
+                               volatile const uint8_t *tab, uint32_t n)
+{
+  const uint32_t wanted[4] = { *a, *b, *c, *d };
+  uint8_t found[4] = { 0, 0, 0, 0 };
+
+  for (uint32_t i = 0; i < n; i++)
+  {
+    uint8_t item = tab[i];
+
+    for (unsigned k = 0; k < 4; k++)
+    {
+      uint8_t mask = mask_u8(wanted[k], i);
+      found[k] = (found[k] & ~mask) | (item & mask);
+    }
+  }
+
+  *a = found[0];
+  *b = found[1];
+  *c = found[2];
+  *d = found[3];
+}
+
+/** out ^= if0 or if1, depending on the value of bit.
+ *  if1 is used when bit == 1; any other value of bit selects if0.
+ *  The choice is made with masks, not a branch. */
+static inline void select_xor128(uint32_t out[4],
+                                 const uint32_t if0[4],
+                                 const uint32_t if1[4],
+                                 uint8_t bit)
+{
+  uint32_t take1 = mask_u32(bit, 1);
+  uint32_t take0 = ~take1;
+
+  for (size_t w = 0; w < 4; w++)
+    out[w] ^= (if0[w] & take0) | (if1[w] & take1);
+}
+
+/** Increments the integer stored at v (of length len bytes)
+ *  with the least significant byte first.
+ *  The carry ripples towards higher indices and the value wraps to zero
+ *  on overflow.  A len of zero is accepted and leaves v untouched. */
+static inline void incr_le(uint8_t *v, size_t len)
+{
+  for (size_t i = 0; i < len; i++)
+  {
+    /* Stop as soon as a byte does not wrap: no carry left to propagate. */
+    if (++v[i] != 0)
+      return;
+  }
+}
+
+/** Increments the integer stored at v (of length len bytes)
+ *  with the least significant byte last.
+ *  The carry ripples towards lower indices and the value wraps to zero
+ *  on overflow.  A len of zero is accepted and leaves v untouched. */
+static inline void incr_be(uint8_t *v, size_t len)
+{
+  while (len > 0)
+  {
+    /* Stop as soon as a byte does not wrap: no carry left to propagate. */
+    if (++v[--len] != 0)
+      return;
+  }
+}
+
+/** Copies len bytes to out, taken from the bit string at in starting
+ *  offset bits from its beginning (bit 7 of in[0] is bit 0 of the string).
+ *
+ *  When offset is a multiple of 8 exactly len bytes are read, starting at
+ *  in[offset / 8].  Otherwise each output byte straddles two input bytes,
+ *  so len + 1 bytes are read starting at in[offset / 8]. */
+static inline void copy_bytes_unaligned(uint8_t *out, const uint8_t *in, size_t len, uint8_t offset)
+{
+  size_t byte_off = offset / 8;
+  unsigned bit_off = offset & 7;
+
+  if (bit_off == 0)
+  {
+    /* Byte aligned: no neighbour byte is needed, so do not touch it. */
+    for (size_t i = 0; i < len; i++)
+      out[i] = in[i + byte_off];
+    return;
+  }
+
+  for (size_t i = 0; i < len; i++)
+  {
+    unsigned high = (unsigned) in[i + byte_off] << bit_off;
+    unsigned low = (unsigned) in[i + byte_off + 1] >> (8 - bit_off);
+    out[i] = (uint8_t) (high | low);
+  }
+}
+
+/** Returns the number of zero bits below the lowest set bit of x.
+ *  x must be non-zero: GCC documents the result of __builtin_ctzl(0)
+ *  as undefined. */
+static inline uint32_t count_trailing_zeroes(uint32_t x)
+{
+  unsigned long widened = x;
+  return (uint32_t) __builtin_ctzl(widened);
+}
+
+#endif
diff --git a/emb/pastilda/lib/crypto/blockwise.c b/emb/pastilda/lib/crypto/blockwise.c
new file mode 100644
index 0000000..74756e5
--- /dev/null
+++ b/emb/pastilda/lib/crypto/blockwise.c
@@ -0,0 +1,194 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#include <bitops.h>
+#include <blockwise.h>
+#include <handy.h>
+#include <tassert.h>
+#include <string.h>
+
+/* Accumulates nbytes of input at inp into blocks of nblock bytes.
+ *
+ * This is cf_blockwise_accumulate_final with the same callback used
+ * for both the ordinary and the final block, so every full block is
+ * handed to process. */
+void cf_blockwise_accumulate(uint8_t *partial, size_t *npartial, size_t nblock,
+                             const void *inp, size_t nbytes,
+                             cf_blockwise_in_fn process,
+                             void *ctx)
+{
+  cf_blockwise_accumulate_final(partial, npartial, nblock,
+                                inp, nbytes,
+                                process, process, ctx);
+}
+
+/* Accumulates nbytes of input at inp into blocks of nblock bytes,
+ * buffering any incomplete block in partial.
+ *
+ * partial/npartial: caller-owned buffer and its fill count; on entry
+ *   *npartial must be below nblock (asserted), on exit it holds the
+ *   number of bytes left buffered.
+ * process: called with ctx for each full block that is followed by
+ *   more input from this call.
+ * process_final: called instead of process for a full block that uses
+ *   up the last of this call's input.  It is not called at all when
+ *   the input ends inside an incomplete block.
+ *
+ * Full blocks are passed either from partial (when completing buffered
+ * data) or directly from the input buffer. */
+void cf_blockwise_accumulate_final(uint8_t *partial, size_t *npartial, size_t nblock,
+                                   const void *inp, size_t nbytes,
+                                   cf_blockwise_in_fn process,
+                                   cf_blockwise_in_fn process_final,
+                                   void *ctx)
+{
+  const uint8_t *bufin = inp;
+  assert(partial && *npartial < nblock);
+  assert(inp || !nbytes);
+  /* NOTE(review): process_final is not covered by this assert. */
+  assert(process && ctx);
+
+  /* If we have partial data, copy in to buffer. */
+  if (*npartial && nbytes)
+  {
+    size_t space = nblock - *npartial;
+    size_t taken = MIN(space, nbytes);
+
+    memcpy(partial + *npartial, bufin, taken);
+
+    bufin += taken;
+    nbytes -= taken;
+    *npartial += taken;
+
+    /* If that gives us a full block, process it. */
+    if (*npartial == nblock)
+    {
+      /* No input left after this block: it is the final one. */
+      if (nbytes == 0)
+        process_final(ctx, partial);
+      else
+        process(ctx, partial);
+      *npartial = 0;
+    }
+  }
+
+  /* now nbytes < nblock or *npartial == 0. */
+
+  /* If we have a full block of data, process it directly. */
+  while (nbytes >= nblock)
+  {
+    /* Partial buffer must be empty, or we're ignoring extant data */
+    assert(*npartial == 0);
+
+    /* Exactly one block left: it is the final one. */
+    if (nbytes == nblock)
+      process_final(ctx, bufin);
+    else
+      process(ctx, bufin);
+    bufin += nblock;
+    nbytes -= nblock;
+  }
+
+  /* Finally, if we have remaining data, buffer it. */
+  while (nbytes)
+  {
+    size_t space = nblock - *npartial;
+    size_t taken = MIN(space, nbytes);
+
+    memcpy(partial + *npartial, bufin, taken);
+
+    bufin += taken;
+    nbytes -= taken;
+    *npartial += taken;
+
+    /* If we started with *npartial, we must have copied it
+     * in first. */
+    assert(*npartial < nblock);
+  }
+}
+
+/* XORs nbytes of input at inp with keystream, writing the result to outp.
+ *
+ * partial holds the current keystream block; the last *npartial bytes of
+ * it are still unused.  Whenever none are left, process is called with
+ * ctx to refill partial with a fresh nblock-byte block.  On return
+ * *npartial reflects how much of the current block remains. */
+void cf_blockwise_xor(uint8_t *partial, size_t *npartial, size_t nblock,
+                      const void *inp, void *outp, size_t nbytes,
+                      cf_blockwise_out_fn process, void *ctx)
+{
+  const uint8_t *src = inp;
+  uint8_t *dst = outp;
+
+  assert(partial && *npartial < nblock);
+  assert(inp || !nbytes);
+  assert(process && ctx);
+
+  while (nbytes != 0)
+  {
+    /* Keystream exhausted: generate the next block. */
+    if (*npartial == 0)
+    {
+      process(ctx, partial);
+      *npartial = nblock;
+    }
+
+    /* Unused keystream sits at the tail of partial. */
+    const uint8_t *keystream = partial + (nblock - *npartial);
+    size_t chunk = MIN(*npartial, nbytes);
+
+    xor_bb(dst, src, keystream, chunk);
+
+    *npartial -= chunk;
+    nbytes -= chunk;
+    dst += chunk;
+    src += chunk;
+  }
+}
+
+/* Accumulates the value byte, repeated nbytes times, into blocks of
+ * nblock bytes held in partial, calling process with ctx each time a
+ * block fills up.  *npartial is the fill count of partial on entry and
+ * on exit.
+ *
+ * Once a whole block has been filled with byte from offset zero, later
+ * iterations skip the memset: partial already holds byte everywhere. */
+void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
+                           size_t nblock,
+                           uint8_t byte, size_t nbytes,
+                           cf_blockwise_in_fn process,
+                           void *ctx)
+{
+  int block_prefilled = 0;
+  size_t left = nbytes;
+
+  while (left != 0)
+  {
+    size_t used = *npartial;
+    size_t room = nblock - used;
+    size_t run = MIN(left, room);
+
+    if (!block_prefilled)
+      memset(partial + used, byte, run);
+
+    if (used == 0 && run == nblock)
+      block_prefilled = 1;
+
+    if (run == room)
+    {
+      /* The block is now full. */
+      process(ctx, partial);
+      *npartial = 0;
+    } else {
+      *npartial = used + run;
+    }
+
+    left -= run;
+  }
+}
+
+/* Accumulates an nbytes-long padding pattern made of a first byte
+ * (fbyte), middle bytes (mbyte) and a last byte (lbyte):
+ *
+ *   nbytes == 0: nothing is processed.
+ *   nbytes == 1: the single byte fbyte ^ lbyte.
+ *   nbytes == 2: fbyte, then lbyte.
+ *   nbytes >= 3: fbyte, nbytes - 2 copies of mbyte, then lbyte.
+ *
+ * partial, npartial, nblock, process and ctx are passed through to
+ * cf_blockwise_accumulate / cf_blockwise_acc_byte. */
+void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
+                          size_t nblock,
+                          uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
+                          size_t nbytes,
+                          cf_blockwise_in_fn process,
+                          void *ctx)
+{
+  if (nbytes == 0)
+    return;
+
+  if (nbytes == 1)
+  {
+    fbyte ^= lbyte;
+    cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);
+    return;
+  }
+
+  /* Two or more bytes: the pattern always opens with fbyte. */
+  cf_blockwise_accumulate(partial, npartial, nblock, &fbyte, 1, process, ctx);
+
+  if (nbytes == 2)
+  {
+    cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
+    return;
+  }
+
+  if (lbyte == mbyte)
+  {
+    /* Last byte equals the filler: fold it into the run of mbytes. */
+    cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 1, process, ctx);
+    return;
+  }
+
+  cf_blockwise_acc_byte(partial, npartial, nblock, mbyte, nbytes - 2, process, ctx);
+  cf_blockwise_accumulate(partial, npartial, nblock, &lbyte, 1, process, ctx);
+}
diff --git a/emb/pastilda/lib/crypto/blockwise.h b/emb/pastilda/lib/crypto/blockwise.h
new file mode 100644
index 0000000..a20ff95
--- /dev/null
+++ b/emb/pastilda/lib/crypto/blockwise.h
@@ -0,0 +1,147 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef BLOCKWISE_H
+#define BLOCKWISE_H
+
+#include <stdint.h>
+#include <stddef.h>
+
+/* Processing function for cf_blockwise_accumulate and the other
+ * accumulating functions declared below.  It receives the caller's ctx
+ * and a read-only pointer to one block of data. */
+typedef void (*cf_blockwise_in_fn)(void *ctx, const uint8_t *data);
+
+/* Processing function for cf_blockwise_xor.  It receives the caller's
+ * ctx and a writable pointer to the block it should fill. */
+typedef void (*cf_blockwise_out_fn)(void *ctx, uint8_t *data);
+
+/* This function manages the common abstraction of accumulating input in
+ * a buffer, and processing it when a full block is available.
+ *
+ * partial is the buffer (maintained by the caller)
+ * on entry, npartial is the currently valid count of used bytes on
+ *   the front of partial.
+ * on exit, npartial is updated to reflect the status of partial.
+ * nblock is the blocksize to accumulate -- partial must be at least
+ *   this long!
+ * input is the new data to process, of length nbytes.
+ * process is the processing function, passed ctx and a pointer
+ *   to the data to process (always exactly nblock bytes long!)
+ *   which may not necessarily be the same as partial.
+ */
+void cf_blockwise_accumulate(uint8_t *partial, size_t *npartial,
+                             size_t nblock,
+                             const void *input, size_t nbytes,
+                             cf_blockwise_in_fn process, 
+                             void *ctx);
+
+/* This function manages the common abstraction of accumulating input in
+ * a buffer, and processing it when a full block is available.
+ * This version supports calling a different processing function for
+ * the last block.
+ *
+ * partial is the buffer (maintained by the caller)
+ * on entry, npartial is the currently valid count of used bytes on
+ *   the front of partial.
+ * on exit, npartial is updated to reflect the status of partial.
+ * nblock is the blocksize to accumulate -- partial must be at least
+ *   this long!
+ * input is the new data to process, of length nbytes.
+ * process is the processing function, passed ctx and a pointer
+ *   to the data to process (always exactly nblock bytes long!)
+ *   which may not necessarily be the same as partial.
+ * process_final is called last, in place of process (but may not be
+ *   called at all if all input is buffered).
+ */
+void cf_blockwise_accumulate_final(uint8_t *partial, size_t *npartial,
+                                   size_t nblock,
+                                   const void *input, size_t nbytes,
+                                   cf_blockwise_in_fn process, 
+                                   cf_blockwise_in_fn process_final,
+                                   void *ctx);
+
+/* This function manages XORing an input stream with a keystream
+ * to produce an output stream.  The keystream is produced in blocks
+ * (a la a block cipher in counter mode).
+ *
+ * partial is the keystream buffer (maintained by the caller)
+ * on entry, *npartial is the currently valid count of bytes in partial:
+ *   unused bytes are at the *end*.  So *npartial = 4 means the last four
+ *   bytes of partial are usable as keystream.
+ * on exit, npartial is updated to reflect the new state of partial.
+ * nblock is the blocksize to accumulate -- partial must be at least
+ *   this long!
+ * input is the new data to process, of length nbytes.
+ * output is where to write input xored with the keystream -- also length
+ *   nbytes.
+ * newblock is the processing function, passed ctx and partial which it
+ *   should fill with fresh key stream.
+ */
+void cf_blockwise_xor(uint8_t *partial, size_t *npartial,
+                      size_t nblock,
+                      const void *input, void *output, size_t nbytes,
+                      cf_blockwise_out_fn newblock,
+                      void *ctx);
+
+/* This function processes a single byte a number of times. It's useful
+ * for padding, and more efficient than calling cf_blockwise_accumulate
+ * a bunch of times.
+ *
+ * partial is the buffer (maintained by the caller)
+ * on entry, npartial is the currently valid count of used bytes on
+ *   the front of partial.
+ * on exit, npartial is updated to reflect the status of partial.
+ * nblock is the blocksize to accumulate -- partial must be at least
+ *   this long!
+ * byte is the byte to process, nbytes times.
+ * process is the processing function, passed ctx and a pointer
+ *   to the data to process (always exactly nblock bytes long!)
+ *   which may not necessarily be the same as partial.
+ */
+void cf_blockwise_acc_byte(uint8_t *partial, size_t *npartial,
+                           size_t nblock,
+                           uint8_t byte, size_t nbytes,
+                           cf_blockwise_in_fn process,
+                           void *ctx);
+
+/* This function attempts to process patterns of bytes common in
+ * block cipher padding.
+ *
+ * This takes three bytes:
+ * - a first byte, fbyte,
+ * - a middle byte, mbyte,
+ * - a last byte, lbyte.
+ *
+ * If nbytes is zero, nothing happens.
+ * If nbytes is one, the byte fbyte ^ lbyte is processed.
+ * If nbytes is two, the fbyte then lbyte are processed.
+ * If nbytes is three or more, fbyte, then one or more mbytes, then lbyte
+ *   is processed.
+ *
+ * partial is the buffer (maintained by the caller)
+ * on entry, npartial is the currently valid count of used bytes on
+ *   the front of partial.
+ * on exit, npartial is updated to reflect the status of partial.
+ * nblock is the blocksize to accumulate -- partial must be at least
+ *   this long!
+ * process is the processing function, passed ctx and a pointer
+ *   to the data to process (always exactly nblock bytes long!)
+ *   which may not necessarily be the same as partial.
+ */
+void cf_blockwise_acc_pad(uint8_t *partial, size_t *npartial,
+                          size_t nblock,
+                          uint8_t fbyte, uint8_t mbyte, uint8_t lbyte,
+                          size_t nbytes,
+                          cf_blockwise_in_fn process,
+                          void *ctx);
+
+#endif
diff --git a/emb/pastilda/lib/crypto/chash.c b/emb/pastilda/lib/crypto/chash.c
new file mode 100644
index 0000000..edd2e05
--- /dev/null
+++ b/emb/pastilda/lib/crypto/chash.c
@@ -0,0 +1,28 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#include <chash.h>
+#include <handy.h>
+#include <tassert.h>
+
+/* One shot hashing: runs the init, update and digest callbacks of h over
+ * the nm bytes at m, writing the digest to out.  The working context
+ * lives on the stack and is wiped with mem_clean before returning. */
+void cf_hash(const cf_chash *h, const void *m, size_t nm, uint8_t *out)
+{
+  cf_chash_ctx scratch;
+
+  assert(h);
+
+  h->init(&scratch);
+  h->update(&scratch, m, nm);
+  h->digest(&scratch, out);
+
+  mem_clean(&scratch, sizeof scratch);
+}
+
diff --git a/emb/pastilda/lib/crypto/chash.h b/emb/pastilda/lib/crypto/chash.h
new file mode 100644
index 0000000..8f2e201
--- /dev/null
+++ b/emb/pastilda/lib/crypto/chash.h
@@ -0,0 +1,137 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef CHASH_H
+#define CHASH_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/**
+ * General hash function description
+ * =================================
+ * This allows us to make use of hash functions without depending
+ * on a specific one.  This is useful in implementing, for example,
+ * :doc:`HMAC <hmac>`.
+ */
+
+/* .. c:type:: cf_chash_init
+ * Hashing initialisation function type.
+ *
+ * Functions of this type should initialise the context in preparation
+ * for hashing a message with `cf_chash_update` functions.
+ *
+ * :rtype: void
+ * :param ctx: hash function-specific context structure, passed as an
+ *    untyped pointer.
+ */
+typedef void (*cf_chash_init)(void *ctx);
+
+/* .. c:type:: cf_chash_update
+ * Hashing data processing function type.
+ *
+ * Functions of this type hash `count` bytes of data at `data`,
+ * updating the contents of `ctx`.
+ *
+ * :rtype: void
+ * :param ctx: hash function-specific context structure, passed as an
+ *    untyped pointer.
+ * :param data: input data to hash.
+ * :param count: number of bytes to hash.
+ */
+typedef void (*cf_chash_update)(void *ctx, const void *data, size_t count);
+
+/* .. c:type:: cf_chash_digest
+ * Hashing completion function type.
+ *
+ * Functions of this type complete a hashing operation,
+ * writing :c:member:`cf_chash.hashsz` bytes to `hash`.
+ *
+ * This function does not change `ctx` -- any padding which needs doing
+ * must be done separately (in a copy of `ctx`, say).
+ *
+ * This means you can interleave `_update` and `_digest` calls to
+ * learn `H(A)` and `H(A || B)` without hashing `A` twice.
+ *
+ * :rtype: void
+ * :param ctx: hash function-specific context structure (read only).
+ * :param hash: location to write hash result.
+ */
+typedef void (*cf_chash_digest)(const void *ctx, uint8_t *hash);
+
+/* .. c:type:: cf_chash
+ * This type describes an incremental hash function in an abstract way.
+ *
+ * .. c:member:: cf_chash.hashsz
+ * The hash function's output size, in bytes.
+ *
+ * .. c:member:: cf_chash.blocksz
+ * The hash function's internal block size, in bytes.
+ *
+ * .. c:member:: cf_chash.init
+ * Context initialisation function.
+ *
+ * .. c:member:: cf_chash.update
+ * Data processing function.
+ *
+ * .. c:member:: cf_chash.digest
+ * Completion function.
+ *
+ */
+typedef struct
+{
+  size_t hashsz;
+  size_t blocksz;
+
+  cf_chash_init init;
+  cf_chash_update update;
+  cf_chash_digest digest;
+} cf_chash;
+
+/* .. c:macro:: CF_CHASH_MAXCTX
+ * The maximum size of a :c:type:`cf_chash_ctx`, in bytes.  This allows
+ * us to put a structure in automatic storage that can
+ * store working data for any supported hash function. */
+#define CF_CHASH_MAXCTX 360
+
+/* .. c:macro:: CF_CHASH_MAXBLK
+ * Maximum hash function block size (in bytes); compare
+ * :c:member:`cf_chash.blocksz`. */
+#define CF_CHASH_MAXBLK 128
+
+/* .. c:macro:: CF_MAXHASH
+ * Maximum hash function output (in bytes); compare
+ * :c:member:`cf_chash.hashsz`. */
+#define CF_MAXHASH 64
+
+/* .. c:type:: cf_chash_ctx
+ * A type usable with any `cf_chash` as a context.
+ *
+ * It is CF_CHASH_MAXCTX bytes of storage in a union with 16, 32 and
+ * 64-bit integers (presumably so the storage is suitably aligned for
+ * any hash context -- confirm against the hash implementations). */
+typedef union
+{
+  uint8_t ctx[CF_CHASH_MAXCTX];
+  uint16_t u16;
+  uint32_t u32;
+  uint64_t u64;
+} cf_chash_ctx;
+
+/* .. c:function:: $DECL
+ * One shot hashing: `out = h(m)`.
+ *
+ * Using the hash function `h`, `nm` bytes at `m` are hashed and `h->hashsz` bytes
+ * of result are written to the buffer `out`.
+ *
+ * :param h: hash function description (must not be NULL).
+ * :param m: message buffer.
+ * :param nm: message length.
+ * :param out: hash result buffer (written).
+ */
+void cf_hash(const cf_chash *h, const void *m, size_t nm, uint8_t *out);
+
+#endif
diff --git a/emb/pastilda/lib/crypto/handy.h b/emb/pastilda/lib/crypto/handy.h
new file mode 100644
index 0000000..a9b2d9d
--- /dev/null
+++ b/emb/pastilda/lib/crypto/handy.h
@@ -0,0 +1,86 @@
+#ifndef HANDY_H
+#define HANDY_H
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+/*
+ * Handy CPP defines and C inline functions.
+ */
+
+/* Number of elements in the array-typed expression arr (meaningless for pointers). */
+#define ARRAYCOUNT(arr) (sizeof (arr) / sizeof (arr)[0])
+
+/* MIN/MAX that evaluate each argument exactly once (GNU statement expression). */
+#ifndef MIN  /* __typeof__, not typeof: the latter is unavailable in strict ISO C / C++ modes. */
+  #define MIN(x, y) \
+    ({ __typeof__ (x) __x = (x); \
+       __typeof__ (y) __y = (y); \
+       __x < __y ? __x : __y; })
+#endif
+#ifndef MAX
+  #define MAX(x, y) \
+    ({ __typeof__ (x) __x = (x); \
+       __typeof__ (y) __y = (y); \
+       __x > __y ? __x : __y; })
+#endif
+
+/* Swap two lvalues of the same type.  Uses __typeof__, the spelling of GCC's typeof that strict ISO modes accept. */
+#ifndef SWAP
+  #define SWAP(x, y) \
+    do { \
+      __typeof__ (x) __tmp = (x); \
+      (x) = (y); \
+      (y) = __tmp; \
+    } while (0)
+#endif
+
+/** Stringify its argument after macro-expanding it (hence the STRINGIFY_ indirection). */
+#define STRINGIFY(x) STRINGIFY_(x)
+#define STRINGIFY_(x) #x
+
+/* Error handling macros.
+ *
+ * These expect a zero = success, non-zero = error convention.
+ */
+
+/** Error: return.
+ *  Evaluates expr once; if the result is non-zero, returns it from the enclosing
+ *  function.  (__typeof__ rather than typeof so strict ISO C / C++ modes accept it.) */
+#define ER(expr) do { __typeof__ (expr) err_ = (expr); if (err_) return err_; } while (0)
+
+/** Error: goto.
+ *
+ *  Stores the expression's value in 'err'; if it is non-zero, jumps to x_err.
+ *  The caller must provide the label x_err and a variable 'error_type err'. */
+#define EG(expr) do { err = (expr); if (err) goto x_err; } while (0)
+
+/** Like memset(ptr, 0, len), but not allowed to be removed by compilers:
+ *  every byte is stored through a volatile lvalue, so no store can be treated as dead. */
+static inline void mem_clean(volatile void *v, size_t len)
+{
+  volatile uint8_t *p = (volatile uint8_t *) v;
+  while (len--)
+  {
+    *p++ = 0;
+  }
+}
+
+/** Returns 1 if len bytes at va equal len bytes at vb, 0 if they do not.
+ *  Does not leak length of common prefix through timing. */
+static inline unsigned mem_eq(const void *va, const void *vb, size_t len)
+{
+  /* Explicit casts: C++ has no implicit conversion from void pointers. */
+  const volatile uint8_t *a = (const volatile uint8_t *) va;
+  const volatile uint8_t *b = (const volatile uint8_t *) vb;
+  uint8_t diff = 0;
+
+  while (len--)
+  {
+    diff |= *a++ ^ *b++;  /* accumulate differences; never exit early */
+  }
+  return !diff;
+}
+
+#endif
diff --git a/emb/pastilda/lib/crypto/salsa20.h b/emb/pastilda/lib/crypto/salsa20.h
new file mode 100644
index 0000000..2e478b3
--- /dev/null
+++ b/emb/pastilda/lib/crypto/salsa20.h
@@ -0,0 +1,111 @@
+// Copyright (c) 2015 Nezametdinov E. Ildus
+// See LICENSE.TXT for licensing details
+
+#ifndef SALSA20_H
+#define SALSA20_H
+
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <cstring>
+
+using std::size_t;
+using std::int32_t;
+using std::uint8_t;
+using std::uint32_t;
+
+/**
+ * Represents Salsa20 cipher. Supports only 256-bit keys.
+ */
+class Salsa20
+{
+public:
+	/// Helper constants
+	enum: size_t
+	{
+			VECTOR_SIZE = 16, ///< number of 32-bit words in the internal state
+			BLOCK_SIZE = 64,  ///< bytes of key stream produced per block
+			KEY_SIZE = 32,    ///< key length in bytes
+			IV_SIZE = 8       ///< IV length in bytes
+	};
+
+	/**
+	 * \brief Constructs cipher with given key.
+	 * \param[in] key 256-bit key; may be nullptr, then the state stays zeroed until setKey() is called
+	 */
+	inline Salsa20(const uint8_t* key = nullptr);
+	Salsa20(const Salsa20&) = default;
+	Salsa20(Salsa20&&) = default;
+	~Salsa20() = default;
+	Salsa20& operator =(const Salsa20&) = default;
+	Salsa20& operator =(Salsa20&&) = default;
+
+	/**
+	 * \brief Sets key. Also zeroes the IV and block counter; a nullptr key is ignored.
+	 * \param[in] key 256-bit key
+	 */
+	inline void setKey(const uint8_t* key);
+
+	/**
+	 * \brief Sets IV and resets the block counter to zero; a nullptr IV is ignored.
+	 * \param[in] iv 64-bit IV
+	 */
+	inline void setIv(const uint8_t* iv);
+
+	/**
+	 * \brief Generates one block of key stream and advances the block counter.
+	 * \param[out] output generated key stream (BLOCK_SIZE bytes)
+	 */
+	inline void generateKeyStream(uint8_t output[BLOCK_SIZE]);
+
+	/**
+	 * \brief Processes blocks: XORs numBlocks * BLOCK_SIZE input bytes with key stream.
+	 * \param[in] input input, must not be nullptr (asserted)
+	 * \param[out] output output, must not be nullptr (asserted)
+	 * \param[in] numBlocks number of blocks
+	 */
+	inline void processBlocks(const uint8_t* input, uint8_t* output, size_t numBlocks);
+
+	/**
+	 * \brief Processes bytes.
+	 *
+	 * This function should be used carefully. If the number of bytes is not a multiple
+	 * of the block size, then the next call to the processBlocks function will be invalid.
+	 * Normally this function should be used once at the end of encryption or
+	 * decryption.
+	 * \param[in] input input, must not be nullptr (asserted)
+	 * \param[out] output output, must not be nullptr (asserted)
+	 * \param[in] numBytes number of bytes
+	 */
+	inline void processBytes(const uint8_t* input, uint8_t* output, size_t numBytes);
+
+private:
+	/**
+	 * \brief Rotates value to the left.
+	 * \param[in] value value
+	 * \param[in] numBits number of bits to rotate (the implementation shifts by 32 - numBits)
+	 * \return result of the rotation
+	 */
+	inline uint32_t rotate(uint32_t value, uint32_t numBits);
+
+	/**
+	 * \brief Converts 32-bit unsigned integer value to the array of bytes (little-endian).
+	 * \param[in] value 32-bit unsigned integer value
+	 * \param[out] array array of bytes (4 bytes are written)
+	 */
+	inline void convert(uint32_t value, uint8_t* array);
+
+	/**
+	 * \brief Converts array of bytes (little-endian) to the 32-bit unsigned integer value.
+	 * \param[in] array array of bytes (4 bytes are read)
+	 * \return 32-bit unsigned integer value
+	 */
+	inline uint32_t convert(const uint8_t* array);
+
+	// Data members
+	uint32_t vector_[VECTOR_SIZE]; // state words: constants, key, IV and block counter
+
+};
+
+#include <salsa20.inl>
+#endif
diff --git a/emb/pastilda/lib/crypto/salsa20.inl b/emb/pastilda/lib/crypto/salsa20.inl
new file mode 100644
index 0000000..1f800c0
--- /dev/null
+++ b/emb/pastilda/lib/crypto/salsa20.inl
@@ -0,0 +1,156 @@
+// Copyright (c) 2015 Nezametdinov E. Ildus
+// See LICENSE.TXT for licensing details
+
+#include "Salsa20.h"
+
+Salsa20::Salsa20(const uint8_t* key)
+{
+		for(uint32_t& word : vector_) word = 0; // setKey() keeps this zero state when key is null
+		setKey(key);
+}
+
+//----------------------------------------------------------------------------------
+void Salsa20::setKey(const uint8_t* key)
+{
+		// A null key is ignored and the current state is kept.
+		if(key == nullptr)
+				return;
+
+		// The four words of "expand 32-byte k" sit on the diagonal of the
+		// 4x4 state: words 0, 5, 10 and 15.
+		static const char sigma[] = "expand 32-byte k";
+		for(size_t i = 0; i < 4; ++i)
+				vector_[5 * i] = convert(reinterpret_cast<const uint8_t*>(&sigma[4 * i]));
+
+		// Key bytes 0..15 fill words 1..4, key bytes 16..31 fill words 11..14.
+		for(size_t i = 0; i < 4; ++i)
+		{
+				vector_[1 + i] = convert(&key[4 * i]);
+				vector_[11 + i] = convert(&key[16 + 4 * i]);
+		}
+
+		// Words 6..9 (IV and block counter) are reset to zero.
+		for(size_t i = 6; i < 10; ++i)
+				vector_[i] = 0;
+}
+
+//----------------------------------------------------------------------------------
+void Salsa20::setIv(const uint8_t* iv)
+{
+		if(iv != nullptr)
+		{
+				vector_[6] = convert(iv);     // IV bytes 0..3
+				vector_[7] = convert(iv + 4); // IV bytes 4..7
+				vector_[9] = vector_[8] = 0;  // block counter restarts at zero
+		}
+}
+
+//----------------------------------------------------------------------------------
+void Salsa20::generateKeyStream(uint8_t output[BLOCK_SIZE])
+{
+		uint32_t x[VECTOR_SIZE]; // working copy; vector_ itself is only changed by the counter update below
+		std::memcpy(x, vector_, sizeof(vector_));
+		// 10 iterations; each mixes the four columns, then the four rows, of the 4x4 word matrix.
+		for(int32_t i = 20; i > 0; i -= 2)
+		{
+				x[4 ] ^= rotate(static_cast<uint32_t>(x[0 ] + x[12]),  7); // columns start here
+				x[8 ] ^= rotate(static_cast<uint32_t>(x[4 ] + x[0 ]),  9);
+				x[12] ^= rotate(static_cast<uint32_t>(x[8 ] + x[4 ]), 13);
+				x[0 ] ^= rotate(static_cast<uint32_t>(x[12] + x[8 ]), 18);
+				x[9 ] ^= rotate(static_cast<uint32_t>(x[5 ] + x[1 ]),  7);
+				x[13] ^= rotate(static_cast<uint32_t>(x[9 ] + x[5 ]),  9);
+				x[1 ] ^= rotate(static_cast<uint32_t>(x[13] + x[9 ]), 13);
+				x[5 ] ^= rotate(static_cast<uint32_t>(x[1 ] + x[13]), 18);
+				x[14] ^= rotate(static_cast<uint32_t>(x[10] + x[6 ]),  7);
+				x[2 ] ^= rotate(static_cast<uint32_t>(x[14] + x[10]),  9);
+				x[6 ] ^= rotate(static_cast<uint32_t>(x[2 ] + x[14]), 13);
+				x[10] ^= rotate(static_cast<uint32_t>(x[6 ] + x[2 ]), 18);
+				x[3 ] ^= rotate(static_cast<uint32_t>(x[15] + x[11]),  7);
+				x[7 ] ^= rotate(static_cast<uint32_t>(x[3 ] + x[15]),  9);
+				x[11] ^= rotate(static_cast<uint32_t>(x[7 ] + x[3 ]), 13);
+				x[15] ^= rotate(static_cast<uint32_t>(x[11] + x[7 ]), 18);
+				x[1 ] ^= rotate(static_cast<uint32_t>(x[0 ] + x[3 ]),  7); // rows start here
+				x[2 ] ^= rotate(static_cast<uint32_t>(x[1 ] + x[0 ]),  9);
+				x[3 ] ^= rotate(static_cast<uint32_t>(x[2 ] + x[1 ]), 13);
+				x[0 ] ^= rotate(static_cast<uint32_t>(x[3 ] + x[2 ]), 18);
+				x[6 ] ^= rotate(static_cast<uint32_t>(x[5 ] + x[4 ]),  7);
+				x[7 ] ^= rotate(static_cast<uint32_t>(x[6 ] + x[5 ]),  9);
+				x[4 ] ^= rotate(static_cast<uint32_t>(x[7 ] + x[6 ]), 13);
+				x[5 ] ^= rotate(static_cast<uint32_t>(x[4 ] + x[7 ]), 18);
+				x[11] ^= rotate(static_cast<uint32_t>(x[10] + x[9 ]),  7);
+				x[8 ] ^= rotate(static_cast<uint32_t>(x[11] + x[10]),  9);
+				x[9 ] ^= rotate(static_cast<uint32_t>(x[8 ] + x[11]), 13);
+				x[10] ^= rotate(static_cast<uint32_t>(x[9 ] + x[8 ]), 18);
+				x[12] ^= rotate(static_cast<uint32_t>(x[15] + x[14]),  7);
+				x[13] ^= rotate(static_cast<uint32_t>(x[12] + x[15]),  9);
+				x[14] ^= rotate(static_cast<uint32_t>(x[13] + x[12]), 13);
+				x[15] ^= rotate(static_cast<uint32_t>(x[14] + x[13]), 18);
+		}
+		// Feed-forward: add the original state word, then serialise each word little-endian.
+		for(size_t i = 0; i < VECTOR_SIZE; ++i)
+		{
+				x[i] += vector_[i];
+				convert(x[i], &output[4 * i]);
+		}
+		// Advance the 64-bit block counter: word 8 is the low half, word 9 takes the carry.
+		++vector_[8];
+		vector_[9] += vector_[8] == 0 ? 1 : 0;
+}
+
+//----------------------------------------------------------------------------------
+void Salsa20::processBlocks(const uint8_t* input, uint8_t* output, size_t numBlocks)
+{
+		assert(input != nullptr && output != nullptr);
+		// XOR every BLOCK_SIZE-byte block with a freshly generated key stream block.
+		uint8_t pad[BLOCK_SIZE];
+		for(size_t remaining = numBlocks; remaining != 0; --remaining)
+		{
+				generateKeyStream(pad);
+				for(size_t k = 0; k < BLOCK_SIZE; ++k)
+						output[k] = pad[k] ^ input[k];
+				input += BLOCK_SIZE;
+				output += BLOCK_SIZE;
+		}
+}
+
+//----------------------------------------------------------------------------------
+void Salsa20::processBytes(const uint8_t* input, uint8_t* output, size_t numBytes)
+{
+		assert(input != nullptr && output != nullptr);
+		uint8_t pad[BLOCK_SIZE];
+		for(size_t done = 0; done < numBytes; )
+		{
+				// One key stream block per chunk; a short final chunk discards the rest of it.
+				generateKeyStream(pad);
+				size_t chunk = numBytes - done;
+				if(chunk > BLOCK_SIZE)
+						chunk = BLOCK_SIZE;
+				for(size_t k = 0; k < chunk; ++k)
+						output[done + k] = pad[k] ^ input[done + k];
+				done += chunk;
+		}
+}
+
+//----------------------------------------------------------------------------------
+uint32_t Salsa20::rotate(uint32_t value, uint32_t numBits)
+{
+		return (value >> (32 - numBits)) | (value << numBits); // left rotation; generateKeyStream passes 7, 9, 13, 18
+}
+
+//----------------------------------------------------------------------------------
+void Salsa20::convert(uint32_t value, uint8_t* array)
+{
+		// Little-endian store: the least significant byte is written to array[0],
+		// the most significant one to array[3].
+		for(size_t i = 0; i < 4; ++i)
+				array[i] = static_cast<uint8_t>(value >> (8 * i));
+}
+
+//----------------------------------------------------------------------------------
+uint32_t Salsa20::convert(const uint8_t* array)
+{
+		uint32_t value = 0; // little-endian load: array[0] is the least significant byte
+		for(size_t i = 4; i-- > 0; )
+				value = (value << 8) | array[i];
+		return value;
+}
+\ No newline at end of file
diff --git a/emb/pastilda/lib/crypto/sha2.h b/emb/pastilda/lib/crypto/sha2.h
new file mode 100644
index 0000000..f72576f
--- /dev/null
+++ b/emb/pastilda/lib/crypto/sha2.h
@@ -0,0 +1,235 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef SHA2_H
+#define SHA2_H
+
+#include <chash.h>
+#include <stddef.h>
+#include <stdint.h>
+
+
+/**
+ * SHA224/SHA256
+ * =============
+ */
+
+/* .. c:macro:: CF_SHA224_HASHSZ
+ * The output size of SHA224: 28 bytes. */
+#define CF_SHA224_HASHSZ 28
+
+/* .. c:macro:: CF_SHA224_BLOCKSZ
+ * The block size of SHA224: 64 bytes. */
+#define CF_SHA224_BLOCKSZ 64
+
+/* .. c:macro:: CF_SHA256_HASHSZ
+ * The output size of SHA256: 32 bytes. */
+#define CF_SHA256_HASHSZ 32
+
+/* .. c:macro:: CF_SHA256_BLOCKSZ
+ * The block size of SHA256: 64 bytes. */
+#define CF_SHA256_BLOCKSZ 64
+
+/* .. c:type:: cf_sha256_context
+ * Incremental SHA256 hashing context.
+ *
+ * .. c:member:: cf_sha256_context.H
+ * Intermediate values.
+ *
+ * .. c:member:: cf_sha256_context.partial
+ * Unprocessed input.
+ *
+ * .. c:member:: cf_sha256_context.npartial
+ * Number of bytes of unprocessed input.
+ *
+ * .. c:member:: cf_sha256_context.blocks
+ * Number of full blocks processed.
+ */
+typedef struct
+{
+  uint32_t H[8];                      /* State: eight 32-bit intermediate values. */
+  uint8_t partial[CF_SHA256_BLOCKSZ]; /* Partial block of input. */
+  uint32_t blocks;                    /* Number of full blocks processed into H (32-bit counter). */
+  size_t npartial;                    /* Number of bytes in prefix of partial. */
+} cf_sha256_context;
+
+/* .. c:function:: $DECL
+ * Sets up `ctx` ready to hash a new message.
+ */
+extern void cf_sha256_init(cf_sha256_context *ctx);
+
+/* .. c:function:: $DECL
+ * Hashes `nbytes` at `data`.  Copies the data if there isn't enough to make
+ * a full block.
+ */
+extern void cf_sha256_update(cf_sha256_context *ctx, const void *data, size_t nbytes);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA256_HASHSZ` bytes to `hash`.
+ *
+ * This leaves `ctx` unchanged.
+ */
+extern void cf_sha256_digest(const cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHSZ]);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA256_HASHSZ` bytes to `hash`.
+ *
+ * This destroys `ctx` (it is zeroed), but uses less stack than :c:func:`cf_sha256_digest`.
+ */
+extern void cf_sha256_digest_final(cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHSZ]);
+
+/* .. c:function:: $DECL
+ * Sets up `ctx` ready to hash a new message.
+ *
+ * nb. SHA224 uses SHA256's underlying types.
+ */
+extern void cf_sha224_init(cf_sha256_context *ctx);
+
+/* .. c:function:: $DECL
+ * Hashes `nbytes` at `data`.  Copies the data if there isn't enough to make
+ * a full block.
+ */
+extern void cf_sha224_update(cf_sha256_context *ctx, const void *data, size_t nbytes);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA224_HASHSZ` bytes to `hash`.
+ *
+ * This leaves `ctx` unchanged.
+ */
+extern void cf_sha224_digest(const cf_sha256_context *ctx, uint8_t hash[CF_SHA224_HASHSZ]);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA224_HASHSZ` bytes to `hash`.
+ *
+ * This destroys `ctx`, but uses less stack than :c:func:`cf_sha224_digest`.
+ */
+extern void cf_sha224_digest_final(cf_sha256_context *ctx, uint8_t hash[CF_SHA224_HASHSZ]);
+
+/* .. c:var:: cf_sha224
+ * Abstract interface to SHA224.  See :c:type:`cf_chash` for more information.
+ */
+extern const cf_chash cf_sha224;
+
+/* .. c:var:: cf_sha256
+ * Abstract interface to SHA256.  See :c:type:`cf_chash` for more information.
+ */
+extern const cf_chash cf_sha256;
+
+/**
+ * SHA384/SHA512
+ * =============
+ */
+
+/* .. c:macro:: CF_SHA384_HASHSZ
+ * The output size of SHA384: 48 bytes. */
+#define CF_SHA384_HASHSZ 48
+
+/* .. c:macro:: CF_SHA384_BLOCKSZ
+ * The block size of SHA384: 128 bytes. */
+#define CF_SHA384_BLOCKSZ 128
+
+/* .. c:macro:: CF_SHA512_HASHSZ
+ * The output size of SHA512: 64 bytes. */
+#define CF_SHA512_HASHSZ 64
+
+/* .. c:macro:: CF_SHA512_BLOCKSZ
+ * The block size of SHA512: 128 bytes. */
+#define CF_SHA512_BLOCKSZ 128
+
+/* .. c:type:: cf_sha512_context
+ * Incremental SHA512 hashing context.
+ *
+ * .. c:member:: cf_sha512_context.H
+ * Intermediate values.
+ *
+ * .. c:member:: cf_sha512_context.partial
+ * Unprocessed input.
+ *
+ * .. c:member:: cf_sha512_context.npartial
+ * Number of bytes of unprocessed input.
+ *
+ * .. c:member:: cf_sha512_context.blocks
+ * Number of full blocks processed.
+ */
+typedef struct
+{
+  uint64_t H[8];                      /* Intermediate values. */
+  uint8_t partial[CF_SHA512_BLOCKSZ]; /* Unprocessed input. */
+  uint32_t blocks;                    /* Number of full blocks processed. */
+  size_t npartial;                    /* Number of bytes of unprocessed input. */
+} cf_sha512_context;
+
+/* .. c:function:: $DECL
+ * Sets up `ctx` ready to hash a new message.
+ */
+extern void cf_sha512_init(cf_sha512_context *ctx);
+
+/* .. c:function:: $DECL
+ * Hashes `nbytes` at `data`.  Copies the data if there isn't enough to make
+ * a full block.
+ */
+extern void cf_sha512_update(cf_sha512_context *ctx, const void *data, size_t nbytes);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA512_HASHSZ` bytes to `hash`.
+ *
+ * This leaves `ctx` unchanged.
+ */
+extern void cf_sha512_digest(const cf_sha512_context *ctx, uint8_t hash[CF_SHA512_HASHSZ]);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA512_HASHSZ` bytes to `hash`.
+ *
+ * This destroys `ctx`, but uses less stack than :c:func:`cf_sha512_digest`.
+ */
+extern void cf_sha512_digest_final(cf_sha512_context *ctx, uint8_t hash[CF_SHA512_HASHSZ]);
+
+/* .. c:function:: $DECL
+ * Sets up `ctx` ready to hash a new message.
+ *
+ * nb. SHA384 uses SHA512's underlying types.
+ */
+extern void cf_sha384_init(cf_sha512_context *ctx);
+
+/* .. c:function:: $DECL
+ * Hashes `nbytes` at `data`.  Copies the data if there isn't enough to make
+ * a full block.
+ */
+extern void cf_sha384_update(cf_sha512_context *ctx, const void *data, size_t nbytes);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA384_HASHSZ` bytes to `hash`.
+ *
+ * This leaves `ctx` unchanged.
+ */
+extern void cf_sha384_digest(const cf_sha512_context *ctx, uint8_t hash[CF_SHA384_HASHSZ]);
+
+/* .. c:function:: $DECL
+ * Finishes the hash operation, writing `CF_SHA384_HASHSZ` bytes to `hash`.
+ *
+ * This destroys `ctx`, but uses less stack than :c:func:`cf_sha384_digest`.
+ */
+extern void cf_sha384_digest_final(cf_sha512_context *ctx, uint8_t hash[CF_SHA384_HASHSZ]);
+
+/* .. c:var:: cf_sha384
+ * Abstract interface to SHA384.  See :c:type:`cf_chash` for more information.
+ */
+extern const cf_chash cf_sha384;
+
+/* .. c:var:: cf_sha512
+ * Abstract interface to SHA512.  See :c:type:`cf_chash` for more information.
+ */
+extern const cf_chash cf_sha512;
+
+#endif
diff --git a/emb/pastilda/lib/crypto/sha256.c b/emb/pastilda/lib/crypto/sha256.c
new file mode 100644
index 0000000..88b1db3
--- /dev/null
+++ b/emb/pastilda/lib/crypto/sha256.c
@@ -0,0 +1,230 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#include <bitops.h>
+#include <blockwise.h>
+#include <handy.h>
+#include <sha2.h>
+#include <tassert.h>
+#include <string.h>
+
+static const uint32_t K[64] = { /* SHA-256 round constants; round t of sha256_update_block adds K[t]. */
+  0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
+  0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
+  0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
+  0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
+  0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
+  0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
+  0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
+  0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
+  0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
+  0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
+  0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
+  0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
+  0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
+  0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
+  0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
+  0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
+};
+
+# define CH(x, y, z) (((x) & (y)) ^ (~(x) & (z)))                         /* choose: y where x is 1, else z */
+# define MAJ(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z)))          /* bitwise majority of x, y, z */
+# define BSIG0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22))   /* round function, applied to a */
+# define BSIG1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25))   /* round function, applied to e */
+# define SSIG0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3))        /* message schedule, on W[t - 15] */
+# define SSIG1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10))      /* message schedule, on W[t - 2] */
+
+void cf_sha256_init(cf_sha256_context *ctx)
+{
+  /* Initial values of H[0..7] for SHA256. */
+  static const uint32_t initial[8] = {
+    0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
+    0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+  };
+
+  /* Zero everything (partial buffer, counters), then install the state words. */
+  memset(ctx, 0, sizeof *ctx);
+  memcpy(ctx->H, initial, sizeof initial);
+}
+
+void cf_sha224_init(cf_sha256_context *ctx)
+{
+  /* Initial values of H[0..7] for SHA224. */
+  static const uint32_t initial[8] = {
+    0xc1059ed8, 0x367cd507, 0x3070dd17, 0xf70e5939,
+    0xffc00b31, 0x68581511, 0x64f98fa7, 0xbefa4fa4
+  };
+
+  /* Zero everything (partial buffer, counters), then install the state words. */
+  memset(ctx, 0, sizeof *ctx);
+  memcpy(ctx->H, initial, sizeof initial);
+}
+
+static void sha256_update_block(void *vctx, const uint8_t *inp)
+{
+  cf_sha256_context *ctx = vctx;
+  /* Processes one block (16 big-endian words read from inp) into ctx->H and bumps ctx->blocks. */
+  /* This is a 16-word window into the whole W array. */
+  uint32_t W[16];
+
+  uint32_t a = ctx->H[0],
+           b = ctx->H[1],
+           c = ctx->H[2],
+           d = ctx->H[3],
+           e = ctx->H[4],
+           f = ctx->H[5],
+           g = ctx->H[6],
+           h = ctx->H[7],
+           Wt;
+
+  for (size_t t = 0; t < 64; t++)
+  {
+    /* For W[0..16] we process the input into W.
+     * For W[16..64] we compute the next W value:
+     *
+     * W[t] = SSIG1(W[t - 2]) + W[t - 7] + SSIG0(W[t - 15]) + W[t - 16];
+     *
+     * But all W indices are reduced mod 16 into our window.
+     */
+    if (t < 16)
+    {
+      W[t] = Wt = read32_be(inp);
+      inp += 4;
+    } else {
+      Wt = SSIG1(W[(t - 2) % 16]) +
+           W[(t - 7) % 16] +
+           SSIG0(W[(t - 15) % 16]) +
+           W[(t - 16) % 16];
+      W[t % 16] = Wt;
+    }
+    /* One round: combine Wt and K[t] with the working variables, then rotate them. */
+    uint32_t T1 = h + BSIG1(e) + CH(e, f, g) + K[t] + Wt;
+    uint32_t T2 = BSIG0(a) + MAJ(a, b, c);
+    h = g;
+    g = f;
+    f = e;
+    e = d + T1;
+    d = c;
+    c = b;
+    b = a;
+    a = T1 + T2;
+  }
+  /* Add this block's working variables into the running state. */
+  ctx->H[0] += a;
+  ctx->H[1] += b;
+  ctx->H[2] += c;
+  ctx->H[3] += d;
+  ctx->H[4] += e;
+  ctx->H[5] += f;
+  ctx->H[6] += g;
+  ctx->H[7] += h;
+  /* cf_sha256_digest_final derives the message length from this count. */
+  ctx->blocks++;
+}
+
+void cf_sha256_update(cf_sha256_context *ctx, const void *data, size_t nbytes)
+{
+  /* Hand the input to the blockwise helper together with our partial buffer and sha256_update_block. */
+  const size_t blocksz = sizeof ctx->partial;
+  cf_blockwise_accumulate(ctx->partial, &ctx->npartial, blocksz, data, nbytes, sha256_update_block, ctx);
+}
+
+void cf_sha224_update(cf_sha256_context *ctx, const void *data, size_t nbytes)
+{
+  cf_sha256_update(ctx, data, nbytes); /* SHA224 reuses SHA256's update unchanged */
+}
+
+void cf_sha256_digest(const cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHSZ])
+{
+  /* Finalisation is run on a private copy so that it doesn't affect the
+   * caller's context.  This means the caller can do:
+   *
+   * x = init()
+   * x.update('hello')
+   * h1 = x.digest()
+   * x.update(' world')
+   * h2 = x.digest()
+   *
+   * to get h1 = H('hello') and h2 = H('hello world')
+   *
+   * This wouldn't work if we applied MD-padding to *ctx.
+   */
+  cf_sha256_context scratch;
+  memcpy(&scratch, ctx, sizeof scratch);
+  cf_sha256_digest_final(&scratch, hash);
+}
+
+void cf_sha256_digest_final(cf_sha256_context *ctx, uint8_t hash[CF_SHA256_HASHSZ])
+{
+  /* Pads and length-terminates the message in *ctx, writes the digest to hash, then wipes *ctx. */
+  /* Message length so far: full blocks plus the buffered partial block. */
+  uint64_t digested_bytes = ctx->blocks;
+  digested_bytes = digested_bytes * CF_SHA256_BLOCKSZ + ctx->npartial;
+  uint64_t digested_bits = digested_bytes * 8;
+
+  /* Pad so that, once the 8-byte length follows, the total is a whole number of blocks. */
+  size_t padbytes = CF_SHA256_BLOCKSZ - ((digested_bytes + 8) % CF_SHA256_BLOCKSZ);
+
+  /* Hash 0x80 00 ... block first. */
+  cf_blockwise_acc_pad(ctx->partial, &ctx->npartial, sizeof ctx->partial,
+                       0x80, 0x00, 0x00, padbytes,
+                       sha256_update_block, ctx);
+
+  /* Now hash length. */
+  uint8_t buf[8];
+  write64_be(digested_bits, buf);
+  cf_sha256_update(ctx, buf, 8);
+
+  /* We ought to have got our padding calculation right! */
+  assert(ctx->npartial == 0);
+
+  /* Emit the eight state words, big-endian. */
+  for (size_t i = 0; i < 8; i++)
+    write32_be(ctx->H[i], hash + 4 * i);
+
+  /* Wipe with mem_clean: a plain memset of a context that is never read
+   * again (e.g. the local copy in cf_sha256_digest) may be optimised away. */
+  mem_clean(ctx, sizeof *ctx);
+}
+
+void cf_sha224_digest(const cf_sha256_context *ctx, uint8_t hash[CF_SHA224_HASHSZ])
+{
+  uint8_t sha256_out[CF_SHA256_HASHSZ]; /* full-width result; only its first CF_SHA224_HASHSZ bytes are returned */
+  cf_sha256_digest(ctx, sha256_out);
+  memcpy(hash, sha256_out, CF_SHA224_HASHSZ);
+}
+
+void cf_sha224_digest_final(cf_sha256_context *ctx, uint8_t hash[CF_SHA224_HASHSZ])
+{
+  uint8_t sha256_out[CF_SHA256_HASHSZ]; /* full-width result; only its first CF_SHA224_HASHSZ bytes are returned */
+  cf_sha256_digest_final(ctx, sha256_out);
+  memcpy(hash, sha256_out, CF_SHA224_HASHSZ);
+}
+
+const cf_chash cf_sha224 = { /* cf_chash description of SHA224: SHA256 block size, CF_SHA224_HASHSZ output */
+  .hashsz = CF_SHA224_HASHSZ,
+  .blocksz = CF_SHA256_BLOCKSZ,
+  .init = (cf_chash_init) cf_sha224_init,       /* NOTE(review): the typed functions are cast to the  */
+  .update = (cf_chash_update) cf_sha224_update, /* generic cf_chash pointer types -- confirm the      */
+  .digest = (cf_chash_digest) cf_sha224_digest  /* target ABI passes both pointer kinds identically.  */
+};
+
+const cf_chash cf_sha256 = { /* cf_chash description of SHA256 */
+  .hashsz = CF_SHA256_HASHSZ,
+  .blocksz = CF_SHA256_BLOCKSZ,
+  .init = (cf_chash_init) cf_sha256_init,       /* NOTE(review): the typed functions are cast to the  */
+  .update = (cf_chash_update) cf_sha256_update, /* generic cf_chash pointer types -- confirm the      */
+  .digest = (cf_chash_digest) cf_sha256_digest  /* target ABI passes both pointer kinds identically.  */
+};
+
diff --git a/emb/pastilda/lib/crypto/tassert.h b/emb/pastilda/lib/crypto/tassert.h
new file mode 100644
index 0000000..58ebb4c
--- /dev/null
+++ b/emb/pastilda/lib/crypto/tassert.h
@@ -0,0 +1,32 @@
+/*
+ * cifra - embedded cryptography library
+ * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com>
+ *
+ * To the extent possible under law, the author(s) have dedicated all
+ * copyright and related and neighboring rights to this software to the
+ * public domain worldwide. This software is distributed without any
+ * warranty.
+ *
+ * You should have received a copy of the CC0 Public Domain Dedication
+ * along with this software. If not, see
+ * <http://creativecommons.org/publicdomain/zero/1.0/>.
+ */
+
+#ifndef TASSERT_H
+#define TASSERT_H
+
+/* Tiny assert
+ * -----------
+ * An assert(3) definition which doesn't include any strings, but just
+ * branches to abort(3) on failure.  Any earlier definition is dropped first.
+ */
+
+#ifndef FULL_FAT_ASSERT
+# include <stdlib.h>
+# undef assert
+# define assert(expr) do { if (!(expr)) abort(); } while (0)
+#else
+# include <assert.h>
+#endif
+
+#endif