diff options
author | Dimitrij <kvarkas@gmail.com> | 2022-10-31 00:45:23 +0300 |
---|---|---|
committer | Dimitrij <kvarkas@gmail.com> | 2022-10-31 00:45:23 +0300 |
commit | 302fb2e8ddea1c993552c9a30c02f41d01ca54a9 (patch) | |
tree | d6cf1b32664296ef2cecda33caeafbe39e6695c1 /keygen | |
parent | 59105d9b26363e47f00676bd365b2ac8d4cb536a (diff) | |
parent | 4ff82ab29a22936b78510c68f544a99e677efed3 (diff) |
Diffstat (limited to 'keygen')
-rw-r--r-- | keygen/CMakeLists.txt | 10 | ||||
-rw-r--r-- | keygen/dsa.c | 103 | ||||
-rw-r--r-- | keygen/ecdsa.c | 37 | ||||
-rw-r--r-- | keygen/millerrabin.c | 288 | ||||
-rw-r--r-- | keygen/mpunsafe.c | 48 | ||||
-rw-r--r-- | keygen/mpunsafe.h | 39 | ||||
-rw-r--r-- | keygen/pockle.c | 450 | ||||
-rw-r--r-- | keygen/prime.c | 762 | ||||
-rw-r--r-- | keygen/primecandidate.c | 447 | ||||
-rw-r--r-- | keygen/rsa.c | 292 | ||||
-rw-r--r-- | keygen/smallprimes.c | 44 |
11 files changed, 2520 insertions, 0 deletions
diff --git a/keygen/CMakeLists.txt b/keygen/CMakeLists.txt new file mode 100644 index 00000000..17eea2b1 --- /dev/null +++ b/keygen/CMakeLists.txt @@ -0,0 +1,10 @@ +add_sources_from_current_dir(keygen + dsa.c + ecdsa.c + millerrabin.c + mpunsafe.c + pockle.c + prime.c + primecandidate.c + rsa.c + smallprimes.c) diff --git a/keygen/dsa.c b/keygen/dsa.c new file mode 100644 index 00000000..a7ea4f53 --- /dev/null +++ b/keygen/dsa.c @@ -0,0 +1,103 @@ +/* + * DSA key generation. + */ + +#include "misc.h" +#include "ssh.h" +#include "sshkeygen.h" +#include "mpint.h" + +int dsa_generate(struct dsa_key *key, int bits, PrimeGenerationContext *pgc, + ProgressReceiver *prog) +{ + /* + * Progress-reporting setup. + * + * DSA generation involves three potentially long jobs: inventing + * the small prime q, the large prime p, and finding an order-q + * element of the multiplicative group of p. + * + * The latter is done by finding an element whose order is + * _divisible_ by q and raising it to the power of (p-1)/q. Every + * element whose order is not divisible by q is a qth power of q + * distinct elements whose order _is_ divisible by q, so the + * probability of not finding a suitable element on the first try + * is in the region of 1/q, i.e. at most 2^-159. + * + * (So the probability of success will end up indistinguishable + * from 1 in IEEE standard floating point! But what can you do.) + */ + ProgressPhase phase_q = primegen_add_progress_phase(pgc, prog, 160); + ProgressPhase phase_p = primegen_add_progress_phase(pgc, prog, bits); + double g_failure_probability = 1.0 + / (double)(1ULL << 53) + / (double)(1ULL << 53) + / (double)(1ULL << 53); + ProgressPhase phase_g = progress_add_probabilistic( + prog, estimate_modexp_cost(bits), 1.0 - g_failure_probability); + progress_ready(prog); + + PrimeCandidateSource *pcs; + + /* + * Generate q: a prime of length 160. + */ + progress_start_phase(prog, phase_q); + pcs = pcs_new(160); + mp_int *q = primegen_generate(pgc, pcs, prog); + progress_report_phase_complete(prog); + + /* + * Now generate p: a prime of length `bits', such that p-1 is + * divisible by q. + */ + progress_start_phase(prog, phase_p); + pcs = pcs_new(bits); + pcs_require_residue_1_mod_prime(pcs, q); + mp_int *p = primegen_generate(pgc, pcs, prog); + progress_report_phase_complete(prog); + + /* + * Next we need g. Raise 2 to the power (p-1)/q modulo p, and + * if that comes out to one then try 3, then 4 and so on. As + * soon as we hit a non-unit (and non-zero!) one, that'll do + * for g. + */ + progress_start_phase(prog, phase_g); + mp_int *power = mp_div(p, q); /* this is floor(p/q) == (p-1)/q */ + mp_int *h = mp_from_integer(2); + mp_int *g; + while (1) { + progress_report_attempt(prog); + g = mp_modpow(h, power, p); + if (mp_hs_integer(g, 2)) + break; /* got one */ + mp_free(g); + mp_add_integer_into(h, h, 1); + } + mp_free(h); + mp_free(power); + progress_report_phase_complete(prog); + + /* + * Now we're nearly done. All we need now is our private key x, + * which should be a number between 1 and q-1 exclusive, and + * our public key y = g^x mod p. + */ + mp_int *two = mp_from_integer(2); + mp_int *qm1 = mp_copy(q); + mp_sub_integer_into(qm1, qm1, 1); + mp_int *x = mp_random_in_range(two, qm1); + mp_free(two); + mp_free(qm1); + + key->sshk.vt = &ssh_dsa; + + key->p = p; + key->q = q; + key->g = g; + key->x = x; + key->y = mp_modpow(key->g, key->x, key->p); + + return 1; +} diff --git a/keygen/ecdsa.c b/keygen/ecdsa.c new file mode 100644 index 00000000..28a723b2 --- /dev/null +++ b/keygen/ecdsa.c @@ -0,0 +1,37 @@ +/* + * EC key generation. + */ + +#include "ssh.h" +#include "sshkeygen.h" +#include "mpint.h" + +int ecdsa_generate(struct ecdsa_key *ek, int bits) +{ + if (!ec_nist_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt)) + return 0; + + mp_int *one = mp_from_integer(1); + ek->privateKey = mp_random_in_range(one, ek->curve->w.G_order); + mp_free(one); + + ek->publicKey = ecdsa_public(ek->privateKey, ek->sshk.vt); + + return 1; +} + +int eddsa_generate(struct eddsa_key *ek, int bits) +{ + if (!ec_ed_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt)) + return 0; + + /* EdDSA secret keys are just 32 bytes of hash preimage; the + * 64-byte SHA-512 hash of that key will be used when signing, + * but the form of the key stored on disk is the preimage + * only. */ + ek->privateKey = mp_random_bits(bits); + + ek->publicKey = eddsa_public(ek->privateKey, ek->sshk.vt); + + return 1; +} diff --git a/keygen/millerrabin.c b/keygen/millerrabin.c new file mode 100644 index 00000000..24ee6193 --- /dev/null +++ b/keygen/millerrabin.c @@ -0,0 +1,288 @@ +/* + * millerrabin.c: Miller-Rabin probabilistic primality testing, as + * declared in sshkeygen.h. + */ + +#include <assert.h> +#include "ssh.h" +#include "sshkeygen.h" +#include "mpint.h" +#include "mpunsafe.h" + +/* + * The Miller-Rabin primality test is an extension to the Fermat + * test. The Fermat test just checks that a^(p-1) == 1 mod p; this + * is vulnerable to Carmichael numbers. Miller-Rabin considers how + * that 1 is derived as well. + * + * Lemma: if a^2 == 1 (mod p), and p is prime, then either a == 1 + * or a == -1 (mod p). + * + * Proof: p divides a^2-1, i.e. p divides (a+1)(a-1). Hence, + * since p is prime, either p divides (a+1) or p divides (a-1). + * But this is the same as saying that either a is congruent to + * -1 mod p or a is congruent to +1 mod p. [] + * + * Comment: This fails when p is not prime. Consider p=mn, so + * that mn divides (a+1)(a-1). Now we could have m dividing (a+1) + * and n dividing (a-1), without the whole of mn dividing either. + * For example, consider a=10 and p=99. 99 = 9 * 11; 9 divides + * 10-1 and 11 divides 10+1, so a^2 is congruent to 1 mod p + * without a having to be congruent to either 1 or -1. + * + * So the Miller-Rabin test, as well as considering a^(p-1), + * considers a^((p-1)/2), a^((p-1)/4), and so on as far as it can + * go. In other words. we write p-1 as q * 2^k, with k as large as + * possible (i.e. q must be odd), and we consider the powers + * + * a^(q*2^0) a^(q*2^1) ... a^(q*2^(k-1)) a^(q*2^k) + * i.e. a^((n-1)/2^k) a^((n-1)/2^(k-1)) ... a^((n-1)/2) a^(n-1) + * + * If p is to be prime, the last of these must be 1. Therefore, by + * the above lemma, the one before it must be either 1 or -1. And + * _if_ it's 1, then the one before that must be either 1 or -1, + * and so on ... In other words, we expect to see a trailing chain + * of 1s preceded by a -1. (If we're unlucky, our trailing chain of + * 1s will be as long as the list so we'll never get to see what + * lies before it. This doesn't count as a test failure because it + * hasn't _proved_ that p is not prime.) + * + * For example, consider a=2 and p=1729. 1729 is a Carmichael + * number: although it's not prime, it satisfies a^(p-1) == 1 mod p + * for any a coprime to it. So the Fermat test wouldn't have a + * problem with it at all, unless we happened to stumble on an a + * which had a common factor. + * + * So. 1729 - 1 equals 27 * 2^6. So we look at + * + * 2^27 mod 1729 == 645 + * 2^108 mod 1729 == 1065 + * 2^216 mod 1729 == 1 + * 2^432 mod 1729 == 1 + * 2^864 mod 1729 == 1 + * 2^1728 mod 1729 == 1 + * + * We do have a trailing string of 1s, so the Fermat test would + * have been happy. But this trailing string of 1s is preceded by + * 1065; whereas if 1729 were prime, we'd expect to see it preceded + * by -1 (i.e. 1728.). Guards! Seize this impostor. + * + * (If we were unlucky, we might have tried a=16 instead of a=2; + * now 16^27 mod 1729 == 1, so we would have seen a long string of + * 1s and wouldn't have seen the thing _before_ the 1s. So, just + * like the Fermat test, for a given p there may well exist values + * of a which fail to show up its compositeness. So we try several, + * just like the Fermat test. The difference is that Miller-Rabin + * is not _in general_ fooled by Carmichael numbers.) + * + * Put simply, then, the Miller-Rabin test requires us to: + * + * 1. write p-1 as q * 2^k, with q odd + * 2. compute z = (a^q) mod p. + * 3. report success if z == 1 or z == -1. + * 4. square z at most k-1 times, and report success if it becomes + * -1 at any point. + * 5. report failure otherwise. + * + * (We expect z to become -1 after at most k-1 squarings, because + * if it became -1 after k squarings then a^(p-1) would fail to be + * 1. And we don't need to investigate what happens after we see a + * -1, because we _know_ that -1 squared is 1 modulo anything at + * all, so after we've seen a -1 we can be sure of seeing nothing + * but 1s.) + */ + +struct MillerRabin { + MontyContext *mc; + + mp_int *pm1, *m_pm1; + mp_int *lowbit, *two; +}; + +MillerRabin *miller_rabin_new(mp_int *p) +{ + MillerRabin *mr = snew(MillerRabin); + + assert(mp_hs_integer(p, 2)); + assert(mp_get_bit(p, 0) == 1); + + mr->pm1 = mp_copy(p); + mp_sub_integer_into(mr->pm1, mr->pm1, 1); + + /* + * Standard bit-twiddling trick for isolating the lowest set bit + * of a number: x & (-x) + */ + mr->lowbit = mp_new(mp_max_bits(mr->pm1)); + mp_sub_into(mr->lowbit, mr->lowbit, mr->pm1); + mp_and_into(mr->lowbit, mr->lowbit, mr->pm1); + + mr->two = mp_from_integer(2); + + mr->mc = monty_new(p); + mr->m_pm1 = monty_import(mr->mc, mr->pm1); + + return mr; +} + +void miller_rabin_free(MillerRabin *mr) +{ + mp_free(mr->pm1); + mp_free(mr->m_pm1); + mp_free(mr->lowbit); + mp_free(mr->two); + monty_free(mr->mc); + smemclr(mr, sizeof(*mr)); + sfree(mr); +} + +/* + * The main internal function that implements a single M-R test. + * + * Expects the witness integer to be in Montgomery representation. + * (Since in live use witnesses are invented at random, this imposes + * no extra cost on the callers, and saves effort in here.) + */ +static struct mr_result miller_rabin_test_inner(MillerRabin *mr, mp_int *mw) +{ + mp_int *acc = mp_copy(monty_identity(mr->mc)); + mp_int *spare = mp_new(mp_max_bits(mr->pm1)); + size_t bit = mp_max_bits(mr->pm1); + + /* + * The obvious approach to Miller-Rabin would be to start by + * calling monty_pow to raise w to the power q, and then square it + * k times ourselves. But that introduces a timing leak that gives + * away the value of k, i.e., how many factors of 2 there are in + * p-1. + * + * Instead, we don't call monty_pow at all. We do a modular + * exponentiation ourselves to compute w^((p-1)/2), using the + * technique that works from the top bit of the exponent + * downwards. That is, in each iteration we compute + * w^floor(exponent/2^i) for i one less than the previous + * iteration, by squaring the value we previously had and then + * optionally multiplying in w if the next exponent bit is 1. + * + * At the end of that process, once i <= k, the division + * (exponent/2^i) yields an integer, so the values we're computing + * are not just w^(floor of that), but w^(exactly that). In other + * words, the last k intermediate values of this modexp are + * precisely the values M-R wants to check against +1 or -1. + * + * So we interleave those checks with the modexp loop itself, and + * to avoid a timing leak, we check _every_ intermediate result + * against (the Montgomery representations of) both +1 and -1. And + * then we do bitwise masking to arrange that only the sensible + * ones of those checks find their way into our final answer. + */ + + unsigned active = 0; + + struct mr_result result; + result.passed = result.potential_primitive_root = 0; + + while (bit-- > 1) { + /* + * In this iteration, we're computing w^(2e) or w^(2e+1), + * where we have w^e from the previous iteration. So we square + * the value we had already, and then optionally multiply in + * another copy of w depending on the next bit of the exponent. + */ + monty_mul_into(mr->mc, acc, acc, acc); + monty_mul_into(mr->mc, spare, acc, mw); + mp_select_into(acc, acc, spare, mp_get_bit(mr->pm1, bit)); + + /* + * mr->lowbit is a number with only one bit set, corresponding + * to the lowest set bit in p-1. So when that's the bit of the + * exponent we've just processed, we'll detect it by setting + * first_iter to true. That's our indication that we're now + * generating intermediate results useful to M-R, so we also + * set 'active', which stays set from then on. + */ + unsigned first_iter = mp_get_bit(mr->lowbit, bit); + active |= first_iter; + + /* + * Check the intermediate result against both +1 and -1. + */ + unsigned is_plus_1 = mp_cmp_eq(acc, monty_identity(mr->mc)); + unsigned is_minus_1 = mp_cmp_eq(acc, mr->m_pm1); + + /* + * M-R must report success iff either: the first of the useful + * intermediate results (which is w^q) is 1, or _any_ of them + * (from w^q all the way up to w^((p-1)/2)) is -1. + * + * So we want to pass the test if is_plus_1 is set on the + * first iteration, or if is_minus_1 is set on any iteration. + */ + result.passed |= (first_iter & is_plus_1); + result.passed |= (active & is_minus_1); + + /* + * In the final iteration, is_minus_1 is also used to set the + * 'potential primitive root' flag, because we haven't found + * any exponent smaller than p-1 for which w^(that) == 1. + */ + if (bit == 1) + result.potential_primitive_root = is_minus_1; + } + + mp_free(acc); + mp_free(spare); + + return result; +} + +/* + * Wrapper on miller_rabin_test_inner for the convenience of + * testcrypt. Expects the witness integer to be literal, so we + * monty_import it before running the real test. + */ +struct mr_result miller_rabin_test(MillerRabin *mr, mp_int *w) +{ + mp_int *mw = monty_import(mr->mc, w); + struct mr_result result = miller_rabin_test_inner(mr, mw); + mp_free(mw); + return result; +} + +bool miller_rabin_test_random(MillerRabin *mr) +{ + mp_int *mw = mp_random_in_range(mr->two, mr->pm1); + struct mr_result result = miller_rabin_test_inner(mr, mw); + mp_free(mw); + return result.passed; +} + +mp_int *miller_rabin_find_potential_primitive_root(MillerRabin *mr) +{ + while (true) { + mp_int *mw = mp_unsafe_shrink(mp_random_in_range(mr->two, mr->pm1)); + struct mr_result result = miller_rabin_test_inner(mr, mw); + + if (result.passed && result.potential_primitive_root) { + mp_int *pr = monty_export(mr->mc, mw); + mp_free(mw); + return pr; + } + + mp_free(mw); + + if (!result.passed) { + return NULL; + } + } +} + +unsigned miller_rabin_checks_needed(unsigned bits) +{ + /* Table 4.4 from Handbook of Applied Cryptography */ + return (bits >= 1300 ? 2 : bits >= 850 ? 3 : bits >= 650 ? 4 : + bits >= 550 ? 5 : bits >= 450 ? 6 : bits >= 400 ? 7 : + bits >= 350 ? 8 : bits >= 300 ? 9 : bits >= 250 ? 12 : + bits >= 200 ? 15 : bits >= 150 ? 18 : 27); +} + diff --git a/keygen/mpunsafe.c b/keygen/mpunsafe.c new file mode 100644 index 00000000..2cd7a37a --- /dev/null +++ b/keygen/mpunsafe.c @@ -0,0 +1,48 @@ +#include <assert.h> +#include <limits.h> +#include <stdio.h> + +#include "defs.h" +#include "misc.h" +#include "puttymem.h" + +#include "mpint.h" +#include "mpunsafe.h" +#include "crypto/mpint_i.h" + +/* + * This global symbol is also defined in ssh/kex2-client.c, to ensure + * that these unsafe non-constant-time mp_int functions can't end up + * accidentally linked in to any PuTTY tool that actually makes an SSH + * client connection. + * + * (Only _client_ connections, however. Uppity, being a test server + * only, is exempt.) + */ +const int deliberate_symbol_clash = 12345; + +static size_t mp_unsafe_words_needed(mp_int *x) +{ + size_t words = x->nw; + while (words > 1 && !x->w[words-1]) + words--; + return words; +} + +mp_int *mp_unsafe_shrink(mp_int *x) +{ + x->nw = mp_unsafe_words_needed(x); + /* This potentially leaves some allocated words between the new + * and old values of x->nw, which won't be wiped by mp_free now + * that x->nw doesn't mention that they exist. But we've just + * checked they're all zero, so we don't need to wipe them now + * either. */ + return x; +} + +mp_int *mp_unsafe_copy(mp_int *x) +{ + mp_int *copy = mp_make_sized(mp_unsafe_words_needed(x)); + mp_copy_into(copy, x); + return copy; +} diff --git a/keygen/mpunsafe.h b/keygen/mpunsafe.h new file mode 100644 index 00000000..07215372 --- /dev/null +++ b/keygen/mpunsafe.h @@ -0,0 +1,39 @@ +/* + * mpunsafe.h: functions that deal with mp_ints in ways that are *not* + * expected to be constant-time. Used during key generation, in which + * constant run time is a lost cause anyway. + * + * These functions are in a separate header, so that you can easily + * check that you're not calling them in the wrong context. They're + * also defined in a separate source file, which is only linked in to + * the key generation tools. Furthermore, that source file also + * defines a global symbol that intentionally conflicts with one + * defined in the SSH client code, so that any attempt to put these + * functions into the same binary as the live SSH client + * implementation will cause a link-time failure. They should only be + * linked into PuTTYgen and auxiliary test programs. + * + * Also, just in case those precautions aren't enough, all the unsafe + * functions have 'unsafe' in the name. + */ + +#ifndef PUTTY_MPINT_UNSAFE_H +#define PUTTY_MPINT_UNSAFE_H + +/* + * The most obvious unsafe thing you want to do with an mp_int is to + * get rid of leading zero words in its representation, so that its + * nominal size is as close as possible to its true size, and you + * don't waste any time processing it. + * + * mp_unsafe_shrink performs this operation in place, mutating the + * size field of the mp_int it's given. It returns the same pointer it + * was given. + * + * mp_unsafe_copy leaves the original mp_int alone and makes a new one + * with the minimal size. + */ +mp_int *mp_unsafe_shrink(mp_int *m); +mp_int *mp_unsafe_copy(mp_int *m); + +#endif /* PUTTY_MPINT_UNSAFE_H */ diff --git a/keygen/pockle.c b/keygen/pockle.c new file mode 100644 index 00000000..2a072f18 --- /dev/null +++ b/keygen/pockle.c @@ -0,0 +1,450 @@ +#include <assert.h> +#include "ssh.h" +#include "sshkeygen.h" +#include "mpint.h" +#include "mpunsafe.h" +#include "tree234.h" + +typedef struct PocklePrimeRecord PocklePrimeRecord; + +struct Pockle { + tree234 *tree; + + PocklePrimeRecord **list; + size_t nlist, listsize; +}; + +struct PocklePrimeRecord { + mp_int *prime; + PocklePrimeRecord **factors; + size_t nfactors; + mp_int *witness; + + size_t index; /* index in pockle->list */ +}; + +static int ppr_cmp(void *av, void *bv) +{ + PocklePrimeRecord *a = (PocklePrimeRecord *)av; + PocklePrimeRecord *b = (PocklePrimeRecord *)bv; + return mp_cmp_hs(a->prime, b->prime) - mp_cmp_hs(b->prime, a->prime); +} + +static int ppr_find(void *av, void *bv) +{ + mp_int *a = (mp_int *)av; + PocklePrimeRecord *b = (PocklePrimeRecord *)bv; + return mp_cmp_hs(a, b->prime) - mp_cmp_hs(b->prime, a); +} + +Pockle *pockle_new(void) +{ + Pockle *pockle = snew(Pockle); + pockle->tree = newtree234(ppr_cmp); + pockle->list = NULL; + pockle->nlist = pockle->listsize = 0; + return pockle; +} + +void pockle_free(Pockle *pockle) +{ + pockle_release(pockle, 0); + assert(count234(pockle->tree) == 0); + freetree234(pockle->tree); + sfree(pockle->list); + sfree(pockle); +} + +static PockleStatus pockle_insert(Pockle *pockle, mp_int *p, mp_int **factors, + size_t nfactors, mp_int *w) +{ + PocklePrimeRecord *pr = snew(PocklePrimeRecord); + pr->prime = mp_copy(p); + + PocklePrimeRecord *found = add234(pockle->tree, pr); + if (pr != found) { + /* it was already in there */ + mp_free(pr->prime); + sfree(pr); + return POCKLE_OK; + } + + if (w) { + pr->factors = snewn(nfactors, PocklePrimeRecord *); + for (size_t i = 0; i < nfactors; i++) { + pr->factors[i] = find234(pockle->tree, factors[i], ppr_find); + assert(pr->factors[i]); + } + pr->nfactors = nfactors; + pr->witness = mp_copy(w); + } else { + pr->factors = NULL; + pr->nfactors = 0; + pr->witness = NULL; + } + pr->index = pockle->nlist; + + sgrowarray(pockle->list, pockle->listsize, pockle->nlist); + pockle->list[pockle->nlist++] = pr; + return POCKLE_OK; +} + +size_t pockle_mark(Pockle *pockle) +{ + return pockle->nlist; +} + +void pockle_release(Pockle *pockle, size_t mark) +{ + while (pockle->nlist > mark) { + PocklePrimeRecord *pr = pockle->list[--pockle->nlist]; + del234(pockle->tree, pr); + mp_free(pr->prime); + if (pr->witness) + mp_free(pr->witness); + sfree(pr->factors); + sfree(pr); + } +} + +PockleStatus pockle_add_small_prime(Pockle *pockle, mp_int *p) +{ + if (mp_hs_integer(p, (1ULL << 32))) + return POCKLE_SMALL_PRIME_NOT_SMALL; + + uint32_t val = mp_get_integer(p); + + if (val < 2) + return POCKLE_PRIME_SMALLER_THAN_2; + + init_smallprimes(); + for (size_t i = 0; i < NSMALLPRIMES; i++) { + if (val == smallprimes[i]) + break; /* success */ + if (val % smallprimes[i] == 0) + return POCKLE_SMALL_PRIME_NOT_PRIME; + } + + return pockle_insert(pockle, p, NULL, 0, NULL); +} + +PockleStatus pockle_add_prime(Pockle *pockle, mp_int *p, + mp_int **factors, size_t nfactors, + mp_int *witness) +{ + MontyContext *mc = NULL; + mp_int *x = NULL, *f = NULL, *w = NULL; + PockleStatus status; + + /* + * We're going to try to verify that p is prime by using + * Pocklington's theorem. The idea is that we're given w such that + * w^{p-1} == 1 (mod p) (1) + * and for a collection of primes q | p-1, + * w^{(p-1)/q} - 1 is coprime to p. (2) + * + * Suppose r is a prime factor of p itself. Consider the + * multiplicative order of w mod r. By (1), r | w^{p-1}-1. But by + * (2), r does not divide w^{(p-1)/q}-1. So the order of w mod r + * is a factor of p-1, but not a factor of (p-1)/q. Hence, the + * largest power of q that divides p-1 must also divide ord w. + * + * Repeating this reasoning for all q, we find that the product of + * all the q (which we'll denote f) must divide ord w, which in + * turn divides r-1. So f | r-1 for any r | p. + * + * In particular, this means f < r. That is, all primes r | p are + * bigger than f. So if f > sqrt(p), then we've shown p is prime, + * because otherwise it would have to be the product of at least + * two factors bigger than its own square root. + * + * With an extra check, we can also show p to be prime even if + * we're only given enough factors to make f > cbrt(p). See below + * for that part, when we come to it. + */ + + /* + * Start by checking p > 1. It certainly can't be prime otherwise! + * (And since we're going to prove it prime by showing all its + * prime factors are large, we do also have to know it _has_ at + * least one prime factor for that to tell us anything.) + */ + if (!mp_hs_integer(p, 2)) + return POCKLE_PRIME_SMALLER_THAN_2; + + /* + * Check that all the factors we've been given really are primes + * (in the sense that we already had them in our index). Make the + * product f, and check it really does divide p-1. + */ + x = mp_copy(p); + mp_sub_integer_into(x, x, 1); + f = mp_from_integer(1); + for (size_t i = 0; i < nfactors; i++) { + mp_int *q = factors[i]; + + if (!find234(pockle->tree, q, ppr_find)) { + status = POCKLE_FACTOR_NOT_KNOWN_PRIME; + goto out; + } + + mp_int *quotient = mp_new(mp_max_bits(x)); + mp_int *residue = mp_new(mp_max_bits(q)); + mp_divmod_into(x, q, quotient, residue); + + unsigned exact = mp_eq_integer(residue, 0); + mp_free(residue); + + mp_free(x); + x = quotient; + + if (!exact) { + status = POCKLE_FACTOR_NOT_A_FACTOR; + goto out; + } + + mp_int *tmp = f; + f = mp_unsafe_shrink(mp_mul(tmp, q)); + mp_free(tmp); + } + + /* + * Check that f > cbrt(p). + */ + mp_int *f2 = mp_mul(f, f); + mp_int *f3 = mp_mul(f2, f); + bool too_big = mp_cmp_hs(p, f3); + mp_free(f3); + mp_free(f2); + if (too_big) { + status = POCKLE_PRODUCT_OF_FACTORS_TOO_SMALL; + goto out; + } + + /* + * Now do the extra check that allows us to get away with only + * having f > cbrt(p) instead of f > sqrt(p). + * + * If we can show that f | r-1 for any r | p, then we've ruled out + * p being a product of _more_ than two primes (because then it + * would be the product of at least three things bigger than its + * own cube root). But we still have to rule out it being a + * product of exactly two. + * + * Suppose for the sake of contradiction that p is the product of + * two prime factors. We know both of those factors would have to + * be congruent to 1 mod f. So we'd have to have + * + * p = (uf+1)(vf+1) = (uv)f^2 + (u+v)f + 1 (3) + * + * We can't have uv >= f, or else that expression would come to at + * least f^3, i.e. it would exceed p. So uv < f. Hence, u,v < f as + * well. + * + * Can we have u+v >= f? If we did, then we could write v >= f-u, + * and hence f > uv >= u(f-u). That can be rearranged to show that + * u^2 > (u-1)f; decrementing the LHS makes the inequality no + * longer necessarily strict, so we have u^2-1 >= (u-1)f, and + * dividing off u-1 gives u+1 >= f. But we know u < f, so the only + * way this could happen would be if u=f-1, which makes v=1. But + * _then_ (3) gives us p = (f-1)f^2 + f^2 + 1 = f^3+1. But that + * can't be true if f^3 > p. So we can't have u+v >= f either, by + * contradiction. + * + * After all that, what have we shown? We've shown that we can + * write p = (uv)f^2 + (u+v)f + 1, with both uv and u+v strictly + * less than f. In other words, if you write down p in base f, it + * has exactly three digits, and they are uv, u+v and 1. + * + * But that means we can _find_ u and v: we know p and f, so we + * can just extract those digits of p's base-f representation. + * Once we've done so, they give the sum and product of the + * potential u,v. And given the sum and product of two numbers, + * you can make a quadratic which has those numbers as roots. + * + * We don't actually have to _solve_ the quadratic: all we have to + * do is check if its discriminant is a perfect square. If not, + * we'll know that no integers u,v can match this description. + */ + { + /* We already have x = (p-1)/f. So we just need to write x in + * the form aF + b, and then we have a=uv and b=u+v. */ + mp_int *a = mp_new(mp_max_bits(x)); + mp_int *b = mp_new(mp_max_bits(f)); + mp_divmod_into(x, f, a, b); + assert(!mp_cmp_hs(a, f)); + assert(!mp_cmp_hs(b, f)); + + /* If a=0, then that means p < f^2, so we don't need to do + * this check at all: the straightforward Pocklington theorem + * is all we need. */ + if (!mp_eq_integer(a, 0)) { + unsigned perfect_square = 0; + + mp_int *bsq = mp_mul(b, b); + mp_lshift_fixed_into(a, a, 2); + + if (mp_cmp_hs(bsq, a)) { + /* b^2-4a is non-negative, so it might be a square. + * Check it. */ + mp_int *discriminant = mp_sub(bsq, a); + mp_int *remainder = mp_new(mp_max_bits(discriminant)); + mp_int *root = mp_nthroot(discriminant, 2, remainder); + perfect_square = mp_eq_integer(remainder, 0); + mp_free(discriminant); + mp_free(root); + mp_free(remainder); + } + + mp_free(bsq); + + if (perfect_square) { + mp_free(b); + mp_free(a); + status = POCKLE_DISCRIMINANT_IS_SQUARE; + goto out; + } + } + mp_free(b); + mp_free(a); + } + + /* + * Now we've done all the checks that are cheaper than a modpow, + * so we've ruled out as many things as possible before having to + * do any hard work. But there's nothing for it now: make a + * MontyContext. + */ + mc = monty_new(p); + w = monty_import(mc, witness); + + /* + * The initial Fermat check: is w^{p-1} itself congruent to 1 mod + * p? + */ + { + mp_int *pm1 = mp_copy(p); + mp_sub_integer_into(pm1, pm1, 1); + mp_int *power = monty_pow(mc, w, pm1); + unsigned fermat_pass = mp_cmp_eq(power, monty_identity(mc)); + mp_free(power); + mp_free(pm1); + + if (!fermat_pass) { + status = POCKLE_FERMAT_TEST_FAILED; + goto out; + } + } + + /* + * And now, for each factor q, is w^{(p-1)/q}-1 coprime to p? + */ + for (size_t i = 0; i < nfactors; i++) { + mp_int *q = factors[i]; + mp_int *exponent = mp_unsafe_shrink(mp_div(p, q)); + mp_int *power = monty_pow(mc, w, exponent); + mp_int *power_extracted = monty_export(mc, power); + mp_sub_integer_into(power_extracted, power_extracted, 1); + + unsigned coprime = mp_coprime(power_extracted, p); + if (!coprime) { + /* + * If w^{(p-1)/q}-1 is not coprime to p, the test has + * failed. But it makes a difference why. If the power of + * w turned out to be 1, so that we took gcd(1-1,p) = + * gcd(0,p) = p, that's like an inconclusive Fermat or M-R + * test: it might just mean you picked a witness integer + * that wasn't a primitive root. But if the power is any + * _other_ value mod p that is not coprime to p, it means + * we've detected that the number is *actually not prime*! + */ + if (mp_eq_integer(power_extracted, 0)) + status = POCKLE_WITNESS_POWER_IS_1; + else + status = POCKLE_WITNESS_POWER_NOT_COPRIME; + } + + mp_free(exponent); + mp_free(power); + mp_free(power_extracted); + + if (!coprime) + goto out; /* with the status we set up above */ + } + + /* + * Success! p is prime. Insert it into our tree234 of known + * primes, so that future calls to this function can cite it in + * evidence of larger numbers' primality. + */ + status = pockle_insert(pockle, p, factors, nfactors, witness); + + out: + if (x) + mp_free(x); + if (f) + mp_free(f); + if (w) + mp_free(w); + if (mc) + monty_free(mc); + return status; +} + +static void mp_write_decimal(strbuf *sb, mp_int *x) +{ + char *s = mp_get_decimal(x); + ptrlen pl = ptrlen_from_asciz(s); + put_datapl(sb, pl); + smemclr(s, pl.len); + sfree(s); +} + +strbuf *pockle_mpu(Pockle *pockle, mp_int *p) +{ + strbuf *sb = strbuf_new_nm(); + PocklePrimeRecord *pr = find234(pockle->tree, p, ppr_find); + assert(pr); + + bool *needed = snewn(pockle->nlist, bool); + memset(needed, 0, pockle->nlist * sizeof(bool)); + needed[pr->index] = true; + + put_fmt(sb, "[MPU - Primality Certificate]\nVersion 1.0\nBase 10\n\n" + "Proof for:\nN "); + mp_write_decimal(sb, p); + put_fmt(sb, "\n"); + + for (size_t index = pockle->nlist; index-- > 0 ;) { + if (!needed[index]) + continue; + pr = pockle->list[index]; + + if (mp_get_nbits(pr->prime) <= 64) { + put_fmt(sb, "\nType Small\nN "); + mp_write_decimal(sb, pr->prime); + put_fmt(sb, "\n"); + } else { + assert(pr->witness); + put_fmt(sb, "\nType BLS5\nN "); + mp_write_decimal(sb, pr->prime); + put_fmt(sb, "\n"); + for (size_t i = 0; i < pr->nfactors; i++) { + put_fmt(sb, "Q[%"SIZEu"] ", i+1); + mp_write_decimal(sb, pr->factors[i]->prime); + assert(pr->factors[i]->index < index); + needed[pr->factors[i]->index] = true; + put_fmt(sb, "\n"); + } + for (size_t i = 0; i < pr->nfactors + 1; i++) { + put_fmt(sb, "A[%"SIZEu"] ", i); + mp_write_decimal(sb, pr->witness); + put_fmt(sb, "\n"); + } + put_fmt(sb, "----\n"); + } + } + sfree(needed); + + return sb; +} diff --git a/keygen/prime.c b/keygen/prime.c new file mode 100644 index 00000000..d9bdebba --- /dev/null +++ b/keygen/prime.c @@ -0,0 +1,762 @@ +/* + * Prime generation. + */ + +#include <assert.h> +#include <math.h> + +#include "ssh.h" +#include "mpint.h" +#include "mpunsafe.h" +#include "sshkeygen.h" + +/* ---------------------------------------------------------------------- + * Standard probabilistic prime-generation algorithm: + * + * - get a number from our PrimeCandidateSource which will at least + * avoid being divisible by any prime under 2^16 + * + * - perform the Miller-Rabin primality test enough times to + * ensure the probability of it being composite is 2^-80 or + * less + * + * - go back to square one if any M-R test fails. + */ + +static PrimeGenerationContext *probprime_new_context( + const PrimeGenerationPolicy *policy) +{ + PrimeGenerationContext *ctx = snew(PrimeGenerationContext); + ctx->vt = policy; + return ctx; +} + +static void probprime_free_context(PrimeGenerationContext *ctx) +{ + sfree(ctx); +} + +static ProgressPhase probprime_add_progress_phase( + const PrimeGenerationPolicy *policy, + ProgressReceiver *prog, unsigned bits) +{ + /* + * The density of primes near x is 1/(log x). When x is about 2^b, + * that's 1/(b log 2). + * + * But we're only doing the expensive part of the process (the M-R + * checks) for a number that passes the initial winnowing test of + * having no factor less than 2^16 (at least, unless the prime is + * so small that PrimeCandidateSource gives up on that winnowing). + * The density of _those_ numbers is about 1/19.76. So the odds of + * hitting a prime per expensive attempt are boosted by a factor + * of 19.76. + */ + const double log_2 = 0.693147180559945309417232121458; + double winnow_factor = (bits < 32 ? 1.0 : 19.76); + double prob = winnow_factor / (bits * log_2); + + /* + * Estimate the cost of prime generation as the cost of the M-R + * modexps. + */ + double cost = (miller_rabin_checks_needed(bits) * + estimate_modexp_cost(bits)); + return progress_add_probabilistic(prog, cost, prob); +} + +static mp_int *probprime_generate( + PrimeGenerationContext *ctx, + PrimeCandidateSource *pcs, ProgressReceiver *prog) +{ + pcs_ready(pcs); + + while (true) { + progress_report_attempt(prog); + + mp_int *p = pcs_generate(pcs); + if (!p) { + pcs_free(pcs); + return NULL; + } + + MillerRabin *mr = miller_rabin_new(p); + bool known_bad = false; + unsigned nchecks = miller_rabin_checks_needed(mp_get_nbits(p)); + for (unsigned check = 0; check < nchecks; check++) { + if (!miller_rabin_test_random(mr)) { + known_bad = true; + break; + } + } + miller_rabin_free(mr); + + if (!known_bad) { + /* + * We have a prime! + */ + pcs_free(pcs); + return p; + } + + mp_free(p); + } +} + +static strbuf *null_mpu_certificate(PrimeGenerationContext *ctx, mp_int *p) +{ + return NULL; +} + +const PrimeGenerationPolicy primegen_probabilistic = { + probprime_add_progress_phase, + probprime_new_context, + probprime_free_context, + probprime_generate, + null_mpu_certificate, +}; + +/* ---------------------------------------------------------------------- + * Alternative provable-prime algorithm, based on the following paper: + * + * [MAURER] Maurer, U.M. Fast generation of prime numbers and secure + * public-key cryptographic parameters. J. Cryptology 8, 123–155 + * (1995). https://doi.org/10.1007/BF00202269 + */ + +typedef enum SubprimePolicy { + SPP_FAST, + SPP_MAURER_SIMPLE, + SPP_MAURER_COMPLEX, +} SubprimePolicy; + +typedef struct ProvablePrimePolicyExtra { + SubprimePolicy spp; +} ProvablePrimePolicyExtra; + +typedef struct ProvablePrimeContext ProvablePrimeContext; +struct ProvablePrimeContext { + Pockle *pockle; + PrimeGenerationContext pgc; + const ProvablePrimePolicyExtra *extra; +}; + +static PrimeGenerationContext *provableprime_new_context( + const PrimeGenerationPolicy *policy) +{ + ProvablePrimeContext *ppc = snew(ProvablePrimeContext); + ppc->pgc.vt = policy; + ppc->pockle = pockle_new(); + ppc->extra = policy->extra; + return &ppc->pgc; +} + +static void provableprime_free_context(PrimeGenerationContext *ctx) +{ + ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc); + pockle_free(ppc->pockle); + sfree(ppc); +} + +static ProgressPhase provableprime_add_progress_phase( + const PrimeGenerationPolicy *policy, + ProgressReceiver *prog, unsigned bits) +{ + /* + * Estimating the cost of making a _provable_ prime is difficult + * because of all the recursions to smaller sizes. + * + * Once you have enough factors of p-1 to certify primality of p, + * the remaining work in provable prime generation is not very + * different from probabilistic: you generate a random candidate, + * test its primality probabilistically, and use the witness value + * generated as a byproduct of that test for the full Pocklington + * verification. The expensive part, as usual, is made of modpows. + * + * The Pocklington test needs at least two modpows (one for the + * Fermat check, and one per known factor of p-1). + * + * The prior M-R step needs an unknown number, because we iterate + * until we find a value whose order is divisible by the largest + * power of 2 that divides p-1, say 2^j. That excludes half the + * possible witness values (specifically, the quadratic residues), + * so we expect to need on average two M-R operations to find one. + * But that's only if the number _is_ prime - as usual, it's also + * possible that we hit a non-prime and have to try again. + * + * So, if we were only estimating the cost of that final step, it + * would look a lot like the probabilistic version: we'd have to + * estimate the expected total number of modexps by knowing + * something about the density of primes among our candidate + * integers, and then multiply that by estimate_modexp_cost(bits). + * But the problem is that we also have to _find_ a smaller prime, + * so we have to recurse. + * + * In the MAURER_SIMPLE version of the algorithm, you recurse to + * any one of a range of possible smaller sizes i, each with + * probability proportional to 1/i. So your expected time to + * generate an n-bit prime is given by a horrible recurrence of + * the form E_n = S_n + (sum E_i/i) / (sum 1/i), in which S_n is + * the expected cost of the final step once you have your smaller + * primes, and both sums are over ceil(n/2) <= i <= n-20. + * + * At this point I ran out of effort to actually do the maths + * rigorously, so instead I did the empirical experiment of + * generating that sequence in Python and plotting it on a graph. + * My Python code is here, in case I need it again: + +from math import log + +alpha = log(3)/log(2) + 1 # exponent for modexp using Karatsuba mult + +E = [1] * 16 # assume generating tiny primes is trivial + +for n in range(len(E), 4096): + + # Expected time for sub-generations, as a weighted mean of prior + # values of the same sequence. + lo = (n+1)//2 + hi = n-20 + if lo <= hi: + subrange = range(lo, hi+1) + num = sum(E[i]/i for i in subrange) + den = sum(1/i for i in subrange) + else: + num, den = 0, 1 + + # Constant term (cost of final step). + # Similar to probprime_add_progress_phase. + winnow_factor = 1 if n < 32 else 19.76 + prob = winnow_factor / (n * log(2)) + cost = 4 * n**alpha / prob + + E.append(cost + num / den) + +for i, p in enumerate(E): + try: + print(log(i), log(p)) + except ValueError: + continue + + * The output loop prints the logs of both i and E_i, so that when + * I plot the resulting data file in gnuplot I get a log-log + * diagram. That showed me some early noise and then a very + * straight-looking line; feeding the straight part of the graph + * to linear-regression analysis reported that it fits the line + * + * log E_n = -1.7901825337965498 + 3.6199197179662517 * log(n) + * => E_n = 0.16692969657466802 * n^3.6199197179662517 + * + * So my somewhat empirical estimate is that Maurer prime + * generation costs about 0.167 * bits^3.62, in the same arbitrary + * time units used by estimate_modexp_cost. + */ + + return progress_add_linear(prog, 0.167 * pow(bits, 3.62)); +} + +static mp_int *primegen_small(Pockle *pockle, PrimeCandidateSource *pcs) +{ + assert(pcs_get_bits(pcs) <= 32); + + pcs_ready(pcs); + + while (true) { + mp_int *p = pcs_generate(pcs); + if (!p) { + pcs_free(pcs); + return NULL; + } + if (pockle_add_small_prime(pockle, p) == POCKLE_OK) { + pcs_free(pcs); + return p; + } + mp_free(p); + } +} + +#ifdef DEBUG_PRIMEGEN +static void timestamp(FILE *fp) +{ + struct timespec ts; + clock_gettime(CLOCK_MONOTONIC, &ts); + fprintf(fp, "%lu.%09lu: ", (unsigned long)ts.tv_sec, + (unsigned long)ts.tv_nsec); +} +static PRINTF_LIKE(1, 2) void debug_f(const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + timestamp(stderr); + vfprintf(stderr, fmt, ap); + fputc('\n', stderr); + va_end(ap); +} +static void debug_f_mp(const char *fmt, mp_int *x, ...) +{ + va_list ap; + va_start(ap, x); + timestamp(stderr); + vfprintf(stderr, fmt, ap); + mp_dump(stderr, "", x, "\n"); + va_end(ap); +} +#else +#define debug_f(...) ((void)0) +#define debug_f_mp(...) ((void)0) +#endif + +static double uniform_random_double(void) +{ + unsigned char randbuf[8]; + random_read(randbuf, 8); + return GET_64BIT_MSB_FIRST(randbuf) * 0x1.0p-64; +} + +static mp_int *mp_ceil_div(mp_int *n, mp_int *d) +{ + mp_int *nplus = mp_add(n, d); + mp_sub_integer_into(nplus, nplus, 1); + mp_int *toret = mp_div(nplus, d); + mp_free(nplus); + return toret; +} + +static mp_int *provableprime_generate_inner( + ProvablePrimeContext *ppc, PrimeCandidateSource *pcs, + ProgressReceiver *prog, double progress_origin, double progress_scale) +{ + unsigned bits = pcs_get_bits(pcs); + assert(bits > 1); + + if (bits <= 32) { + debug_f("ppgi(%u) -> small", bits); + return primegen_small(ppc->pockle, pcs); + } + + unsigned min_bits_needed, max_bits_needed; + { + /* + * Find the product of all the prime factors we already know + * about. + */ + mp_int *size_got = mp_from_integer(1); + size_t nfactors; + mp_int **factors = pcs_get_known_prime_factors(pcs, &nfactors); + for (size_t i = 0; i < nfactors; i++) { + mp_int *to_free = size_got; + size_got = mp_unsafe_shrink(mp_mul(size_got, factors[i])); + mp_free(to_free); + } + + /* + * Find the largest cofactor we might be able to use, and the + * smallest one we can get away with. + */ + mp_int *upperbound = pcs_get_upper_bound(pcs); + mp_int *size_needed = mp_nthroot(upperbound, 3, NULL); + debug_f_mp("upperbound = ", upperbound); + { + mp_int *to_free = upperbound; + upperbound = mp_unsafe_shrink(mp_div(upperbound, size_got)); + mp_free(to_free); + } + debug_f_mp("size_needed = ", size_needed); + { + mp_int *to_free = size_needed; + size_needed = mp_unsafe_shrink(mp_ceil_div(size_needed, size_got)); + mp_free(to_free); + } + + max_bits_needed = pcs_get_bits_remaining(pcs); + + /* + * We need a prime that is greater than or equal to + * 'size_needed' in order for the product of all our known + * factors of p-1 to exceed the cube root of the largest value + * p might take. + * + * Since pcs_new wants a size specified in bits, we must count + * the bits in size_needed and then add 1. Otherwise we might + * get a value with the same bit count as size_needed but + * slightly smaller than it. + * + * An exception is if size_needed = 1. In that case the + * product of existing known factors is _already_ enough, so + * we don't need to generate an extra factor at all. + */ + if (mp_hs_integer(size_needed, 2)) { + min_bits_needed = mp_get_nbits(size_needed) + 1; + } else { + min_bits_needed = 0; + } + + mp_free(upperbound); + mp_free(size_needed); + mp_free(size_got); + } + + double progress = 0.0; + + if (min_bits_needed) { + debug_f("ppgi(%u) recursing, need [%u,%u] more bits", + bits, min_bits_needed, max_bits_needed); + + unsigned *sizes = NULL; + size_t nsizes = 0, sizesize = 0; + + unsigned real_min = max_bits_needed / 2; + unsigned real_max = (max_bits_needed >= 20 ? + max_bits_needed - 20 : 0); + if (real_min < min_bits_needed) + real_min = min_bits_needed; + if (real_max < real_min) + real_max = real_min; + debug_f("ppgi(%u) revised bits interval = [%u,%u]", + bits, real_min, real_max); + + switch (ppc->extra->spp) { + case SPP_FAST: + /* + * Always pick the smallest subsidiary prime we can get + * away with: just over n/3 bits. + * + * This is not a good mode for cryptographic prime + * generation, because it skews the distribution of primes + * greatly, and worse, it skews them in a direction that + * heads away from the properties crypto algorithms tend + * to like. + * + * (For both discrete-log systems and RSA, people have + * tended to recommend in the past that p-1 should have a + * _large_ factor if possible. There's some disagreement + * on which algorithms this is really necessary for, but + * certainly I've never seen anyone recommend arranging a + * _small_ factor on purpose.) + * + * I originally implemented this mode because it was + * convenient for debugging - it wastes as little time as + * possible on finding a sub-prime and lets you get to the + * interesting part! And I leave it in the code because it + * might still be useful for _something_. Because it's + * cryptographically questionable, it's not selectable in + * the UI of either version of PuTTYgen proper; but it can + * be accessed through testcrypt, and if for some reason a + * definite prime is needed for non-crypto purposes, it + * may still be the fastest way to put your hands on one. + */ + debug_f("ppgi(%u) fast mode, just ask for %u bits", + bits, min_bits_needed); + sgrowarray(sizes, sizesize, nsizes); + sizes[nsizes++] = min_bits_needed; + break; + case SPP_MAURER_SIMPLE: { + /* + * Select the size of the subsidiary prime at random from + * sqrt(outputprime) up to outputprime/2^20, in such a way + * that the probability distribution matches that of the + * largest prime factor of a random n-bit number. + * + * Per [MAURER] section 3.4, the cumulative distribution + * function of this relative size is 1+log2(x), for x in + * [1/2,1]. You can generate a value from the distribution + * given by a cdf by applying the inverse cdf to a uniform + * value in [0,1]. Simplifying that in this case, what we + * have to do is raise 2 to the power of a random real + * number between -1 and 0. (And that gives you the number + * of _bits_ in the sub-prime, as a factor of the desired + * output number of bits.) + * + * We also require that the subsidiary prime q is at least + * 20 bits smaller than the output one, to give us a + * fighting chance of there being _any_ prime we can find + * such that q | p-1. + * + * (But these rules have to be applied in an order that + * still leaves us _some_ interval of possible sizes we + * can pick!) + */ + maurer_simple: + debug_f("ppgi(%u) Maurer simple mode", bits); + + unsigned sub_bits; + do { + double uniform = uniform_random_double(); + sub_bits = real_max * pow(2.0, uniform - 1) + 0.5; + debug_f(" ... %.6f -> %u?", uniform, sub_bits); + } while (!(real_min <= sub_bits && sub_bits <= real_max)); + + debug_f("ppgi(%u) asking for %u bits", bits, sub_bits); + sgrowarray(sizes, sizesize, nsizes); + sizes[nsizes++] = sub_bits; + + break; + } + case SPP_MAURER_COMPLEX: { + /* + * In this mode, we may generate multiple factors of p-1 + * which between them add up to at least n/2 bits, in such + * a way that those are guaranteed to be the largest + * factors of p-1 and that they have the same probability + * distribution as the largest k factors would have in a + * random integer. The idea is that this more elaborate + * procedure gets as close as possible to the same + * probability distribution you'd get by selecting a + * completely random prime (if you feasibly could). + * + * Algorithm from Appendix 1 of [MAURER]: we generate + * random real numbers that sum to at most 1, by choosing + * each one uniformly from the range [0, 1 - sum of all + * the previous ones]. We maintain them in a list in + * decreasing order, and we stop as soon as we find an + * initial subsequence of the list s_1,...,s_r such that + * s_1 + ... + s_{r-1} + 2 s_r > 1. In particular, this + * guarantees that the sum of that initial subsequence is + * at least 1/2, so we end up with enough factors to + * satisfy Pocklington. + */ + + if (max_bits_needed / 2 + 1 > real_max) { + /* Early exit path in the case where this algorithm + * can't possibly generate a value in the range we + * need. In that situation, fall back to Maurer + * simple. */ + debug_f("ppgi(%u) skipping GenerateSizeList, " + "real_max too small", bits); + goto maurer_simple; /* sorry! */ + } + + double *s = NULL; + size_t ns, ssize = 0; + + while (true) { + debug_f("ppgi(%u) starting GenerateSizeList", bits); + ns = 0; + double range = 1.0; + while (true) { + /* Generate the next number */ + double u = uniform_random_double() * range; + range -= u; + debug_f(" u_%"SIZEu" = %g", ns, u); + + /* Insert it in the list */ + sgrowarray(s, ssize, ns); + size_t i; + for (i = ns; i > 0 && s[i-1] < u; i--) + s[i] = s[i-1]; + s[i] = u; + ns++; + debug_f(" inserting as s[%"SIZEu"]", i); + + /* Look for a suitable initial subsequence */ + double sum = 0; + for (i = 0; i < ns; i++) { + sum += s[i]; + if (sum + s[i] > 1.0) { + debug_f(" s[0..%"SIZEu"] works!", i); + + /* Truncate the sequence here, and stop + * generating random real numbers. */ + ns = i+1; + goto got_list; + } + } + } + + got_list:; + /* + * Now translate those real numbers into actual bit + * counts, and do a last-minute check to make sure + * their product is going to be in range. + * + * We have to check both the min and max sizes of the + * total. A b-bit number is in [2^{b-1},2^b). So the + * product of numbers of sizes b_1,...,b_k is at least + * 2^{\sum (b_i-1)}, and less than 2^{\sum b_i}. + */ + nsizes = 0; + + unsigned min_total = 0, max_total = 0; + + for (size_t i = 0; i < ns; i++) { + /* These sizes are measured in actual entropy, so + * add 1 bit each time to account for the + * zero-information leading 1 */ + unsigned this_size = max_bits_needed * s[i] + 1; + debug_f(" bits[%"SIZEu"] = %u", i, this_size); + sgrowarray(sizes, sizesize, nsizes); + sizes[nsizes++] = this_size; + + min_total += this_size - 1; + max_total += this_size; + } + + debug_f(" total bits = [%u,%u)", min_total, max_total); + if (min_total < real_min || max_total > real_max+1) { + debug_f(" total out of range, try again"); + } else { + debug_f(" success! %"SIZEu" sub-primes totalling [%u,%u) " + "bits", nsizes, min_total, max_total); + break; + } + } + + smemclr(s, ssize * sizeof(*s)); + sfree(s); + break; + } + default: + unreachable("bad subprime policy"); + } + + for (size_t i = 0; i < nsizes; i++) { + unsigned sub_bits = sizes[i]; + double progress_in_this_prime = (double)sub_bits / bits; + mp_int *q = provableprime_generate_inner( + ppc, pcs_new(sub_bits), + prog, progress_origin + progress_scale * progress, + progress_scale * progress_in_this_prime); + progress += progress_in_this_prime; + assert(q); + debug_f_mp("ppgi(%u) got factor ", q, bits); + pcs_require_residue_1_mod_prime(pcs, q); + mp_free(q); + } + + smemclr(sizes, sizesize * sizeof(*sizes)); + sfree(sizes); + } else { + debug_f("ppgi(%u) no need to recurse", bits); + } + + debug_f("ppgi(%u) ready, %u bits remaining", + bits, pcs_get_bits_remaining(pcs)); + pcs_ready(pcs); + + while (true) { + mp_int *p = pcs_generate(pcs); + if (!p) { + pcs_free(pcs); + return NULL; + } + + debug_f_mp("provable_step p=", p); + + MillerRabin *mr = miller_rabin_new(p); + debug_f("provable_step mr setup done"); + mp_int *witness = miller_rabin_find_potential_primitive_root(mr); + miller_rabin_free(mr); + + if (!witness) { + debug_f("provable_step mr failed"); + mp_free(p); + continue; + } + + size_t nfactors; + mp_int **factors = pcs_get_known_prime_factors(pcs, &nfactors); + PockleStatus st = pockle_add_prime( + ppc->pockle, p, factors, nfactors, witness); + + if (st != POCKLE_OK) { + debug_f("provable_step proof failed %d", (int)st); + + /* + * Check by assertion that the error status is not one of + * the ones we ought to have ruled out already by + * construction. If there's a bug in this code that means + * we can _never_ pass this test (e.g. picking products of + * factors that never quite reach cbrt(n)), we'd rather + * fail an assertion than loop forever. + */ + assert(st == POCKLE_DISCRIMINANT_IS_SQUARE || + st == POCKLE_WITNESS_POWER_IS_1 || + st == POCKLE_WITNESS_POWER_NOT_COPRIME); + + mp_free(p); + if (witness) + mp_free(witness); + continue; + } + + mp_free(witness); + pcs_free(pcs); + debug_f_mp("ppgi(%u) done, got ", p, bits); + progress_report(prog, progress_origin + progress_scale); + return p; + } +} + +static mp_int *provableprime_generate( + PrimeGenerationContext *ctx, + PrimeCandidateSource *pcs, ProgressReceiver *prog) +{ + ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc); + mp_int *p = provableprime_generate_inner(ppc, pcs, prog, 0.0, 1.0); + + return p; +} + +static inline strbuf *provableprime_mpu_certificate( + PrimeGenerationContext *ctx, mp_int *p) +{ + ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc); + return pockle_mpu(ppc->pockle, p); +} + +#define DECLARE_POLICY(name, policy) \ + static const struct ProvablePrimePolicyExtra \ + pppextra_##name = {policy}; \ + const PrimeGenerationPolicy name = { \ + provableprime_add_progress_phase, \ + provableprime_new_context, \ + provableprime_free_context, \ + provableprime_generate, \ + provableprime_mpu_certificate, \ + &pppextra_##name, \ + } + +DECLARE_POLICY(primegen_provable_fast, SPP_FAST); +DECLARE_POLICY(primegen_provable_maurer_simple, SPP_MAURER_SIMPLE); +DECLARE_POLICY(primegen_provable_maurer_complex, SPP_MAURER_COMPLEX); + +/* ---------------------------------------------------------------------- + * Reusable null implementation of the progress-reporting API. + */ + +static inline ProgressPhase null_progress_add(void) { + ProgressPhase ph = { .n = 0 }; + return ph; +} +ProgressPhase null_progress_add_linear( + ProgressReceiver *prog, double c) { return null_progress_add(); } +ProgressPhase null_progress_add_probabilistic( + ProgressReceiver *prog, double c, double p) { return null_progress_add(); } +void null_progress_ready(ProgressReceiver *prog) {} +void null_progress_start_phase(ProgressReceiver *prog, ProgressPhase phase) {} +void null_progress_report(ProgressReceiver *prog, double progress) {} +void null_progress_report_attempt(ProgressReceiver *prog) {} +void null_progress_report_phase_complete(ProgressReceiver *prog) {} +const ProgressReceiverVtable null_progress_vt = { + .add_linear = null_progress_add_linear, + .add_probabilistic = null_progress_add_probabilistic, + .ready = null_progress_ready, + .start_phase = null_progress_start_phase, + .report = null_progress_report, + .report_attempt = null_progress_report_attempt, + .report_phase_complete = null_progress_report_phase_complete, +}; + +/* ---------------------------------------------------------------------- + * Helper function for progress estimation. + */ + +double estimate_modexp_cost(unsigned bits) +{ + /* + * A modexp of n bits goes roughly like O(n^2.58), on the grounds + * that our modmul is O(n^1.58) (Karatsuba) and you need O(n) of + * them in a modexp. + */ + return pow(bits, 2.58); +} diff --git a/keygen/primecandidate.c b/keygen/primecandidate.c new file mode 100644 index 00000000..fca2b297 --- /dev/null +++ b/keygen/primecandidate.c @@ -0,0 +1,447 @@ +/* + * primecandidate.c: implementation of the PrimeCandidateSource + * abstraction declared in sshkeygen.h. + */ + +#include <assert.h> +#include "ssh.h" +#include "mpint.h" +#include "mpunsafe.h" +#include "sshkeygen.h" + +struct avoid { + unsigned mod, res; +}; + +struct PrimeCandidateSource { + unsigned bits; + bool ready, try_sophie_germain; + bool one_shot, thrown_away_my_shot; + + /* We'll start by making up a random number strictly less than this ... */ + mp_int *limit; + + /* ... then we'll multiply by 'factor', and add 'addend'. */ + mp_int *factor, *addend; + + /* Then we'll try to add a small multiple of 'factor' to it to + * avoid it being a multiple of any small prime. Also, for RSA, we + * may need to avoid it being _this_ multiple of _this_: */ + unsigned avoid_residue, avoid_modulus; + + /* Once we're actually running, this will be the complete list of + * (modulus, residue) pairs we want to avoid. */ + struct avoid *avoids; + size_t navoids, avoidsize; + + /* List of known primes that our number will be congruent to 1 modulo */ + mp_int **kps; + size_t nkps, kpsize; +}; + +PrimeCandidateSource *pcs_new_with_firstbits(unsigned bits, + unsigned first, unsigned nfirst) +{ + PrimeCandidateSource *s = snew(PrimeCandidateSource); + + assert(first >> (nfirst-1) == 1); + + s->bits = bits; + s->ready = false; + s->try_sophie_germain = false; + s->one_shot = false; + s->thrown_away_my_shot = false; + + s->kps = NULL; + s->nkps = s->kpsize = 0; + + s->avoids = NULL; + s->navoids = s->avoidsize = 0; + + /* Make the number that's the lower limit of our range */ + mp_int *firstmp = mp_from_integer(first); + mp_int *base = mp_lshift_fixed(firstmp, bits - nfirst); + mp_free(firstmp); + + /* Set the low bit of that, because all (nontrivial) primes are odd */ + mp_set_bit(base, 0, 1); + + /* That's our addend. Now initialise factor to 2, to ensure we + * only generate odd numbers */ + s->factor = mp_from_integer(2); + s->addend = base; + + /* And that means the limit of our random numbers must be one + * factor of two _less_ than the position of the low bit of + * 'first', because we'll be multiplying the random number by + * 2 immediately afterwards. */ + s->limit = mp_power_2(bits - nfirst - 1); + + /* avoid_modulus == 0 signals that there's no extra residue to avoid */ + s->avoid_residue = 1; + s->avoid_modulus = 0; + + return s; +} + +PrimeCandidateSource *pcs_new(unsigned bits) +{ + return pcs_new_with_firstbits(bits, 1, 1); +} + +void pcs_free(PrimeCandidateSource *s) +{ + mp_free(s->limit); + mp_free(s->factor); + mp_free(s->addend); + for (size_t i = 0; i < s->nkps; i++) + mp_free(s->kps[i]); + sfree(s->avoids); + sfree(s->kps); + sfree(s); +} + +void pcs_try_sophie_germain(PrimeCandidateSource *s) +{ + s->try_sophie_germain = true; +} + +void pcs_set_oneshot(PrimeCandidateSource *s) +{ + s->one_shot = true; +} + +static void pcs_require_residue_inner(PrimeCandidateSource *s, + mp_int *mod, mp_int *res) +{ + /* + * We already have a factor and addend. Ensure this one doesn't + * contradict it. + */ + mp_int *gcd = mp_gcd(mod, s->factor); + mp_int *test1 = mp_mod(s->addend, gcd); + mp_int *test2 = mp_mod(res, gcd); + assert(mp_cmp_eq(test1, test2)); + mp_free(test1); + mp_free(test2); + + /* + * Reduce our input factor and addend, which are constraints on + * the ultimate output number, so that they're constraints on the + * initial cofactor we're going to make up. + * + * If we're generating x and we want to ensure ax+b == r (mod m), + * how does that work? We've already checked that b == r modulo g + * = gcd(a,m), i.e. r-b is a multiple of g, and so are a and m. So + * let's write a=gA, m=gM, (r-b)=gR, and then we can start by + * dividing that off: + * + * ax == r-b (mod m ) + * => gAx == gR (mod gM) + * => Ax == R (mod M) + * + * Now the moduli A,M are coprime, which makes things easier. + * + * We're going to need to generate the x in this equation by + * generating a new smaller value y, multiplying it by M, and + * adding some constant K. So we have x = My + K, and we need to + * work out what K will satisfy the above equation. In other + * words, we need A(My+K) == R (mod M), and the AMy term vanishes, + * so we just need AK == R (mod M). So our congruence is solved by + * setting K to be R * A^{-1} mod M. + */ + mp_int *A = mp_div(s->factor, gcd); + mp_int *M = mp_div(mod, gcd); + mp_int *Rpre = mp_modsub(res, s->addend, mod); + mp_int *R = mp_div(Rpre, gcd); + mp_int *Ainv = mp_invert(A, M); + mp_int *K = mp_modmul(R, Ainv, M); + + mp_free(gcd); + mp_free(Rpre); + mp_free(Ainv); + mp_free(A); + mp_free(R); + + /* + * So we know we have to transform our existing (factor, addend) + * pair into (factor * M, addend * factor * K). Now we just need + * to work out what the limit should be on the random value we're + * generating. + * + * If we need My+K < old_limit, then y < (old_limit-K)/M. But the + * RHS is a fraction, so in integers, we need y < ceil of it. + */ + assert(!mp_cmp_hs(K, s->limit)); + mp_int *dividend = mp_add(s->limit, M); + mp_sub_integer_into(dividend, dividend, 1); + mp_sub_into(dividend, dividend, K); + mp_free(s->limit); + s->limit = mp_div(dividend, M); + mp_free(dividend); + + /* + * Now just update the real factor and addend, and we're done. + */ + + mp_int *addend_old = s->addend; + mp_int *tmp = mp_mul(s->factor, K); /* use the _old_ value of factor */ + s->addend = mp_add(s->addend, tmp); + mp_free(tmp); + mp_free(addend_old); + + mp_int *factor_old = s->factor; + s->factor = mp_mul(s->factor, M); + mp_free(factor_old); + + mp_free(M); + mp_free(K); + s->factor = mp_unsafe_shrink(s->factor); + s->addend = mp_unsafe_shrink(s->addend); + s->limit = mp_unsafe_shrink(s->limit); +} + +void pcs_require_residue(PrimeCandidateSource *s, + mp_int *mod, mp_int *res_orig) +{ + /* + * Reduce the input residue to its least non-negative value, in + * case it was given as a larger equivalent value. + */ + mp_int *res_reduced = mp_mod(res_orig, mod); + pcs_require_residue_inner(s, mod, res_reduced); + mp_free(res_reduced); +} + +void pcs_require_residue_1(PrimeCandidateSource *s, mp_int *mod) +{ + mp_int *res = mp_from_integer(1); + pcs_require_residue(s, mod, res); + mp_free(res); +} + +void pcs_require_residue_1_mod_prime(PrimeCandidateSource *s, mp_int *mod) +{ + pcs_require_residue_1(s, mod); + + sgrowarray(s->kps, s->kpsize, s->nkps); + s->kps[s->nkps++] = mp_copy(mod); +} + +void pcs_avoid_residue_small(PrimeCandidateSource *s, + unsigned mod, unsigned res) +{ + assert(!s->avoid_modulus); /* can't cope with more than one */ + s->avoid_modulus = mod; + s->avoid_residue = res % mod; /* reduce, just in case */ +} + +static int avoid_cmp(const void *av, const void *bv) +{ + const struct avoid *a = (const struct avoid *)av; + const struct avoid *b = (const struct avoid *)bv; + return a->mod < b->mod ? -1 : a->mod > b->mod ? +1 : 0; +} + +static uint64_t invert(uint64_t a, uint64_t m) +{ + int64_t v0 = a, i0 = 1; + int64_t v1 = m, i1 = 0; + while (v0) { + int64_t tmp, q = v1 / v0; + tmp = v0; v0 = v1 - q*v0; v1 = tmp; + tmp = i0; i0 = i1 - q*i0; i1 = tmp; + } + assert(v1 == 1 || v1 == -1); + return i1 * v1; +} + +void pcs_ready(PrimeCandidateSource *s) +{ + /* + * List all the small (modulus, residue) pairs we want to avoid. + */ + + init_smallprimes(); + +#define ADD_AVOID(newmod, newres) do { \ + sgrowarray(s->avoids, s->avoidsize, s->navoids); \ + s->avoids[s->navoids].mod = (newmod); \ + s->avoids[s->navoids].res = (newres); \ + s->navoids++; \ + } while (0) + + unsigned limit = (mp_hs_integer(s->addend, 65536) ? 65536 : + mp_get_integer(s->addend)); + + /* + * Don't be divisible by any small prime, or at least, any prime + * smaller than our output number might actually manage to be. (If + * asked to generate a really small prime, it would be + * embarrassing to rule out legitimate answers on the grounds that + * they were divisible by themselves.) + */ + for (size_t i = 0; i < NSMALLPRIMES && smallprimes[i] < limit; i++) + ADD_AVOID(smallprimes[i], 0); + + if (s->try_sophie_germain) { + /* + * If we're aiming to generate a Sophie Germain prime (i.e. p + * such that 2p+1 is also prime), then we also want to ensure + * 2p+1 is not congruent to 0 mod any small prime, because if + * it is, we'll waste a lot of time generating a p for which + * 2p+1 can't possibly work. So we have to avoid an extra + * residue mod each odd q. + * + * We can simplify: 2p+1 == 0 (mod q) + * => 2p == -1 (mod q) + * => p == -2^{-1} (mod q) + * + * There's no need to do Euclid's algorithm to compute those + * inverses, because for any odd q, the modular inverse of -2 + * mod q is just (q-1)/2. (Proof: multiplying it by -2 gives + * 1-q, which is congruent to 1 mod q.) + */ + for (size_t i = 0; i < NSMALLPRIMES && smallprimes[i] < limit; i++) + if (smallprimes[i] != 2) + ADD_AVOID(smallprimes[i], (smallprimes[i] - 1) / 2); + } + + /* + * Finally, if there's a particular modulus and residue we've been + * told to avoid, put it on the list. + */ + if (s->avoid_modulus) + ADD_AVOID(s->avoid_modulus, s->avoid_residue); + +#undef ADD_AVOID + + /* + * Sort our to-avoid list by modulus. Partly this is so that we'll + * check the smaller moduli first during the live runs, which lets + * us spot most failing cases earlier rather than later. Also, it + * brings equal moduli together, so that we can reuse the residue + * we computed from a previous one. + */ + qsort(s->avoids, s->navoids, sizeof(*s->avoids), avoid_cmp); + + /* + * Next, adjust each of these moduli to take account of our factor + * and addend. If we want factor*x+addend to avoid being congruent + * to 'res' modulo 'mod', then x itself must avoid being congruent + * to (res - addend) * factor^{-1}. + * + * If factor == 0 modulo mod, then the answer will have a fixed + * residue anyway, so we can discard it from our list to test. + */ + int64_t factor_m = 0, addend_m = 0, last_mod = 0; + + size_t out = 0; + for (size_t i = 0; i < s->navoids; i++) { + int64_t mod = s->avoids[i].mod, res = s->avoids[i].res; + if (mod != last_mod) { + last_mod = mod; + addend_m = mp_mod_known_integer(s->addend, mod); + factor_m = mp_mod_known_integer(s->factor, mod); + } + + if (factor_m == 0) { + assert(res != addend_m); + continue; + } + + res = (res - addend_m) * invert(factor_m, mod); + res %= mod; + if (res < 0) + res += mod; + + s->avoids[out].mod = mod; + s->avoids[out].res = res; + out++; + } + + s->navoids = out; + + s->ready = true; +} + +mp_int *pcs_generate(PrimeCandidateSource *s) +{ + assert(s->ready); + if (s->one_shot) { + if (s->thrown_away_my_shot) + return NULL; + s->thrown_away_my_shot = true; + } + + while (true) { + mp_int *x = mp_random_upto(s->limit); + + int64_t x_res = 0, last_mod = 0; + bool ok = true; + + for (size_t i = 0; i < s->navoids; i++) { + int64_t mod = s->avoids[i].mod, avoid_res = s->avoids[i].res; + + if (mod != last_mod) { + last_mod = mod; + x_res = mp_mod_known_integer(x, mod); + } + + if (x_res == avoid_res) { + ok = false; + break; + } + } + + if (!ok) { + mp_free(x); + if (s->one_shot) + return NULL; + continue; /* try a new x */ + } + + /* + * We've found a viable x. Make the final output value. + */ + mp_int *toret = mp_new(s->bits); + mp_mul_into(toret, x, s->factor); + mp_add_into(toret, toret, s->addend); + mp_free(x); + return toret; + } +} + +void pcs_inspect(PrimeCandidateSource *pcs, mp_int **limit_out, + mp_int **factor_out, mp_int **addend_out) +{ + *limit_out = mp_copy(pcs->limit); + *factor_out = mp_copy(pcs->factor); + *addend_out = mp_copy(pcs->addend); +} + +unsigned pcs_get_bits(PrimeCandidateSource *pcs) +{ + return pcs->bits; +} + +unsigned pcs_get_bits_remaining(PrimeCandidateSource *pcs) +{ + return mp_get_nbits(pcs->limit); +} + +mp_int *pcs_get_upper_bound(PrimeCandidateSource *pcs) +{ + /* Compute (limit-1) * factor + addend */ + mp_int *tmp = mp_mul(pcs->limit, pcs->factor); + mp_int *bound = mp_add(tmp, pcs->addend); + mp_free(tmp); + mp_sub_into(bound, bound, pcs->factor); + return bound; +} + +mp_int **pcs_get_known_prime_factors(PrimeCandidateSource *pcs, size_t *nout) +{ + *nout = pcs->nkps; + return pcs->kps; +} diff --git a/keygen/rsa.c b/keygen/rsa.c new file mode 100644 index 00000000..b9676e7a --- /dev/null +++ b/keygen/rsa.c @@ -0,0 +1,292 @@ +/* + * RSA key generation. + */ + +#include <assert.h> + +#include "ssh.h" +#include "sshkeygen.h" +#include "mpint.h" + +#define RSA_EXPONENT 65537 + +#define NFIRSTBITS 13 +static void invent_firstbits(unsigned *one, unsigned *two, + unsigned min_separation); + +typedef struct RSAPrimeDetails RSAPrimeDetails; +struct RSAPrimeDetails { + bool strong; + int bits, bitsm1m1, bitsm1, bitsp1; + unsigned firstbits; + ProgressPhase phase_main, phase_m1m1, phase_m1, phase_p1; +}; + +#define STRONG_MARGIN (20 + NFIRSTBITS) + +static RSAPrimeDetails setup_rsa_prime( + int bits, bool strong, PrimeGenerationContext *pgc, ProgressReceiver *prog) +{ + RSAPrimeDetails pd; + pd.bits = bits; + if (strong) { + pd.bitsm1 = (bits - STRONG_MARGIN) / 2; + pd.bitsp1 = (bits - STRONG_MARGIN) - pd.bitsm1; + pd.bitsm1m1 = (pd.bitsm1 - STRONG_MARGIN) / 2; + if (pd.bitsm1m1 < STRONG_MARGIN) { + /* Absurdly small prime, but we should at least not crash. */ + strong = false; + } + } + pd.strong = strong; + + if (pd.strong) { + pd.phase_m1m1 = primegen_add_progress_phase(pgc, prog, pd.bitsm1m1); + pd.phase_m1 = primegen_add_progress_phase(pgc, prog, pd.bitsm1); + pd.phase_p1 = primegen_add_progress_phase(pgc, prog, pd.bitsp1); + } + pd.phase_main = primegen_add_progress_phase(pgc, prog, pd.bits); + + return pd; +} + +static mp_int *generate_rsa_prime( + RSAPrimeDetails pd, PrimeGenerationContext *pgc, ProgressReceiver *prog) +{ + mp_int *m1m1 = NULL, *m1 = NULL, *p1 = NULL, *p = NULL; + PrimeCandidateSource *pcs; + + if (pd.strong) { + progress_start_phase(prog, pd.phase_m1m1); + pcs = pcs_new_with_firstbits(pd.bitsm1m1, pd.firstbits, NFIRSTBITS); + m1m1 = primegen_generate(pgc, pcs, prog); + progress_report_phase_complete(prog); + + progress_start_phase(prog, pd.phase_m1); + pcs = pcs_new_with_firstbits(pd.bitsm1, pd.firstbits, NFIRSTBITS); + pcs_require_residue_1_mod_prime(pcs, m1m1); + m1 = primegen_generate(pgc, pcs, prog); + progress_report_phase_complete(prog); + + progress_start_phase(prog, pd.phase_p1); + pcs = pcs_new_with_firstbits(pd.bitsp1, pd.firstbits, NFIRSTBITS); + p1 = primegen_generate(pgc, pcs, prog); + progress_report_phase_complete(prog); + } + + progress_start_phase(prog, pd.phase_main); + pcs = pcs_new_with_firstbits(pd.bits, pd.firstbits, NFIRSTBITS); + pcs_avoid_residue_small(pcs, RSA_EXPONENT, 1); + if (pd.strong) { + pcs_require_residue_1_mod_prime(pcs, m1); + mp_int *p1_minus_1 = mp_copy(p1); + mp_sub_integer_into(p1_minus_1, p1, 1); + pcs_require_residue(pcs, p1, p1_minus_1); + mp_free(p1_minus_1); + } + p = primegen_generate(pgc, pcs, prog); + progress_report_phase_complete(prog); + + if (m1m1) + mp_free(m1m1); + if (m1) + mp_free(m1); + if (p1) + mp_free(p1); + + return p; +} + +int rsa_generate(RSAKey *key, int bits, bool strong, + PrimeGenerationContext *pgc, ProgressReceiver *prog) +{ + key->sshk.vt = &ssh_rsa; + + /* + * We don't generate e; we just use a standard one always. + */ + mp_int *exponent = mp_from_integer(RSA_EXPONENT); + + /* + * Generate p and q: primes with combined length `bits', not + * congruent to 1 modulo e. (Strictly speaking, we wanted (p-1) + * and e to be coprime, and (q-1) and e to be coprime, but in + * general that's slightly more fiddly to arrange. By choosing + * a prime e, we can simplify the criterion.) + * + * We give a min_separation of 2 to invent_firstbits(), ensuring + * that the two primes won't be very close to each other. (The + * chance of them being _dangerously_ close is negligible - even + * more so than an attacker guessing a whole 256-bit session key - + * but it doesn't cost much to make sure.) + */ + int qbits = bits / 2; + int pbits = bits - qbits; + assert(pbits >= qbits); + + RSAPrimeDetails pd = setup_rsa_prime(pbits, strong, pgc, prog); + RSAPrimeDetails qd = setup_rsa_prime(qbits, strong, pgc, prog); + progress_ready(prog); + + invent_firstbits(&pd.firstbits, &qd.firstbits, 2); + + mp_int *p = generate_rsa_prime(pd, pgc, prog); + mp_int *q = generate_rsa_prime(qd, pgc, prog); + + /* + * Ensure p > q, by swapping them if not. + * + * We only need to do this if the two primes were generated with + * the same number of bits (i.e. if the requested key size is + * even) - otherwise it's already guaranteed! + */ + if (pbits == qbits) { + mp_cond_swap(p, q, mp_cmp_hs(q, p)); + } else { + assert(mp_cmp_hs(p, q)); + } + + /* + * Now we have p, q and e. All we need to do now is work out + * the other helpful quantities: n=pq, d=e^-1 mod (p-1)(q-1), + * and (q^-1 mod p). + */ + mp_int *modulus = mp_mul(p, q); + mp_int *pm1 = mp_copy(p); + mp_sub_integer_into(pm1, pm1, 1); + mp_int *qm1 = mp_copy(q); + mp_sub_integer_into(qm1, qm1, 1); + mp_int *phi_n = mp_mul(pm1, qm1); + mp_free(pm1); + mp_free(qm1); + mp_int *private_exponent = mp_invert(exponent, phi_n); + mp_free(phi_n); + mp_int *iqmp = mp_invert(q, p); + + /* + * Populate the returned structure. + */ + key->modulus = modulus; + key->exponent = exponent; + key->private_exponent = private_exponent; + key->p = p; + key->q = q; + key->iqmp = iqmp; + + key->bits = mp_get_nbits(modulus); + key->bytes = (key->bits + 7) / 8; + + return 1; +} + +/* + * Invent a pair of values suitable for use as the 'firstbits' values + * for the two RSA primes, such that their product is at least 2, and + * such that their difference is also at least min_separation. + * + * This is used for generating RSA keys which have exactly the + * specified number of bits rather than one fewer - if you generate an + * a-bit and a b-bit number completely at random and multiply them + * together, you could end up with either an (ab-1)-bit number or an + * (ab)-bit number. The former happens log(2)*2-1 of the time (about + * 39%) and, though actually harmless, every time it occurs it has a + * non-zero probability of sparking a user email along the lines of + * 'Hey, I asked PuTTYgen for a 2048-bit key and I only got 2047 bits! + * Bug!' + */ +static inline unsigned firstbits_b_min( + unsigned a, unsigned lo, unsigned hi, unsigned min_separation) +{ + /* To get a large enough product, b must be at least this much */ + unsigned b_min = (2*lo*lo + a - 1) / a; + /* Now enforce a<b, optionally with minimum separation */ + if (b_min < a + min_separation) + b_min = a + min_separation; + /* And cap at the upper limit */ + if (b_min > hi) + b_min = hi; + return b_min; +} + +static void invent_firstbits(unsigned *one, unsigned *two, + unsigned min_separation) +{ + /* + * We'll pick 12 initial bits (number selected at random) for each + * prime, not counting the leading 1. So we want to return two + * values in the range [2^12,2^13) whose product is at least 2^25. + * + * Strategy: count up all the viable pairs, then select a random + * number in that range and use it to pick a pair. + * + * To keep things simple, we'll ensure a < b, and randomly swap + * them at the end. + */ + const unsigned lo = 1<<12, hi = 1<<13, minproduct = 2*lo*lo; + unsigned a, b; + + /* + * Count up the number of prefixes of b that would be valid for + * each prefix of a. + */ + mp_int *total = mp_new(32); + for (a = lo; a < hi; a++) { + unsigned b_min = firstbits_b_min(a, lo, hi, min_separation); + mp_add_integer_into(total, total, hi - b_min); + } + + /* + * Make up a random number in the range [0,2*total). + */ + mp_int *mlo = mp_from_integer(0), *mhi = mp_new(32); + mp_lshift_fixed_into(mhi, total, 1); + mp_int *randval = mp_random_in_range(mlo, mhi); + mp_free(mlo); + mp_free(mhi); + + /* + * Use the low bit of randval as our swap indicator, leaving the + * rest of it in the range [0,total). + */ + unsigned swap = mp_get_bit(randval, 0); + mp_rshift_fixed_into(randval, randval, 1); + + /* + * Now do the same counting loop again to make the actual choice. + */ + a = b = 0; + for (unsigned a_candidate = lo; a_candidate < hi; a_candidate++) { + unsigned b_min = firstbits_b_min(a_candidate, lo, hi, min_separation); + unsigned limit = hi - b_min; + + unsigned b_candidate = b_min + mp_get_integer(randval); + unsigned use_it = 1 ^ mp_hs_integer(randval, limit); + a ^= (a ^ a_candidate) & -use_it; + b ^= (b ^ b_candidate) & -use_it; + + mp_sub_integer_into(randval, randval, limit); + } + + mp_free(randval); + mp_free(total); + + /* + * Check everything came out right. + */ + assert(lo <= a); + assert(a < hi); + assert(lo <= b); + assert(b < hi); + assert(a * b >= minproduct); + assert(b >= a + min_separation); + + /* + * Last-minute optional swap of a and b. + */ + unsigned diff = (a ^ b) & (-swap); + a ^= diff; + b ^= diff; + + *one = a; + *two = b; +} diff --git a/keygen/smallprimes.c b/keygen/smallprimes.c new file mode 100644 index 00000000..a43b0bde --- /dev/null +++ b/keygen/smallprimes.c @@ -0,0 +1,44 @@ +/* + * smallprimes.c: implementation of the array of small primes defined + * in sshkeygen.h. + */ + +#include <assert.h> +#include "ssh.h" +#include "sshkeygen.h" + +/* The real array that stores the primes. It has to be writable in + * this module, but outside this module, we only expose the + * const-qualified pointer 'smallprimes' so that nobody else can + * accidentally overwrite it. */ +static unsigned short smallprimes_array[NSMALLPRIMES]; + +const unsigned short *const smallprimes = smallprimes_array; + +void init_smallprimes(void) +{ + if (smallprimes_array[0]) + return; /* already done */ + + bool A[65536]; + + for (size_t i = 2; i < lenof(A); i++) + A[i] = true; + + for (size_t i = 2; i < lenof(A); i++) { + if (!A[i]) + continue; + for (size_t j = 2*i; j < lenof(A); j += i) + A[j] = false; + } + + size_t pos = 0; + for (size_t i = 2; i < lenof(A); i++) { + if (A[i]) { + assert(pos < NSMALLPRIMES); + smallprimes_array[pos++] = i; + } + } + + assert(pos == NSMALLPRIMES); +} |