Welcome to mirror list, hosted at ThFree Co, Russian Federation.

github.com/mRemoteNG/PuTTYNG.git - Unnamed repository; edit this file 'description' to name the repository.
summaryrefslogtreecommitdiff
path: root/keygen
diff options
context:
space:
mode:
authorDimitrij <kvarkas@gmail.com>2022-10-31 00:45:23 +0300
committerDimitrij <kvarkas@gmail.com>2022-10-31 00:45:23 +0300
commit302fb2e8ddea1c993552c9a30c02f41d01ca54a9 (patch)
treed6cf1b32664296ef2cecda33caeafbe39e6695c1 /keygen
parent59105d9b26363e47f00676bd365b2ac8d4cb536a (diff)
parent4ff82ab29a22936b78510c68f544a99e677efed3 (diff)
Merge tag 'tags/0.78'HEADmaster
Diffstat (limited to 'keygen')
-rw-r--r--keygen/CMakeLists.txt10
-rw-r--r--keygen/dsa.c103
-rw-r--r--keygen/ecdsa.c37
-rw-r--r--keygen/millerrabin.c288
-rw-r--r--keygen/mpunsafe.c48
-rw-r--r--keygen/mpunsafe.h39
-rw-r--r--keygen/pockle.c450
-rw-r--r--keygen/prime.c762
-rw-r--r--keygen/primecandidate.c447
-rw-r--r--keygen/rsa.c292
-rw-r--r--keygen/smallprimes.c44
11 files changed, 2520 insertions, 0 deletions
diff --git a/keygen/CMakeLists.txt b/keygen/CMakeLists.txt
new file mode 100644
index 00000000..17eea2b1
--- /dev/null
+++ b/keygen/CMakeLists.txt
@@ -0,0 +1,10 @@
+add_sources_from_current_dir(keygen
+ dsa.c
+ ecdsa.c
+ millerrabin.c
+ mpunsafe.c
+ pockle.c
+ prime.c
+ primecandidate.c
+ rsa.c
+ smallprimes.c)
diff --git a/keygen/dsa.c b/keygen/dsa.c
new file mode 100644
index 00000000..a7ea4f53
--- /dev/null
+++ b/keygen/dsa.c
@@ -0,0 +1,103 @@
+/*
+ * DSA key generation.
+ */
+
+#include "misc.h"
+#include "ssh.h"
+#include "sshkeygen.h"
+#include "mpint.h"
+
+int dsa_generate(struct dsa_key *key, int bits, PrimeGenerationContext *pgc,
+ ProgressReceiver *prog)
+{
+ /*
+ * Progress-reporting setup.
+ *
+ * DSA generation involves three potentially long jobs: inventing
+ * the small prime q, the large prime p, and finding an order-q
+ * element of the multiplicative group of p.
+ *
+ * The latter is done by finding an element whose order is
+ * _divisible_ by q and raising it to the power of (p-1)/q. Every
+ * element whose order is not divisible by q is a qth power of q
+ * distinct elements whose order _is_ divisible by q, so the
+ * probability of not finding a suitable element on the first try
+ * is in the region of 1/q, i.e. at most 2^-159.
+ *
+ * (So the probability of success will end up indistinguishable
+ * from 1 in IEEE standard floating point! But what can you do.)
+ */
+ ProgressPhase phase_q = primegen_add_progress_phase(pgc, prog, 160);
+ ProgressPhase phase_p = primegen_add_progress_phase(pgc, prog, bits);
+ double g_failure_probability = 1.0
+ / (double)(1ULL << 53)
+ / (double)(1ULL << 53)
+ / (double)(1ULL << 53);
+ ProgressPhase phase_g = progress_add_probabilistic(
+ prog, estimate_modexp_cost(bits), 1.0 - g_failure_probability);
+ progress_ready(prog);
+
+ PrimeCandidateSource *pcs;
+
+ /*
+ * Generate q: a prime of length 160.
+ */
+ progress_start_phase(prog, phase_q);
+ pcs = pcs_new(160);
+ mp_int *q = primegen_generate(pgc, pcs, prog);
+ progress_report_phase_complete(prog);
+
+ /*
+ * Now generate p: a prime of length `bits', such that p-1 is
+ * divisible by q.
+ */
+ progress_start_phase(prog, phase_p);
+ pcs = pcs_new(bits);
+ pcs_require_residue_1_mod_prime(pcs, q);
+ mp_int *p = primegen_generate(pgc, pcs, prog);
+ progress_report_phase_complete(prog);
+
+ /*
+ * Next we need g. Raise 2 to the power (p-1)/q modulo p, and
+ * if that comes out to one then try 3, then 4 and so on. As
+ * soon as we hit a non-unit (and non-zero!) one, that'll do
+ * for g.
+ */
+ progress_start_phase(prog, phase_g);
+ mp_int *power = mp_div(p, q); /* this is floor(p/q) == (p-1)/q */
+ mp_int *h = mp_from_integer(2);
+ mp_int *g;
+ while (1) {
+ progress_report_attempt(prog);
+ g = mp_modpow(h, power, p);
+ if (mp_hs_integer(g, 2))
+ break; /* got one */
+ mp_free(g);
+ mp_add_integer_into(h, h, 1);
+ }
+ mp_free(h);
+ mp_free(power);
+ progress_report_phase_complete(prog);
+
+ /*
+ * Now we're nearly done. All we need now is our private key x,
+ * which should be a number between 1 and q-1 exclusive, and
+ * our public key y = g^x mod p.
+ */
+ mp_int *two = mp_from_integer(2);
+ mp_int *qm1 = mp_copy(q);
+ mp_sub_integer_into(qm1, qm1, 1);
+ mp_int *x = mp_random_in_range(two, qm1);
+ mp_free(two);
+ mp_free(qm1);
+
+ key->sshk.vt = &ssh_dsa;
+
+ key->p = p;
+ key->q = q;
+ key->g = g;
+ key->x = x;
+ key->y = mp_modpow(key->g, key->x, key->p);
+
+ return 1;
+}
diff --git a/keygen/ecdsa.c b/keygen/ecdsa.c
new file mode 100644
index 00000000..28a723b2
--- /dev/null
+++ b/keygen/ecdsa.c
@@ -0,0 +1,37 @@
+/*
+ * EC key generation.
+ */
+
+#include "ssh.h"
+#include "sshkeygen.h"
+#include "mpint.h"
+
+int ecdsa_generate(struct ecdsa_key *ek, int bits)
+{
+ if (!ec_nist_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt))
+ return 0;
+
+ mp_int *one = mp_from_integer(1);
+ ek->privateKey = mp_random_in_range(one, ek->curve->w.G_order);
+ mp_free(one);
+
+ ek->publicKey = ecdsa_public(ek->privateKey, ek->sshk.vt);
+
+ return 1;
+}
+
+int eddsa_generate(struct eddsa_key *ek, int bits)
+{
+ if (!ec_ed_alg_and_curve_by_bits(bits, &ek->curve, &ek->sshk.vt))
+ return 0;
+
+ /* EdDSA secret keys are just 32 bytes of hash preimage; the
+ * 64-byte SHA-512 hash of that key will be used when signing,
+ * but the form of the key stored on disk is the preimage
+ * only. */
+ ek->privateKey = mp_random_bits(bits);
+
+ ek->publicKey = eddsa_public(ek->privateKey, ek->sshk.vt);
+
+ return 1;
+}
diff --git a/keygen/millerrabin.c b/keygen/millerrabin.c
new file mode 100644
index 00000000..24ee6193
--- /dev/null
+++ b/keygen/millerrabin.c
@@ -0,0 +1,288 @@
+/*
+ * millerrabin.c: Miller-Rabin probabilistic primality testing, as
+ * declared in sshkeygen.h.
+ */
+
+#include <assert.h>
+#include "ssh.h"
+#include "sshkeygen.h"
+#include "mpint.h"
+#include "mpunsafe.h"
+
+/*
+ * The Miller-Rabin primality test is an extension to the Fermat
+ * test. The Fermat test just checks that a^(p-1) == 1 mod p; this
+ * is vulnerable to Carmichael numbers. Miller-Rabin considers how
+ * that 1 is derived as well.
+ *
+ * Lemma: if a^2 == 1 (mod p), and p is prime, then either a == 1
+ * or a == -1 (mod p).
+ *
+ * Proof: p divides a^2-1, i.e. p divides (a+1)(a-1). Hence,
+ * since p is prime, either p divides (a+1) or p divides (a-1).
+ * But this is the same as saying that either a is congruent to
+ * -1 mod p or a is congruent to +1 mod p. []
+ *
+ * Comment: This fails when p is not prime. Consider p=mn, so
+ * that mn divides (a+1)(a-1). Now we could have m dividing (a+1)
+ * and n dividing (a-1), without the whole of mn dividing either.
+ * For example, consider a=10 and p=99. 99 = 9 * 11; 9 divides
+ * 10-1 and 11 divides 10+1, so a^2 is congruent to 1 mod p
+ * without a having to be congruent to either 1 or -1.
+ *
+ * So the Miller-Rabin test, as well as considering a^(p-1),
+ * considers a^((p-1)/2), a^((p-1)/4), and so on as far as it can
+ * go. In other words. we write p-1 as q * 2^k, with k as large as
+ * possible (i.e. q must be odd), and we consider the powers
+ *
+ * a^(q*2^0) a^(q*2^1) ... a^(q*2^(k-1)) a^(q*2^k)
+ * i.e. a^((n-1)/2^k) a^((n-1)/2^(k-1)) ... a^((n-1)/2) a^(n-1)
+ *
+ * If p is to be prime, the last of these must be 1. Therefore, by
+ * the above lemma, the one before it must be either 1 or -1. And
+ * _if_ it's 1, then the one before that must be either 1 or -1,
+ * and so on ... In other words, we expect to see a trailing chain
+ * of 1s preceded by a -1. (If we're unlucky, our trailing chain of
+ * 1s will be as long as the list so we'll never get to see what
+ * lies before it. This doesn't count as a test failure because it
+ * hasn't _proved_ that p is not prime.)
+ *
+ * For example, consider a=2 and p=1729. 1729 is a Carmichael
+ * number: although it's not prime, it satisfies a^(p-1) == 1 mod p
+ * for any a coprime to it. So the Fermat test wouldn't have a
+ * problem with it at all, unless we happened to stumble on an a
+ * which had a common factor.
+ *
+ * So. 1729 - 1 equals 27 * 2^6. So we look at
+ *
+ * 2^27 mod 1729 == 645
+ * 2^108 mod 1729 == 1065
+ * 2^216 mod 1729 == 1
+ * 2^432 mod 1729 == 1
+ * 2^864 mod 1729 == 1
+ * 2^1728 mod 1729 == 1
+ *
+ * We do have a trailing string of 1s, so the Fermat test would
+ * have been happy. But this trailing string of 1s is preceded by
+ * 1065; whereas if 1729 were prime, we'd expect to see it preceded
+ * by -1 (i.e. 1728.). Guards! Seize this impostor.
+ *
+ * (If we were unlucky, we might have tried a=16 instead of a=2;
+ * now 16^27 mod 1729 == 1, so we would have seen a long string of
+ * 1s and wouldn't have seen the thing _before_ the 1s. So, just
+ * like the Fermat test, for a given p there may well exist values
+ * of a which fail to show up its compositeness. So we try several,
+ * just like the Fermat test. The difference is that Miller-Rabin
+ * is not _in general_ fooled by Carmichael numbers.)
+ *
+ * Put simply, then, the Miller-Rabin test requires us to:
+ *
+ * 1. write p-1 as q * 2^k, with q odd
+ * 2. compute z = (a^q) mod p.
+ * 3. report success if z == 1 or z == -1.
+ * 4. square z at most k-1 times, and report success if it becomes
+ * -1 at any point.
+ * 5. report failure otherwise.
+ *
+ * (We expect z to become -1 after at most k-1 squarings, because
+ * if it became -1 after k squarings then a^(p-1) would fail to be
+ * 1. And we don't need to investigate what happens after we see a
+ * -1, because we _know_ that -1 squared is 1 modulo anything at
+ * all, so after we've seen a -1 we can be sure of seeing nothing
+ * but 1s.)
+ */
+
+struct MillerRabin {
+ MontyContext *mc;
+
+ mp_int *pm1, *m_pm1;
+ mp_int *lowbit, *two;
+};
+
+MillerRabin *miller_rabin_new(mp_int *p)
+{
+ MillerRabin *mr = snew(MillerRabin);
+
+ assert(mp_hs_integer(p, 2));
+ assert(mp_get_bit(p, 0) == 1);
+
+ mr->pm1 = mp_copy(p);
+ mp_sub_integer_into(mr->pm1, mr->pm1, 1);
+
+ /*
+ * Standard bit-twiddling trick for isolating the lowest set bit
+ * of a number: x & (-x)
+ */
+ mr->lowbit = mp_new(mp_max_bits(mr->pm1));
+ mp_sub_into(mr->lowbit, mr->lowbit, mr->pm1);
+ mp_and_into(mr->lowbit, mr->lowbit, mr->pm1);
+
+ mr->two = mp_from_integer(2);
+
+ mr->mc = monty_new(p);
+ mr->m_pm1 = monty_import(mr->mc, mr->pm1);
+
+ return mr;
+}
+
+void miller_rabin_free(MillerRabin *mr)
+{
+ mp_free(mr->pm1);
+ mp_free(mr->m_pm1);
+ mp_free(mr->lowbit);
+ mp_free(mr->two);
+ monty_free(mr->mc);
+ smemclr(mr, sizeof(*mr));
+ sfree(mr);
+}
+
+/*
+ * The main internal function that implements a single M-R test.
+ *
+ * Expects the witness integer to be in Montgomery representation.
+ * (Since in live use witnesses are invented at random, this imposes
+ * no extra cost on the callers, and saves effort in here.)
+ */
+static struct mr_result miller_rabin_test_inner(MillerRabin *mr, mp_int *mw)
+{
+ mp_int *acc = mp_copy(monty_identity(mr->mc));
+ mp_int *spare = mp_new(mp_max_bits(mr->pm1));
+ size_t bit = mp_max_bits(mr->pm1);
+
+ /*
+ * The obvious approach to Miller-Rabin would be to start by
+ * calling monty_pow to raise w to the power q, and then square it
+ * k times ourselves. But that introduces a timing leak that gives
+ * away the value of k, i.e., how many factors of 2 there are in
+ * p-1.
+ *
+ * Instead, we don't call monty_pow at all. We do a modular
+ * exponentiation ourselves to compute w^((p-1)/2), using the
+ * technique that works from the top bit of the exponent
+ * downwards. That is, in each iteration we compute
+ * w^floor(exponent/2^i) for i one less than the previous
+ * iteration, by squaring the value we previously had and then
+ * optionally multiplying in w if the next exponent bit is 1.
+ *
+ * At the end of that process, once i <= k, the division
+ * (exponent/2^i) yields an integer, so the values we're computing
+ * are not just w^(floor of that), but w^(exactly that). In other
+ * words, the last k intermediate values of this modexp are
+ * precisely the values M-R wants to check against +1 or -1.
+ *
+ * So we interleave those checks with the modexp loop itself, and
+ * to avoid a timing leak, we check _every_ intermediate result
+ * against (the Montgomery representations of) both +1 and -1. And
+ * then we do bitwise masking to arrange that only the sensible
+ * ones of those checks find their way into our final answer.
+ */
+
+ unsigned active = 0;
+
+ struct mr_result result;
+ result.passed = result.potential_primitive_root = 0;
+
+ while (bit-- > 1) {
+ /*
+ * In this iteration, we're computing w^(2e) or w^(2e+1),
+ * where we have w^e from the previous iteration. So we square
+ * the value we had already, and then optionally multiply in
+ * another copy of w depending on the next bit of the exponent.
+ */
+ monty_mul_into(mr->mc, acc, acc, acc);
+ monty_mul_into(mr->mc, spare, acc, mw);
+ mp_select_into(acc, acc, spare, mp_get_bit(mr->pm1, bit));
+
+ /*
+ * mr->lowbit is a number with only one bit set, corresponding
+ * to the lowest set bit in p-1. So when that's the bit of the
+ * exponent we've just processed, we'll detect it by setting
+ * first_iter to true. That's our indication that we're now
+ * generating intermediate results useful to M-R, so we also
+ * set 'active', which stays set from then on.
+ */
+ unsigned first_iter = mp_get_bit(mr->lowbit, bit);
+ active |= first_iter;
+
+ /*
+ * Check the intermediate result against both +1 and -1.
+ */
+ unsigned is_plus_1 = mp_cmp_eq(acc, monty_identity(mr->mc));
+ unsigned is_minus_1 = mp_cmp_eq(acc, mr->m_pm1);
+
+ /*
+ * M-R must report success iff either: the first of the useful
+ * intermediate results (which is w^q) is 1, or _any_ of them
+ * (from w^q all the way up to w^((p-1)/2)) is -1.
+ *
+ * So we want to pass the test if is_plus_1 is set on the
+ * first iteration, or if is_minus_1 is set on any iteration.
+ */
+ result.passed |= (first_iter & is_plus_1);
+ result.passed |= (active & is_minus_1);
+
+ /*
+ * In the final iteration, is_minus_1 is also used to set the
+ * 'potential primitive root' flag, because we haven't found
+ * any exponent smaller than p-1 for which w^(that) == 1.
+ */
+ if (bit == 1)
+ result.potential_primitive_root = is_minus_1;
+ }
+
+ mp_free(acc);
+ mp_free(spare);
+
+ return result;
+}
+
+/*
+ * Wrapper on miller_rabin_test_inner for the convenience of
+ * testcrypt. Expects the witness integer to be literal, so we
+ * monty_import it before running the real test.
+ */
+struct mr_result miller_rabin_test(MillerRabin *mr, mp_int *w)
+{
+ mp_int *mw = monty_import(mr->mc, w);
+ struct mr_result result = miller_rabin_test_inner(mr, mw);
+ mp_free(mw);
+ return result;
+}
+
+bool miller_rabin_test_random(MillerRabin *mr)
+{
+ mp_int *mw = mp_random_in_range(mr->two, mr->pm1);
+ struct mr_result result = miller_rabin_test_inner(mr, mw);
+ mp_free(mw);
+ return result.passed;
+}
+
+mp_int *miller_rabin_find_potential_primitive_root(MillerRabin *mr)
+{
+ while (true) {
+ mp_int *mw = mp_unsafe_shrink(mp_random_in_range(mr->two, mr->pm1));
+ struct mr_result result = miller_rabin_test_inner(mr, mw);
+
+ if (result.passed && result.potential_primitive_root) {
+ mp_int *pr = monty_export(mr->mc, mw);
+ mp_free(mw);
+ return pr;
+ }
+
+ mp_free(mw);
+
+ if (!result.passed) {
+ return NULL;
+ }
+ }
+}
+
+unsigned miller_rabin_checks_needed(unsigned bits)
+{
+ /* Table 4.4 from Handbook of Applied Cryptography */
+ return (bits >= 1300 ? 2 : bits >= 850 ? 3 : bits >= 650 ? 4 :
+ bits >= 550 ? 5 : bits >= 450 ? 6 : bits >= 400 ? 7 :
+ bits >= 350 ? 8 : bits >= 300 ? 9 : bits >= 250 ? 12 :
+ bits >= 200 ? 15 : bits >= 150 ? 18 : 27);
+}
+
diff --git a/keygen/mpunsafe.c b/keygen/mpunsafe.c
new file mode 100644
index 00000000..2cd7a37a
--- /dev/null
+++ b/keygen/mpunsafe.c
@@ -0,0 +1,48 @@
+#include <assert.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "defs.h"
+#include "misc.h"
+#include "puttymem.h"
+
+#include "mpint.h"
+#include "mpunsafe.h"
+#include "crypto/mpint_i.h"
+
+/*
+ * This global symbol is also defined in ssh/kex2-client.c, to ensure
+ * that these unsafe non-constant-time mp_int functions can't end up
+ * accidentally linked in to any PuTTY tool that actually makes an SSH
+ * client connection.
+ *
+ * (Only _client_ connections, however. Uppity, being a test server
+ * only, is exempt.)
+ */
+const int deliberate_symbol_clash = 12345;
+
+static size_t mp_unsafe_words_needed(mp_int *x)
+{
+ size_t words = x->nw;
+ while (words > 1 && !x->w[words-1])
+ words--;
+ return words;
+}
+
+mp_int *mp_unsafe_shrink(mp_int *x)
+{
+ x->nw = mp_unsafe_words_needed(x);
+ /* This potentially leaves some allocated words between the new
+ * and old values of x->nw, which won't be wiped by mp_free now
+ * that x->nw doesn't mention that they exist. But we've just
+ * checked they're all zero, so we don't need to wipe them now
+ * either. */
+ return x;
+}
+
+mp_int *mp_unsafe_copy(mp_int *x)
+{
+ mp_int *copy = mp_make_sized(mp_unsafe_words_needed(x));
+ mp_copy_into(copy, x);
+ return copy;
+}
diff --git a/keygen/mpunsafe.h b/keygen/mpunsafe.h
new file mode 100644
index 00000000..07215372
--- /dev/null
+++ b/keygen/mpunsafe.h
@@ -0,0 +1,39 @@
+/*
+ * mpunsafe.h: functions that deal with mp_ints in ways that are *not*
+ * expected to be constant-time. Used during key generation, in which
+ * constant run time is a lost cause anyway.
+ *
+ * These functions are in a separate header, so that you can easily
+ * check that you're not calling them in the wrong context. They're
+ * also defined in a separate source file, which is only linked in to
+ * the key generation tools. Furthermore, that source file also
+ * defines a global symbol that intentionally conflicts with one
+ * defined in the SSH client code, so that any attempt to put these
+ * functions into the same binary as the live SSH client
+ * implementation will cause a link-time failure. They should only be
+ * linked into PuTTYgen and auxiliary test programs.
+ *
+ * Also, just in case those precautions aren't enough, all the unsafe
+ * functions have 'unsafe' in the name.
+ */
+
+#ifndef PUTTY_MPINT_UNSAFE_H
+#define PUTTY_MPINT_UNSAFE_H
+
+/*
+ * The most obvious unsafe thing you want to do with an mp_int is to
+ * get rid of leading zero words in its representation, so that its
+ * nominal size is as close as possible to its true size, and you
+ * don't waste any time processing it.
+ *
+ * mp_unsafe_shrink performs this operation in place, mutating the
+ * size field of the mp_int it's given. It returns the same pointer it
+ * was given.
+ *
+ * mp_unsafe_copy leaves the original mp_int alone and makes a new one
+ * with the minimal size.
+ */
+mp_int *mp_unsafe_shrink(mp_int *m);
+mp_int *mp_unsafe_copy(mp_int *m);
+
+#endif /* PUTTY_MPINT_UNSAFE_H */
diff --git a/keygen/pockle.c b/keygen/pockle.c
new file mode 100644
index 00000000..2a072f18
--- /dev/null
+++ b/keygen/pockle.c
@@ -0,0 +1,450 @@
+#include <assert.h>
+#include "ssh.h"
+#include "sshkeygen.h"
+#include "mpint.h"
+#include "mpunsafe.h"
+#include "tree234.h"
+
+typedef struct PocklePrimeRecord PocklePrimeRecord;
+
+struct Pockle {
+ tree234 *tree;
+
+ PocklePrimeRecord **list;
+ size_t nlist, listsize;
+};
+
+struct PocklePrimeRecord {
+ mp_int *prime;
+ PocklePrimeRecord **factors;
+ size_t nfactors;
+ mp_int *witness;
+
+ size_t index; /* index in pockle->list */
+};
+
+static int ppr_cmp(void *av, void *bv)
+{
+ PocklePrimeRecord *a = (PocklePrimeRecord *)av;
+ PocklePrimeRecord *b = (PocklePrimeRecord *)bv;
+ return mp_cmp_hs(a->prime, b->prime) - mp_cmp_hs(b->prime, a->prime);
+}
+
+static int ppr_find(void *av, void *bv)
+{
+ mp_int *a = (mp_int *)av;
+ PocklePrimeRecord *b = (PocklePrimeRecord *)bv;
+ return mp_cmp_hs(a, b->prime) - mp_cmp_hs(b->prime, a);
+}
+
+Pockle *pockle_new(void)
+{
+ Pockle *pockle = snew(Pockle);
+ pockle->tree = newtree234(ppr_cmp);
+ pockle->list = NULL;
+ pockle->nlist = pockle->listsize = 0;
+ return pockle;
+}
+
+void pockle_free(Pockle *pockle)
+{
+ pockle_release(pockle, 0);
+ assert(count234(pockle->tree) == 0);
+ freetree234(pockle->tree);
+ sfree(pockle->list);
+ sfree(pockle);
+}
+
+static PockleStatus pockle_insert(Pockle *pockle, mp_int *p, mp_int **factors,
+ size_t nfactors, mp_int *w)
+{
+ PocklePrimeRecord *pr = snew(PocklePrimeRecord);
+ pr->prime = mp_copy(p);
+
+ PocklePrimeRecord *found = add234(pockle->tree, pr);
+ if (pr != found) {
+ /* it was already in there */
+ mp_free(pr->prime);
+ sfree(pr);
+ return POCKLE_OK;
+ }
+
+ if (w) {
+ pr->factors = snewn(nfactors, PocklePrimeRecord *);
+ for (size_t i = 0; i < nfactors; i++) {
+ pr->factors[i] = find234(pockle->tree, factors[i], ppr_find);
+ assert(pr->factors[i]);
+ }
+ pr->nfactors = nfactors;
+ pr->witness = mp_copy(w);
+ } else {
+ pr->factors = NULL;
+ pr->nfactors = 0;
+ pr->witness = NULL;
+ }
+ pr->index = pockle->nlist;
+
+ sgrowarray(pockle->list, pockle->listsize, pockle->nlist);
+ pockle->list[pockle->nlist++] = pr;
+ return POCKLE_OK;
+}
+
+size_t pockle_mark(Pockle *pockle)
+{
+ return pockle->nlist;
+}
+
+void pockle_release(Pockle *pockle, size_t mark)
+{
+ while (pockle->nlist > mark) {
+ PocklePrimeRecord *pr = pockle->list[--pockle->nlist];
+ del234(pockle->tree, pr);
+ mp_free(pr->prime);
+ if (pr->witness)
+ mp_free(pr->witness);
+ sfree(pr->factors);
+ sfree(pr);
+ }
+}
+
+PockleStatus pockle_add_small_prime(Pockle *pockle, mp_int *p)
+{
+ if (mp_hs_integer(p, (1ULL << 32)))
+ return POCKLE_SMALL_PRIME_NOT_SMALL;
+
+ uint32_t val = mp_get_integer(p);
+
+ if (val < 2)
+ return POCKLE_PRIME_SMALLER_THAN_2;
+
+ init_smallprimes();
+ for (size_t i = 0; i < NSMALLPRIMES; i++) {
+ if (val == smallprimes[i])
+ break; /* success */
+ if (val % smallprimes[i] == 0)
+ return POCKLE_SMALL_PRIME_NOT_PRIME;
+ }
+
+ return pockle_insert(pockle, p, NULL, 0, NULL);
+}
+
+PockleStatus pockle_add_prime(Pockle *pockle, mp_int *p,
+ mp_int **factors, size_t nfactors,
+ mp_int *witness)
+{
+ MontyContext *mc = NULL;
+ mp_int *x = NULL, *f = NULL, *w = NULL;
+ PockleStatus status;
+
+ /*
+ * We're going to try to verify that p is prime by using
+ * Pocklington's theorem. The idea is that we're given w such that
+ * w^{p-1} == 1 (mod p) (1)
+ * and for a collection of primes q | p-1,
+ * w^{(p-1)/q} - 1 is coprime to p. (2)
+ *
+ * Suppose r is a prime factor of p itself. Consider the
+ * multiplicative order of w mod r. By (1), r | w^{p-1}-1. But by
+ * (2), r does not divide w^{(p-1)/q}-1. So the order of w mod r
+ * is a factor of p-1, but not a factor of (p-1)/q. Hence, the
+ * largest power of q that divides p-1 must also divide ord w.
+ *
+ * Repeating this reasoning for all q, we find that the product of
+ * all the q (which we'll denote f) must divide ord w, which in
+ * turn divides r-1. So f | r-1 for any r | p.
+ *
+ * In particular, this means f < r. That is, all primes r | p are
+ * bigger than f. So if f > sqrt(p), then we've shown p is prime,
+ * because otherwise it would have to be the product of at least
+ * two factors bigger than its own square root.
+ *
+ * With an extra check, we can also show p to be prime even if
+ * we're only given enough factors to make f > cbrt(p). See below
+ * for that part, when we come to it.
+ */
+
+ /*
+ * Start by checking p > 1. It certainly can't be prime otherwise!
+ * (And since we're going to prove it prime by showing all its
+ * prime factors are large, we do also have to know it _has_ at
+ * least one prime factor for that to tell us anything.)
+ */
+ if (!mp_hs_integer(p, 2))
+ return POCKLE_PRIME_SMALLER_THAN_2;
+
+ /*
+ * Check that all the factors we've been given really are primes
+ * (in the sense that we already had them in our index). Make the
+ * product f, and check it really does divide p-1.
+ */
+ x = mp_copy(p);
+ mp_sub_integer_into(x, x, 1);
+ f = mp_from_integer(1);
+ for (size_t i = 0; i < nfactors; i++) {
+ mp_int *q = factors[i];
+
+ if (!find234(pockle->tree, q, ppr_find)) {
+ status = POCKLE_FACTOR_NOT_KNOWN_PRIME;
+ goto out;
+ }
+
+ mp_int *quotient = mp_new(mp_max_bits(x));
+ mp_int *residue = mp_new(mp_max_bits(q));
+ mp_divmod_into(x, q, quotient, residue);
+
+ unsigned exact = mp_eq_integer(residue, 0);
+ mp_free(residue);
+
+ mp_free(x);
+ x = quotient;
+
+ if (!exact) {
+ status = POCKLE_FACTOR_NOT_A_FACTOR;
+ goto out;
+ }
+
+ mp_int *tmp = f;
+ f = mp_unsafe_shrink(mp_mul(tmp, q));
+ mp_free(tmp);
+ }
+
+ /*
+ * Check that f > cbrt(p).
+ */
+ mp_int *f2 = mp_mul(f, f);
+ mp_int *f3 = mp_mul(f2, f);
+ bool too_big = mp_cmp_hs(p, f3);
+ mp_free(f3);
+ mp_free(f2);
+ if (too_big) {
+ status = POCKLE_PRODUCT_OF_FACTORS_TOO_SMALL;
+ goto out;
+ }
+
+ /*
+ * Now do the extra check that allows us to get away with only
+ * having f > cbrt(p) instead of f > sqrt(p).
+ *
+ * If we can show that f | r-1 for any r | p, then we've ruled out
+ * p being a product of _more_ than two primes (because then it
+ * would be the product of at least three things bigger than its
+ * own cube root). But we still have to rule out it being a
+ * product of exactly two.
+ *
+ * Suppose for the sake of contradiction that p is the product of
+ * two prime factors. We know both of those factors would have to
+ * be congruent to 1 mod f. So we'd have to have
+ *
+ * p = (uf+1)(vf+1) = (uv)f^2 + (u+v)f + 1 (3)
+ *
+ * We can't have uv >= f, or else that expression would come to at
+ * least f^3, i.e. it would exceed p. So uv < f. Hence, u,v < f as
+ * well.
+ *
+ * Can we have u+v >= f? If we did, then we could write v >= f-u,
+ * and hence f > uv >= u(f-u). That can be rearranged to show that
+ * u^2 > (u-1)f; decrementing the LHS makes the inequality no
+ * longer necessarily strict, so we have u^2-1 >= (u-1)f, and
+ * dividing off u-1 gives u+1 >= f. But we know u < f, so the only
+ * way this could happen would be if u=f-1, which makes v=1. But
+ * _then_ (3) gives us p = (f-1)f^2 + f^2 + 1 = f^3+1. But that
+ * can't be true if f^3 > p. So we can't have u+v >= f either, by
+ * contradiction.
+ *
+ * After all that, what have we shown? We've shown that we can
+ * write p = (uv)f^2 + (u+v)f + 1, with both uv and u+v strictly
+ * less than f. In other words, if you write down p in base f, it
+ * has exactly three digits, and they are uv, u+v and 1.
+ *
+ * But that means we can _find_ u and v: we know p and f, so we
+ * can just extract those digits of p's base-f representation.
+ * Once we've done so, they give the sum and product of the
+ * potential u,v. And given the sum and product of two numbers,
+ * you can make a quadratic which has those numbers as roots.
+ *
+ * We don't actually have to _solve_ the quadratic: all we have to
+ * do is check if its discriminant is a perfect square. If not,
+ * we'll know that no integers u,v can match this description.
+ */
+ {
+ /* We already have x = (p-1)/f. So we just need to write x in
+ * the form aF + b, and then we have a=uv and b=u+v. */
+ mp_int *a = mp_new(mp_max_bits(x));
+ mp_int *b = mp_new(mp_max_bits(f));
+ mp_divmod_into(x, f, a, b);
+ assert(!mp_cmp_hs(a, f));
+ assert(!mp_cmp_hs(b, f));
+
+ /* If a=0, then that means p < f^2, so we don't need to do
+ * this check at all: the straightforward Pocklington theorem
+ * is all we need. */
+ if (!mp_eq_integer(a, 0)) {
+ unsigned perfect_square = 0;
+
+ mp_int *bsq = mp_mul(b, b);
+ mp_lshift_fixed_into(a, a, 2);
+
+ if (mp_cmp_hs(bsq, a)) {
+ /* b^2-4a is non-negative, so it might be a square.
+ * Check it. */
+ mp_int *discriminant = mp_sub(bsq, a);
+ mp_int *remainder = mp_new(mp_max_bits(discriminant));
+ mp_int *root = mp_nthroot(discriminant, 2, remainder);
+ perfect_square = mp_eq_integer(remainder, 0);
+ mp_free(discriminant);
+ mp_free(root);
+ mp_free(remainder);
+ }
+
+ mp_free(bsq);
+
+ if (perfect_square) {
+ mp_free(b);
+ mp_free(a);
+ status = POCKLE_DISCRIMINANT_IS_SQUARE;
+ goto out;
+ }
+ }
+ mp_free(b);
+ mp_free(a);
+ }
+
+ /*
+ * Now we've done all the checks that are cheaper than a modpow,
+ * so we've ruled out as many things as possible before having to
+ * do any hard work. But there's nothing for it now: make a
+ * MontyContext.
+ */
+ mc = monty_new(p);
+ w = monty_import(mc, witness);
+
+ /*
+ * The initial Fermat check: is w^{p-1} itself congruent to 1 mod
+ * p?
+ */
+ {
+ mp_int *pm1 = mp_copy(p);
+ mp_sub_integer_into(pm1, pm1, 1);
+ mp_int *power = monty_pow(mc, w, pm1);
+ unsigned fermat_pass = mp_cmp_eq(power, monty_identity(mc));
+ mp_free(power);
+ mp_free(pm1);
+
+ if (!fermat_pass) {
+ status = POCKLE_FERMAT_TEST_FAILED;
+ goto out;
+ }
+ }
+
+ /*
+ * And now, for each factor q, is w^{(p-1)/q}-1 coprime to p?
+ */
+ for (size_t i = 0; i < nfactors; i++) {
+ mp_int *q = factors[i];
+ mp_int *exponent = mp_unsafe_shrink(mp_div(p, q));
+ mp_int *power = monty_pow(mc, w, exponent);
+ mp_int *power_extracted = monty_export(mc, power);
+ mp_sub_integer_into(power_extracted, power_extracted, 1);
+
+ unsigned coprime = mp_coprime(power_extracted, p);
+ if (!coprime) {
+ /*
+ * If w^{(p-1)/q}-1 is not coprime to p, the test has
+ * failed. But it makes a difference why. If the power of
+ * w turned out to be 1, so that we took gcd(1-1,p) =
+ * gcd(0,p) = p, that's like an inconclusive Fermat or M-R
+ * test: it might just mean you picked a witness integer
+ * that wasn't a primitive root. But if the power is any
+ * _other_ value mod p that is not coprime to p, it means
+ * we've detected that the number is *actually not prime*!
+ */
+ if (mp_eq_integer(power_extracted, 0))
+ status = POCKLE_WITNESS_POWER_IS_1;
+ else
+ status = POCKLE_WITNESS_POWER_NOT_COPRIME;
+ }
+
+ mp_free(exponent);
+ mp_free(power);
+ mp_free(power_extracted);
+
+ if (!coprime)
+ goto out; /* with the status we set up above */
+ }
+
+ /*
+ * Success! p is prime. Insert it into our tree234 of known
+ * primes, so that future calls to this function can cite it in
+ * evidence of larger numbers' primality.
+ */
+ status = pockle_insert(pockle, p, factors, nfactors, witness);
+
+ out:
+ if (x)
+ mp_free(x);
+ if (f)
+ mp_free(f);
+ if (w)
+ mp_free(w);
+ if (mc)
+ monty_free(mc);
+ return status;
+}
+
+static void mp_write_decimal(strbuf *sb, mp_int *x)
+{
+ char *s = mp_get_decimal(x);
+ ptrlen pl = ptrlen_from_asciz(s);
+ put_datapl(sb, pl);
+ smemclr(s, pl.len);
+ sfree(s);
+}
+
+strbuf *pockle_mpu(Pockle *pockle, mp_int *p)
+{
+ strbuf *sb = strbuf_new_nm();
+ PocklePrimeRecord *pr = find234(pockle->tree, p, ppr_find);
+ assert(pr);
+
+ bool *needed = snewn(pockle->nlist, bool);
+ memset(needed, 0, pockle->nlist * sizeof(bool));
+ needed[pr->index] = true;
+
+ put_fmt(sb, "[MPU - Primality Certificate]\nVersion 1.0\nBase 10\n\n"
+ "Proof for:\nN ");
+ mp_write_decimal(sb, p);
+ put_fmt(sb, "\n");
+
+ for (size_t index = pockle->nlist; index-- > 0 ;) {
+ if (!needed[index])
+ continue;
+ pr = pockle->list[index];
+
+ if (mp_get_nbits(pr->prime) <= 64) {
+ put_fmt(sb, "\nType Small\nN ");
+ mp_write_decimal(sb, pr->prime);
+ put_fmt(sb, "\n");
+ } else {
+ assert(pr->witness);
+ put_fmt(sb, "\nType BLS5\nN ");
+ mp_write_decimal(sb, pr->prime);
+ put_fmt(sb, "\n");
+ for (size_t i = 0; i < pr->nfactors; i++) {
+ put_fmt(sb, "Q[%"SIZEu"] ", i+1);
+ mp_write_decimal(sb, pr->factors[i]->prime);
+ assert(pr->factors[i]->index < index);
+ needed[pr->factors[i]->index] = true;
+ put_fmt(sb, "\n");
+ }
+ for (size_t i = 0; i < pr->nfactors + 1; i++) {
+ put_fmt(sb, "A[%"SIZEu"] ", i);
+ mp_write_decimal(sb, pr->witness);
+ put_fmt(sb, "\n");
+ }
+ put_fmt(sb, "----\n");
+ }
+ }
+ sfree(needed);
+
+ return sb;
+}
diff --git a/keygen/prime.c b/keygen/prime.c
new file mode 100644
index 00000000..d9bdebba
--- /dev/null
+++ b/keygen/prime.c
@@ -0,0 +1,762 @@
+/*
+ * Prime generation.
+ */
+
+#include <assert.h>
+#include <math.h>
+
+#include "ssh.h"
+#include "mpint.h"
+#include "mpunsafe.h"
+#include "sshkeygen.h"
+
+/* ----------------------------------------------------------------------
+ * Standard probabilistic prime-generation algorithm:
+ *
+ * - get a number from our PrimeCandidateSource which will at least
+ * avoid being divisible by any prime under 2^16
+ *
+ * - perform the Miller-Rabin primality test enough times to
+ * ensure the probability of it being composite is 2^-80 or
+ * less
+ *
+ * - go back to square one if any M-R test fails.
+ */
+
+static PrimeGenerationContext *probprime_new_context(
+ const PrimeGenerationPolicy *policy)
+{
+ PrimeGenerationContext *ctx = snew(PrimeGenerationContext);
+ ctx->vt = policy;
+ return ctx;
+}
+
+static void probprime_free_context(PrimeGenerationContext *ctx)
+{
+ sfree(ctx);
+}
+
+static ProgressPhase probprime_add_progress_phase(
+ const PrimeGenerationPolicy *policy,
+ ProgressReceiver *prog, unsigned bits)
+{
+ /*
+ * The density of primes near x is 1/(log x). When x is about 2^b,
+ * that's 1/(b log 2).
+ *
+ * But we're only doing the expensive part of the process (the M-R
+ * checks) for a number that passes the initial winnowing test of
+ * having no factor less than 2^16 (at least, unless the prime is
+ * so small that PrimeCandidateSource gives up on that winnowing).
+ * The density of _those_ numbers is about 1/19.76. So the odds of
+ * hitting a prime per expensive attempt are boosted by a factor
+ * of 19.76.
+ */
+ const double log_2 = 0.693147180559945309417232121458;
+ double winnow_factor = (bits < 32 ? 1.0 : 19.76);
+ double prob = winnow_factor / (bits * log_2);
+
+ /*
+ * Estimate the cost of prime generation as the cost of the M-R
+ * modexps.
+ */
+ double cost = (miller_rabin_checks_needed(bits) *
+ estimate_modexp_cost(bits));
+ return progress_add_probabilistic(prog, cost, prob);
+}
+
+static mp_int *probprime_generate(
+ PrimeGenerationContext *ctx,
+ PrimeCandidateSource *pcs, ProgressReceiver *prog)
+{
+ pcs_ready(pcs);
+
+ while (true) {
+ progress_report_attempt(prog);
+
+ mp_int *p = pcs_generate(pcs);
+ if (!p) {
+ pcs_free(pcs);
+ return NULL;
+ }
+
+ MillerRabin *mr = miller_rabin_new(p);
+ bool known_bad = false;
+ unsigned nchecks = miller_rabin_checks_needed(mp_get_nbits(p));
+ for (unsigned check = 0; check < nchecks; check++) {
+ if (!miller_rabin_test_random(mr)) {
+ known_bad = true;
+ break;
+ }
+ }
+ miller_rabin_free(mr);
+
+ if (!known_bad) {
+ /*
+ * We have a prime!
+ */
+ pcs_free(pcs);
+ return p;
+ }
+
+ mp_free(p);
+ }
+}
+
+static strbuf *null_mpu_certificate(PrimeGenerationContext *ctx, mp_int *p)
+{
+ return NULL;
+}
+
+const PrimeGenerationPolicy primegen_probabilistic = {
+ probprime_add_progress_phase,
+ probprime_new_context,
+ probprime_free_context,
+ probprime_generate,
+ null_mpu_certificate,
+};
+
+/* ----------------------------------------------------------------------
+ * Alternative provable-prime algorithm, based on the following paper:
+ *
+ * [MAURER] Maurer, U.M. Fast generation of prime numbers and secure
+ * public-key cryptographic parameters. J. Cryptology 8, 123–155
+ * (1995). https://doi.org/10.1007/BF00202269
+ */
+
+typedef enum SubprimePolicy {
+ SPP_FAST,
+ SPP_MAURER_SIMPLE,
+ SPP_MAURER_COMPLEX,
+} SubprimePolicy;
+
+typedef struct ProvablePrimePolicyExtra {
+ SubprimePolicy spp;
+} ProvablePrimePolicyExtra;
+
+typedef struct ProvablePrimeContext ProvablePrimeContext;
+struct ProvablePrimeContext {
+ Pockle *pockle;
+ PrimeGenerationContext pgc;
+ const ProvablePrimePolicyExtra *extra;
+};
+
+static PrimeGenerationContext *provableprime_new_context(
+ const PrimeGenerationPolicy *policy)
+{
+ ProvablePrimeContext *ppc = snew(ProvablePrimeContext);
+ ppc->pgc.vt = policy;
+ ppc->pockle = pockle_new();
+ ppc->extra = policy->extra;
+ return &ppc->pgc;
+}
+
+static void provableprime_free_context(PrimeGenerationContext *ctx)
+{
+ ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc);
+ pockle_free(ppc->pockle);
+ sfree(ppc);
+}
+
+static ProgressPhase provableprime_add_progress_phase(
+ const PrimeGenerationPolicy *policy,
+ ProgressReceiver *prog, unsigned bits)
+{
+ /*
+ * Estimating the cost of making a _provable_ prime is difficult
+ * because of all the recursions to smaller sizes.
+ *
+ * Once you have enough factors of p-1 to certify primality of p,
+ * the remaining work in provable prime generation is not very
+ * different from probabilistic: you generate a random candidate,
+ * test its primality probabilistically, and use the witness value
+ * generated as a byproduct of that test for the full Pocklington
+ * verification. The expensive part, as usual, is made of modpows.
+ *
+ * The Pocklington test needs at least two modpows (one for the
+ * Fermat check, and one per known factor of p-1).
+ *
+ * The prior M-R step needs an unknown number, because we iterate
+ * until we find a value whose order is divisible by the largest
+ * power of 2 that divides p-1, say 2^j. That excludes half the
+ * possible witness values (specifically, the quadratic residues),
+ * so we expect to need on average two M-R operations to find one.
+ * But that's only if the number _is_ prime - as usual, it's also
+ * possible that we hit a non-prime and have to try again.
+ *
+ * So, if we were only estimating the cost of that final step, it
+ * would look a lot like the probabilistic version: we'd have to
+ * estimate the expected total number of modexps by knowing
+ * something about the density of primes among our candidate
+ * integers, and then multiply that by estimate_modexp_cost(bits).
+ * But the problem is that we also have to _find_ a smaller prime,
+ * so we have to recurse.
+ *
+ * In the MAURER_SIMPLE version of the algorithm, you recurse to
+ * any one of a range of possible smaller sizes i, each with
+ * probability proportional to 1/i. So your expected time to
+ * generate an n-bit prime is given by a horrible recurrence of
+ * the form E_n = S_n + (sum E_i/i) / (sum 1/i), in which S_n is
+ * the expected cost of the final step once you have your smaller
+ * primes, and both sums are over ceil(n/2) <= i <= n-20.
+ *
+ * At this point I ran out of effort to actually do the maths
+ * rigorously, so instead I did the empirical experiment of
+ * generating that sequence in Python and plotting it on a graph.
+ * My Python code is here, in case I need it again:
+
+from math import log
+
+alpha = log(3)/log(2) + 1 # exponent for modexp using Karatsuba mult
+
+E = [1] * 16 # assume generating tiny primes is trivial
+
+for n in range(len(E), 4096):
+
+ # Expected time for sub-generations, as a weighted mean of prior
+ # values of the same sequence.
+ lo = (n+1)//2
+ hi = n-20
+ if lo <= hi:
+ subrange = range(lo, hi+1)
+ num = sum(E[i]/i for i in subrange)
+ den = sum(1/i for i in subrange)
+ else:
+ num, den = 0, 1
+
+ # Constant term (cost of final step).
+ # Similar to probprime_add_progress_phase.
+ winnow_factor = 1 if n < 32 else 19.76
+ prob = winnow_factor / (n * log(2))
+ cost = 4 * n**alpha / prob
+
+ E.append(cost + num / den)
+
+for i, p in enumerate(E):
+ try:
+ print(log(i), log(p))
+ except ValueError:
+ continue
+
+ * The output loop prints the logs of both i and E_i, so that when
+ * I plot the resulting data file in gnuplot I get a log-log
+ * diagram. That showed me some early noise and then a very
+ * straight-looking line; feeding the straight part of the graph
+ * to linear-regression analysis reported that it fits the line
+ *
+ * log E_n = -1.7901825337965498 + 3.6199197179662517 * log(n)
+ * => E_n = 0.16692969657466802 * n^3.6199197179662517
+ *
+ * So my somewhat empirical estimate is that Maurer prime
+ * generation costs about 0.167 * bits^3.62, in the same arbitrary
+ * time units used by estimate_modexp_cost.
+ */
+
+ return progress_add_linear(prog, 0.167 * pow(bits, 3.62));
+}
+
+static mp_int *primegen_small(Pockle *pockle, PrimeCandidateSource *pcs)
+{
+ assert(pcs_get_bits(pcs) <= 32);
+
+ pcs_ready(pcs);
+
+ while (true) {
+ mp_int *p = pcs_generate(pcs);
+ if (!p) {
+ pcs_free(pcs);
+ return NULL;
+ }
+ if (pockle_add_small_prime(pockle, p) == POCKLE_OK) {
+ pcs_free(pcs);
+ return p;
+ }
+ mp_free(p);
+ }
+}
+
+#ifdef DEBUG_PRIMEGEN
+static void timestamp(FILE *fp)
+{
+ struct timespec ts;
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ fprintf(fp, "%lu.%09lu: ", (unsigned long)ts.tv_sec,
+ (unsigned long)ts.tv_nsec);
+}
+static PRINTF_LIKE(1, 2) void debug_f(const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ timestamp(stderr);
+ vfprintf(stderr, fmt, ap);
+ fputc('\n', stderr);
+ va_end(ap);
+}
+static void debug_f_mp(const char *fmt, mp_int *x, ...)
+{
+ va_list ap;
+ va_start(ap, x);
+ timestamp(stderr);
+ vfprintf(stderr, fmt, ap);
+ mp_dump(stderr, "", x, "\n");
+ va_end(ap);
+}
+#else
+#define debug_f(...) ((void)0)
+#define debug_f_mp(...) ((void)0)
+#endif
+
+static double uniform_random_double(void)
+{
+ unsigned char randbuf[8];
+ random_read(randbuf, 8);
+ return GET_64BIT_MSB_FIRST(randbuf) * 0x1.0p-64;
+}
+
+static mp_int *mp_ceil_div(mp_int *n, mp_int *d)
+{
+ mp_int *nplus = mp_add(n, d);
+ mp_sub_integer_into(nplus, nplus, 1);
+ mp_int *toret = mp_div(nplus, d);
+ mp_free(nplus);
+ return toret;
+}
+
+static mp_int *provableprime_generate_inner(
+ ProvablePrimeContext *ppc, PrimeCandidateSource *pcs,
+ ProgressReceiver *prog, double progress_origin, double progress_scale)
+{
+ unsigned bits = pcs_get_bits(pcs);
+ assert(bits > 1);
+
+ if (bits <= 32) {
+ debug_f("ppgi(%u) -> small", bits);
+ return primegen_small(ppc->pockle, pcs);
+ }
+
+ unsigned min_bits_needed, max_bits_needed;
+ {
+ /*
+ * Find the product of all the prime factors we already know
+ * about.
+ */
+ mp_int *size_got = mp_from_integer(1);
+ size_t nfactors;
+ mp_int **factors = pcs_get_known_prime_factors(pcs, &nfactors);
+ for (size_t i = 0; i < nfactors; i++) {
+ mp_int *to_free = size_got;
+ size_got = mp_unsafe_shrink(mp_mul(size_got, factors[i]));
+ mp_free(to_free);
+ }
+
+ /*
+ * Find the largest cofactor we might be able to use, and the
+ * smallest one we can get away with.
+ */
+ mp_int *upperbound = pcs_get_upper_bound(pcs);
+ mp_int *size_needed = mp_nthroot(upperbound, 3, NULL);
+ debug_f_mp("upperbound = ", upperbound);
+ {
+ mp_int *to_free = upperbound;
+ upperbound = mp_unsafe_shrink(mp_div(upperbound, size_got));
+ mp_free(to_free);
+ }
+ debug_f_mp("size_needed = ", size_needed);
+ {
+ mp_int *to_free = size_needed;
+ size_needed = mp_unsafe_shrink(mp_ceil_div(size_needed, size_got));
+ mp_free(to_free);
+ }
+
+ max_bits_needed = pcs_get_bits_remaining(pcs);
+
+ /*
+ * We need a prime that is greater than or equal to
+ * 'size_needed' in order for the product of all our known
+ * factors of p-1 to exceed the cube root of the largest value
+ * p might take.
+ *
+ * Since pcs_new wants a size specified in bits, we must count
+ * the bits in size_needed and then add 1. Otherwise we might
+ * get a value with the same bit count as size_needed but
+ * slightly smaller than it.
+ *
+ * An exception is if size_needed = 1. In that case the
+ * product of existing known factors is _already_ enough, so
+ * we don't need to generate an extra factor at all.
+ */
+ if (mp_hs_integer(size_needed, 2)) {
+ min_bits_needed = mp_get_nbits(size_needed) + 1;
+ } else {
+ min_bits_needed = 0;
+ }
+
+ mp_free(upperbound);
+ mp_free(size_needed);
+ mp_free(size_got);
+ }
+
+ double progress = 0.0;
+
+ if (min_bits_needed) {
+ debug_f("ppgi(%u) recursing, need [%u,%u] more bits",
+ bits, min_bits_needed, max_bits_needed);
+
+ unsigned *sizes = NULL;
+ size_t nsizes = 0, sizesize = 0;
+
+ unsigned real_min = max_bits_needed / 2;
+ unsigned real_max = (max_bits_needed >= 20 ?
+ max_bits_needed - 20 : 0);
+ if (real_min < min_bits_needed)
+ real_min = min_bits_needed;
+ if (real_max < real_min)
+ real_max = real_min;
+ debug_f("ppgi(%u) revised bits interval = [%u,%u]",
+ bits, real_min, real_max);
+
+ switch (ppc->extra->spp) {
+ case SPP_FAST:
+ /*
+ * Always pick the smallest subsidiary prime we can get
+ * away with: just over n/3 bits.
+ *
+ * This is not a good mode for cryptographic prime
+ * generation, because it skews the distribution of primes
+ * greatly, and worse, it skews them in a direction that
+ * heads away from the properties crypto algorithms tend
+ * to like.
+ *
+ * (For both discrete-log systems and RSA, people have
+ * tended to recommend in the past that p-1 should have a
+ * _large_ factor if possible. There's some disagreement
+ * on which algorithms this is really necessary for, but
+ * certainly I've never seen anyone recommend arranging a
+ * _small_ factor on purpose.)
+ *
+ * I originally implemented this mode because it was
+ * convenient for debugging - it wastes as little time as
+ * possible on finding a sub-prime and lets you get to the
+ * interesting part! And I leave it in the code because it
+ * might still be useful for _something_. Because it's
+ * cryptographically questionable, it's not selectable in
+ * the UI of either version of PuTTYgen proper; but it can
+ * be accessed through testcrypt, and if for some reason a
+ * definite prime is needed for non-crypto purposes, it
+ * may still be the fastest way to put your hands on one.
+ */
+ debug_f("ppgi(%u) fast mode, just ask for %u bits",
+ bits, min_bits_needed);
+ sgrowarray(sizes, sizesize, nsizes);
+ sizes[nsizes++] = min_bits_needed;
+ break;
+ case SPP_MAURER_SIMPLE: {
+ /*
+ * Select the size of the subsidiary prime at random from
+ * sqrt(outputprime) up to outputprime/2^20, in such a way
+ * that the probability distribution matches that of the
+ * largest prime factor of a random n-bit number.
+ *
+ * Per [MAURER] section 3.4, the cumulative distribution
+ * function of this relative size is 1+log2(x), for x in
+ * [1/2,1]. You can generate a value from the distribution
+ * given by a cdf by applying the inverse cdf to a uniform
+ * value in [0,1]. Simplifying that in this case, what we
+ * have to do is raise 2 to the power of a random real
+ * number between -1 and 0. (And that gives you the number
+ * of _bits_ in the sub-prime, as a factor of the desired
+ * output number of bits.)
+ *
+ * We also require that the subsidiary prime q is at least
+ * 20 bits smaller than the output one, to give us a
+ * fighting chance of there being _any_ prime we can find
+ * such that q | p-1.
+ *
+ * (But these rules have to be applied in an order that
+ * still leaves us _some_ interval of possible sizes we
+ * can pick!)
+ */
+ maurer_simple:
+ debug_f("ppgi(%u) Maurer simple mode", bits);
+
+ unsigned sub_bits;
+ do {
+ double uniform = uniform_random_double();
+ sub_bits = real_max * pow(2.0, uniform - 1) + 0.5;
+ debug_f(" ... %.6f -> %u?", uniform, sub_bits);
+ } while (!(real_min <= sub_bits && sub_bits <= real_max));
+
+ debug_f("ppgi(%u) asking for %u bits", bits, sub_bits);
+ sgrowarray(sizes, sizesize, nsizes);
+ sizes[nsizes++] = sub_bits;
+
+ break;
+ }
+ case SPP_MAURER_COMPLEX: {
+ /*
+ * In this mode, we may generate multiple factors of p-1
+ * which between them add up to at least n/2 bits, in such
+ * a way that those are guaranteed to be the largest
+ * factors of p-1 and that they have the same probability
+ * distribution as the largest k factors would have in a
+ * random integer. The idea is that this more elaborate
+ * procedure gets as close as possible to the same
+ * probability distribution you'd get by selecting a
+ * completely random prime (if you feasibly could).
+ *
+ * Algorithm from Appendix 1 of [MAURER]: we generate
+ * random real numbers that sum to at most 1, by choosing
+ * each one uniformly from the range [0, 1 - sum of all
+ * the previous ones]. We maintain them in a list in
+ * decreasing order, and we stop as soon as we find an
+ * initial subsequence of the list s_1,...,s_r such that
+ * s_1 + ... + s_{r-1} + 2 s_r > 1. In particular, this
+ * guarantees that the sum of that initial subsequence is
+ * at least 1/2, so we end up with enough factors to
+ * satisfy Pocklington.
+ */
+
+ if (max_bits_needed / 2 + 1 > real_max) {
+ /* Early exit path in the case where this algorithm
+ * can't possibly generate a value in the range we
+ * need. In that situation, fall back to Maurer
+ * simple. */
+ debug_f("ppgi(%u) skipping GenerateSizeList, "
+ "real_max too small", bits);
+ goto maurer_simple; /* sorry! */
+ }
+
+ double *s = NULL;
+ size_t ns, ssize = 0;
+
+ while (true) {
+ debug_f("ppgi(%u) starting GenerateSizeList", bits);
+ ns = 0;
+ double range = 1.0;
+ while (true) {
+ /* Generate the next number */
+ double u = uniform_random_double() * range;
+ range -= u;
+ debug_f(" u_%"SIZEu" = %g", ns, u);
+
+ /* Insert it in the list */
+ sgrowarray(s, ssize, ns);
+ size_t i;
+ for (i = ns; i > 0 && s[i-1] < u; i--)
+ s[i] = s[i-1];
+ s[i] = u;
+ ns++;
+ debug_f(" inserting as s[%"SIZEu"]", i);
+
+ /* Look for a suitable initial subsequence */
+ double sum = 0;
+ for (i = 0; i < ns; i++) {
+ sum += s[i];
+ if (sum + s[i] > 1.0) {
+ debug_f(" s[0..%"SIZEu"] works!", i);
+
+ /* Truncate the sequence here, and stop
+ * generating random real numbers. */
+ ns = i+1;
+ goto got_list;
+ }
+ }
+ }
+
+ got_list:;
+ /*
+ * Now translate those real numbers into actual bit
+ * counts, and do a last-minute check to make sure
+ * their product is going to be in range.
+ *
+ * We have to check both the min and max sizes of the
+ * total. A b-bit number is in [2^{b-1},2^b). So the
+ * product of numbers of sizes b_1,...,b_k is at least
+ * 2^{\sum (b_i-1)}, and less than 2^{\sum b_i}.
+ */
+ nsizes = 0;
+
+ unsigned min_total = 0, max_total = 0;
+
+ for (size_t i = 0; i < ns; i++) {
+ /* These sizes are measured in actual entropy, so
+ * add 1 bit each time to account for the
+ * zero-information leading 1 */
+ unsigned this_size = max_bits_needed * s[i] + 1;
+ debug_f(" bits[%"SIZEu"] = %u", i, this_size);
+ sgrowarray(sizes, sizesize, nsizes);
+ sizes[nsizes++] = this_size;
+
+ min_total += this_size - 1;
+ max_total += this_size;
+ }
+
+ debug_f(" total bits = [%u,%u)", min_total, max_total);
+ if (min_total < real_min || max_total > real_max+1) {
+ debug_f(" total out of range, try again");
+ } else {
+ debug_f(" success! %"SIZEu" sub-primes totalling [%u,%u) "
+ "bits", nsizes, min_total, max_total);
+ break;
+ }
+ }
+
+ smemclr(s, ssize * sizeof(*s));
+ sfree(s);
+ break;
+ }
+ default:
+ unreachable("bad subprime policy");
+ }
+
+ for (size_t i = 0; i < nsizes; i++) {
+ unsigned sub_bits = sizes[i];
+ double progress_in_this_prime = (double)sub_bits / bits;
+ mp_int *q = provableprime_generate_inner(
+ ppc, pcs_new(sub_bits),
+ prog, progress_origin + progress_scale * progress,
+ progress_scale * progress_in_this_prime);
+ progress += progress_in_this_prime;
+ assert(q);
+ debug_f_mp("ppgi(%u) got factor ", q, bits);
+ pcs_require_residue_1_mod_prime(pcs, q);
+ mp_free(q);
+ }
+
+ smemclr(sizes, sizesize * sizeof(*sizes));
+ sfree(sizes);
+ } else {
+ debug_f("ppgi(%u) no need to recurse", bits);
+ }
+
+ debug_f("ppgi(%u) ready, %u bits remaining",
+ bits, pcs_get_bits_remaining(pcs));
+ pcs_ready(pcs);
+
+ while (true) {
+ mp_int *p = pcs_generate(pcs);
+ if (!p) {
+ pcs_free(pcs);
+ return NULL;
+ }
+
+ debug_f_mp("provable_step p=", p);
+
+ MillerRabin *mr = miller_rabin_new(p);
+ debug_f("provable_step mr setup done");
+ mp_int *witness = miller_rabin_find_potential_primitive_root(mr);
+ miller_rabin_free(mr);
+
+ if (!witness) {
+ debug_f("provable_step mr failed");
+ mp_free(p);
+ continue;
+ }
+
+ size_t nfactors;
+ mp_int **factors = pcs_get_known_prime_factors(pcs, &nfactors);
+ PockleStatus st = pockle_add_prime(
+ ppc->pockle, p, factors, nfactors, witness);
+
+ if (st != POCKLE_OK) {
+ debug_f("provable_step proof failed %d", (int)st);
+
+ /*
+ * Check by assertion that the error status is not one of
+ * the ones we ought to have ruled out already by
+ * construction. If there's a bug in this code that means
+ * we can _never_ pass this test (e.g. picking products of
+ * factors that never quite reach cbrt(n)), we'd rather
+ * fail an assertion than loop forever.
+ */
+ assert(st == POCKLE_DISCRIMINANT_IS_SQUARE ||
+ st == POCKLE_WITNESS_POWER_IS_1 ||
+ st == POCKLE_WITNESS_POWER_NOT_COPRIME);
+
+ mp_free(p);
+ if (witness)
+ mp_free(witness);
+ continue;
+ }
+
+ mp_free(witness);
+ pcs_free(pcs);
+ debug_f_mp("ppgi(%u) done, got ", p, bits);
+ progress_report(prog, progress_origin + progress_scale);
+ return p;
+ }
+}
+
+static mp_int *provableprime_generate(
+ PrimeGenerationContext *ctx,
+ PrimeCandidateSource *pcs, ProgressReceiver *prog)
+{
+ ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc);
+ mp_int *p = provableprime_generate_inner(ppc, pcs, prog, 0.0, 1.0);
+
+ return p;
+}
+
+static inline strbuf *provableprime_mpu_certificate(
+ PrimeGenerationContext *ctx, mp_int *p)
+{
+ ProvablePrimeContext *ppc = container_of(ctx, ProvablePrimeContext, pgc);
+ return pockle_mpu(ppc->pockle, p);
+}
+
+#define DECLARE_POLICY(name, policy) \
+ static const struct ProvablePrimePolicyExtra \
+ pppextra_##name = {policy}; \
+ const PrimeGenerationPolicy name = { \
+ provableprime_add_progress_phase, \
+ provableprime_new_context, \
+ provableprime_free_context, \
+ provableprime_generate, \
+ provableprime_mpu_certificate, \
+ &pppextra_##name, \
+ }
+
+DECLARE_POLICY(primegen_provable_fast, SPP_FAST);
+DECLARE_POLICY(primegen_provable_maurer_simple, SPP_MAURER_SIMPLE);
+DECLARE_POLICY(primegen_provable_maurer_complex, SPP_MAURER_COMPLEX);
+
+/* ----------------------------------------------------------------------
+ * Reusable null implementation of the progress-reporting API.
+ */
+
+static inline ProgressPhase null_progress_add(void) {
+ ProgressPhase ph = { .n = 0 };
+ return ph;
+}
+ProgressPhase null_progress_add_linear(
+ ProgressReceiver *prog, double c) { return null_progress_add(); }
+ProgressPhase null_progress_add_probabilistic(
+ ProgressReceiver *prog, double c, double p) { return null_progress_add(); }
+void null_progress_ready(ProgressReceiver *prog) {}
+void null_progress_start_phase(ProgressReceiver *prog, ProgressPhase phase) {}
+void null_progress_report(ProgressReceiver *prog, double progress) {}
+void null_progress_report_attempt(ProgressReceiver *prog) {}
+void null_progress_report_phase_complete(ProgressReceiver *prog) {}
+const ProgressReceiverVtable null_progress_vt = {
+ .add_linear = null_progress_add_linear,
+ .add_probabilistic = null_progress_add_probabilistic,
+ .ready = null_progress_ready,
+ .start_phase = null_progress_start_phase,
+ .report = null_progress_report,
+ .report_attempt = null_progress_report_attempt,
+ .report_phase_complete = null_progress_report_phase_complete,
+};
+
+/* ----------------------------------------------------------------------
+ * Helper function for progress estimation.
+ */
+
+double estimate_modexp_cost(unsigned bits)
+{
+ /*
+ * A modexp of n bits goes roughly like O(n^2.58), on the grounds
+ * that our modmul is O(n^1.58) (Karatsuba) and you need O(n) of
+ * them in a modexp.
+ */
+ return pow(bits, 2.58);
+}
diff --git a/keygen/primecandidate.c b/keygen/primecandidate.c
new file mode 100644
index 00000000..fca2b297
--- /dev/null
+++ b/keygen/primecandidate.c
@@ -0,0 +1,447 @@
+/*
+ * primecandidate.c: implementation of the PrimeCandidateSource
+ * abstraction declared in sshkeygen.h.
+ */
+
+#include <assert.h>
+#include "ssh.h"
+#include "mpint.h"
+#include "mpunsafe.h"
+#include "sshkeygen.h"
+
+struct avoid {
+ unsigned mod, res;
+};
+
+struct PrimeCandidateSource {
+ unsigned bits;
+ bool ready, try_sophie_germain;
+ bool one_shot, thrown_away_my_shot;
+
+ /* We'll start by making up a random number strictly less than this ... */
+ mp_int *limit;
+
+ /* ... then we'll multiply by 'factor', and add 'addend'. */
+ mp_int *factor, *addend;
+
+ /* Then we'll try to add a small multiple of 'factor' to it to
+ * avoid it being a multiple of any small prime. Also, for RSA, we
+ * may need to avoid it being _this_ multiple of _this_: */
+ unsigned avoid_residue, avoid_modulus;
+
+ /* Once we're actually running, this will be the complete list of
+ * (modulus, residue) pairs we want to avoid. */
+ struct avoid *avoids;
+ size_t navoids, avoidsize;
+
+ /* List of known primes that our number will be congruent to 1 modulo */
+ mp_int **kps;
+ size_t nkps, kpsize;
+};
+
+PrimeCandidateSource *pcs_new_with_firstbits(unsigned bits,
+ unsigned first, unsigned nfirst)
+{
+ PrimeCandidateSource *s = snew(PrimeCandidateSource);
+
+ assert(first >> (nfirst-1) == 1);
+
+ s->bits = bits;
+ s->ready = false;
+ s->try_sophie_germain = false;
+ s->one_shot = false;
+ s->thrown_away_my_shot = false;
+
+ s->kps = NULL;
+ s->nkps = s->kpsize = 0;
+
+ s->avoids = NULL;
+ s->navoids = s->avoidsize = 0;
+
+ /* Make the number that's the lower limit of our range */
+ mp_int *firstmp = mp_from_integer(first);
+ mp_int *base = mp_lshift_fixed(firstmp, bits - nfirst);
+ mp_free(firstmp);
+
+ /* Set the low bit of that, because all (nontrivial) primes are odd */
+ mp_set_bit(base, 0, 1);
+
+ /* That's our addend. Now initialise factor to 2, to ensure we
+ * only generate odd numbers */
+ s->factor = mp_from_integer(2);
+ s->addend = base;
+
+ /* And that means the limit of our random numbers must be one
+ * factor of two _less_ than the position of the low bit of
+ * 'first', because we'll be multiplying the random number by
+ * 2 immediately afterwards. */
+ s->limit = mp_power_2(bits - nfirst - 1);
+
+ /* avoid_modulus == 0 signals that there's no extra residue to avoid */
+ s->avoid_residue = 1;
+ s->avoid_modulus = 0;
+
+ return s;
+}
+
+PrimeCandidateSource *pcs_new(unsigned bits)
+{
+ return pcs_new_with_firstbits(bits, 1, 1);
+}
+
+void pcs_free(PrimeCandidateSource *s)
+{
+ mp_free(s->limit);
+ mp_free(s->factor);
+ mp_free(s->addend);
+ for (size_t i = 0; i < s->nkps; i++)
+ mp_free(s->kps[i]);
+ sfree(s->avoids);
+ sfree(s->kps);
+ sfree(s);
+}
+
+void pcs_try_sophie_germain(PrimeCandidateSource *s)
+{
+ s->try_sophie_germain = true;
+}
+
+void pcs_set_oneshot(PrimeCandidateSource *s)
+{
+ s->one_shot = true;
+}
+
+static void pcs_require_residue_inner(PrimeCandidateSource *s,
+ mp_int *mod, mp_int *res)
+{
+ /*
+ * We already have a factor and addend. Ensure this one doesn't
+ * contradict it.
+ */
+ mp_int *gcd = mp_gcd(mod, s->factor);
+ mp_int *test1 = mp_mod(s->addend, gcd);
+ mp_int *test2 = mp_mod(res, gcd);
+ assert(mp_cmp_eq(test1, test2));
+ mp_free(test1);
+ mp_free(test2);
+
+ /*
+ * Reduce our input factor and addend, which are constraints on
+ * the ultimate output number, so that they're constraints on the
+ * initial cofactor we're going to make up.
+ *
+ * If we're generating x and we want to ensure ax+b == r (mod m),
+ * how does that work? We've already checked that b == r modulo g
+ * = gcd(a,m), i.e. r-b is a multiple of g, and so are a and m. So
+ * let's write a=gA, m=gM, (r-b)=gR, and then we can start by
+ * dividing that off:
+ *
+ * ax == r-b (mod m )
+ * => gAx == gR (mod gM)
+ * => Ax == R (mod M)
+ *
+ * Now the moduli A,M are coprime, which makes things easier.
+ *
+ * We're going to need to generate the x in this equation by
+ * generating a new smaller value y, multiplying it by M, and
+ * adding some constant K. So we have x = My + K, and we need to
+ * work out what K will satisfy the above equation. In other
+ * words, we need A(My+K) == R (mod M), and the AMy term vanishes,
+ * so we just need AK == R (mod M). So our congruence is solved by
+ * setting K to be R * A^{-1} mod M.
+ */
+ mp_int *A = mp_div(s->factor, gcd);
+ mp_int *M = mp_div(mod, gcd);
+ mp_int *Rpre = mp_modsub(res, s->addend, mod);
+ mp_int *R = mp_div(Rpre, gcd);
+ mp_int *Ainv = mp_invert(A, M);
+ mp_int *K = mp_modmul(R, Ainv, M);
+
+ mp_free(gcd);
+ mp_free(Rpre);
+ mp_free(Ainv);
+ mp_free(A);
+ mp_free(R);
+
+ /*
+ * So we know we have to transform our existing (factor, addend)
+ * pair into (factor * M, addend * factor * K). Now we just need
+ * to work out what the limit should be on the random value we're
+ * generating.
+ *
+ * If we need My+K < old_limit, then y < (old_limit-K)/M. But the
+ * RHS is a fraction, so in integers, we need y < ceil of it.
+ */
+ assert(!mp_cmp_hs(K, s->limit));
+ mp_int *dividend = mp_add(s->limit, M);
+ mp_sub_integer_into(dividend, dividend, 1);
+ mp_sub_into(dividend, dividend, K);
+ mp_free(s->limit);
+ s->limit = mp_div(dividend, M);
+ mp_free(dividend);
+
+ /*
+ * Now just update the real factor and addend, and we're done.
+ */
+
+ mp_int *addend_old = s->addend;
+ mp_int *tmp = mp_mul(s->factor, K); /* use the _old_ value of factor */
+ s->addend = mp_add(s->addend, tmp);
+ mp_free(tmp);
+ mp_free(addend_old);
+
+ mp_int *factor_old = s->factor;
+ s->factor = mp_mul(s->factor, M);
+ mp_free(factor_old);
+
+ mp_free(M);
+ mp_free(K);
+ s->factor = mp_unsafe_shrink(s->factor);
+ s->addend = mp_unsafe_shrink(s->addend);
+ s->limit = mp_unsafe_shrink(s->limit);
+}
+
+void pcs_require_residue(PrimeCandidateSource *s,
+ mp_int *mod, mp_int *res_orig)
+{
+ /*
+ * Reduce the input residue to its least non-negative value, in
+ * case it was given as a larger equivalent value.
+ */
+ mp_int *res_reduced = mp_mod(res_orig, mod);
+ pcs_require_residue_inner(s, mod, res_reduced);
+ mp_free(res_reduced);
+}
+
+void pcs_require_residue_1(PrimeCandidateSource *s, mp_int *mod)
+{
+ mp_int *res = mp_from_integer(1);
+ pcs_require_residue(s, mod, res);
+ mp_free(res);
+}
+
+void pcs_require_residue_1_mod_prime(PrimeCandidateSource *s, mp_int *mod)
+{
+ pcs_require_residue_1(s, mod);
+
+ sgrowarray(s->kps, s->kpsize, s->nkps);
+ s->kps[s->nkps++] = mp_copy(mod);
+}
+
+void pcs_avoid_residue_small(PrimeCandidateSource *s,
+ unsigned mod, unsigned res)
+{
+ assert(!s->avoid_modulus); /* can't cope with more than one */
+ s->avoid_modulus = mod;
+ s->avoid_residue = res % mod; /* reduce, just in case */
+}
+
+static int avoid_cmp(const void *av, const void *bv)
+{
+ const struct avoid *a = (const struct avoid *)av;
+ const struct avoid *b = (const struct avoid *)bv;
+ return a->mod < b->mod ? -1 : a->mod > b->mod ? +1 : 0;
+}
+
+static uint64_t invert(uint64_t a, uint64_t m)
+{
+ int64_t v0 = a, i0 = 1;
+ int64_t v1 = m, i1 = 0;
+ while (v0) {
+ int64_t tmp, q = v1 / v0;
+ tmp = v0; v0 = v1 - q*v0; v1 = tmp;
+ tmp = i0; i0 = i1 - q*i0; i1 = tmp;
+ }
+ assert(v1 == 1 || v1 == -1);
+ return i1 * v1;
+}
+
+void pcs_ready(PrimeCandidateSource *s)
+{
+ /*
+ * List all the small (modulus, residue) pairs we want to avoid.
+ */
+
+ init_smallprimes();
+
+#define ADD_AVOID(newmod, newres) do { \
+ sgrowarray(s->avoids, s->avoidsize, s->navoids); \
+ s->avoids[s->navoids].mod = (newmod); \
+ s->avoids[s->navoids].res = (newres); \
+ s->navoids++; \
+ } while (0)
+
+ unsigned limit = (mp_hs_integer(s->addend, 65536) ? 65536 :
+ mp_get_integer(s->addend));
+
+ /*
+ * Don't be divisible by any small prime, or at least, any prime
+ * smaller than our output number might actually manage to be. (If
+ * asked to generate a really small prime, it would be
+ * embarrassing to rule out legitimate answers on the grounds that
+ * they were divisible by themselves.)
+ */
+ for (size_t i = 0; i < NSMALLPRIMES && smallprimes[i] < limit; i++)
+ ADD_AVOID(smallprimes[i], 0);
+
+ if (s->try_sophie_germain) {
+ /*
+ * If we're aiming to generate a Sophie Germain prime (i.e. p
+ * such that 2p+1 is also prime), then we also want to ensure
+ * 2p+1 is not congruent to 0 mod any small prime, because if
+ * it is, we'll waste a lot of time generating a p for which
+ * 2p+1 can't possibly work. So we have to avoid an extra
+ * residue mod each odd q.
+ *
+ * We can simplify: 2p+1 == 0 (mod q)
+ * => 2p == -1 (mod q)
+ * => p == -2^{-1} (mod q)
+ *
+ * There's no need to do Euclid's algorithm to compute those
+ * inverses, because for any odd q, the modular inverse of -2
+ * mod q is just (q-1)/2. (Proof: multiplying it by -2 gives
+ * 1-q, which is congruent to 1 mod q.)
+ */
+ for (size_t i = 0; i < NSMALLPRIMES && smallprimes[i] < limit; i++)
+ if (smallprimes[i] != 2)
+ ADD_AVOID(smallprimes[i], (smallprimes[i] - 1) / 2);
+ }
+
+ /*
+ * Finally, if there's a particular modulus and residue we've been
+ * told to avoid, put it on the list.
+ */
+ if (s->avoid_modulus)
+ ADD_AVOID(s->avoid_modulus, s->avoid_residue);
+
+#undef ADD_AVOID
+
+ /*
+ * Sort our to-avoid list by modulus. Partly this is so that we'll
+ * check the smaller moduli first during the live runs, which lets
+ * us spot most failing cases earlier rather than later. Also, it
+ * brings equal moduli together, so that we can reuse the residue
+ * we computed from a previous one.
+ */
+ qsort(s->avoids, s->navoids, sizeof(*s->avoids), avoid_cmp);
+
+ /*
+ * Next, adjust each of these moduli to take account of our factor
+ * and addend. If we want factor*x+addend to avoid being congruent
+ * to 'res' modulo 'mod', then x itself must avoid being congruent
+ * to (res - addend) * factor^{-1}.
+ *
+ * If factor == 0 modulo mod, then the answer will have a fixed
+ * residue anyway, so we can discard it from our list to test.
+ */
+ int64_t factor_m = 0, addend_m = 0, last_mod = 0;
+
+ size_t out = 0;
+ for (size_t i = 0; i < s->navoids; i++) {
+ int64_t mod = s->avoids[i].mod, res = s->avoids[i].res;
+ if (mod != last_mod) {
+ last_mod = mod;
+ addend_m = mp_mod_known_integer(s->addend, mod);
+ factor_m = mp_mod_known_integer(s->factor, mod);
+ }
+
+ if (factor_m == 0) {
+ assert(res != addend_m);
+ continue;
+ }
+
+ res = (res - addend_m) * invert(factor_m, mod);
+ res %= mod;
+ if (res < 0)
+ res += mod;
+
+ s->avoids[out].mod = mod;
+ s->avoids[out].res = res;
+ out++;
+ }
+
+ s->navoids = out;
+
+ s->ready = true;
+}
+
+mp_int *pcs_generate(PrimeCandidateSource *s)
+{
+ assert(s->ready);
+ if (s->one_shot) {
+ if (s->thrown_away_my_shot)
+ return NULL;
+ s->thrown_away_my_shot = true;
+ }
+
+ while (true) {
+ mp_int *x = mp_random_upto(s->limit);
+
+ int64_t x_res = 0, last_mod = 0;
+ bool ok = true;
+
+ for (size_t i = 0; i < s->navoids; i++) {
+ int64_t mod = s->avoids[i].mod, avoid_res = s->avoids[i].res;
+
+ if (mod != last_mod) {
+ last_mod = mod;
+ x_res = mp_mod_known_integer(x, mod);
+ }
+
+ if (x_res == avoid_res) {
+ ok = false;
+ break;
+ }
+ }
+
+ if (!ok) {
+ mp_free(x);
+ if (s->one_shot)
+ return NULL;
+ continue; /* try a new x */
+ }
+
+ /*
+ * We've found a viable x. Make the final output value.
+ */
+ mp_int *toret = mp_new(s->bits);
+ mp_mul_into(toret, x, s->factor);
+ mp_add_into(toret, toret, s->addend);
+ mp_free(x);
+ return toret;
+ }
+}
+
+void pcs_inspect(PrimeCandidateSource *pcs, mp_int **limit_out,
+ mp_int **factor_out, mp_int **addend_out)
+{
+ *limit_out = mp_copy(pcs->limit);
+ *factor_out = mp_copy(pcs->factor);
+ *addend_out = mp_copy(pcs->addend);
+}
+
+unsigned pcs_get_bits(PrimeCandidateSource *pcs)
+{
+ return pcs->bits;
+}
+
+unsigned pcs_get_bits_remaining(PrimeCandidateSource *pcs)
+{
+ return mp_get_nbits(pcs->limit);
+}
+
+mp_int *pcs_get_upper_bound(PrimeCandidateSource *pcs)
+{
+ /* Compute (limit-1) * factor + addend */
+ mp_int *tmp = mp_mul(pcs->limit, pcs->factor);
+ mp_int *bound = mp_add(tmp, pcs->addend);
+ mp_free(tmp);
+ mp_sub_into(bound, bound, pcs->factor);
+ return bound;
+}
+
+mp_int **pcs_get_known_prime_factors(PrimeCandidateSource *pcs, size_t *nout)
+{
+ *nout = pcs->nkps;
+ return pcs->kps;
+}
diff --git a/keygen/rsa.c b/keygen/rsa.c
new file mode 100644
index 00000000..b9676e7a
--- /dev/null
+++ b/keygen/rsa.c
@@ -0,0 +1,292 @@
+/*
+ * RSA key generation.
+ */
+
+#include <assert.h>
+
+#include "ssh.h"
+#include "sshkeygen.h"
+#include "mpint.h"
+
+#define RSA_EXPONENT 65537
+
+#define NFIRSTBITS 13
+static void invent_firstbits(unsigned *one, unsigned *two,
+ unsigned min_separation);
+
+typedef struct RSAPrimeDetails RSAPrimeDetails;
+struct RSAPrimeDetails {
+ bool strong;
+ int bits, bitsm1m1, bitsm1, bitsp1;
+ unsigned firstbits;
+ ProgressPhase phase_main, phase_m1m1, phase_m1, phase_p1;
+};
+
+#define STRONG_MARGIN (20 + NFIRSTBITS)
+
+static RSAPrimeDetails setup_rsa_prime(
+ int bits, bool strong, PrimeGenerationContext *pgc, ProgressReceiver *prog)
+{
+ RSAPrimeDetails pd;
+ pd.bits = bits;
+ if (strong) {
+ pd.bitsm1 = (bits - STRONG_MARGIN) / 2;
+ pd.bitsp1 = (bits - STRONG_MARGIN) - pd.bitsm1;
+ pd.bitsm1m1 = (pd.bitsm1 - STRONG_MARGIN) / 2;
+ if (pd.bitsm1m1 < STRONG_MARGIN) {
+ /* Absurdly small prime, but we should at least not crash. */
+ strong = false;
+ }
+ }
+ pd.strong = strong;
+
+ if (pd.strong) {
+ pd.phase_m1m1 = primegen_add_progress_phase(pgc, prog, pd.bitsm1m1);
+ pd.phase_m1 = primegen_add_progress_phase(pgc, prog, pd.bitsm1);
+ pd.phase_p1 = primegen_add_progress_phase(pgc, prog, pd.bitsp1);
+ }
+ pd.phase_main = primegen_add_progress_phase(pgc, prog, pd.bits);
+
+ return pd;
+}
+
+static mp_int *generate_rsa_prime(
+ RSAPrimeDetails pd, PrimeGenerationContext *pgc, ProgressReceiver *prog)
+{
+ mp_int *m1m1 = NULL, *m1 = NULL, *p1 = NULL, *p = NULL;
+ PrimeCandidateSource *pcs;
+
+ if (pd.strong) {
+ progress_start_phase(prog, pd.phase_m1m1);
+ pcs = pcs_new_with_firstbits(pd.bitsm1m1, pd.firstbits, NFIRSTBITS);
+ m1m1 = primegen_generate(pgc, pcs, prog);
+ progress_report_phase_complete(prog);
+
+ progress_start_phase(prog, pd.phase_m1);
+ pcs = pcs_new_with_firstbits(pd.bitsm1, pd.firstbits, NFIRSTBITS);
+ pcs_require_residue_1_mod_prime(pcs, m1m1);
+ m1 = primegen_generate(pgc, pcs, prog);
+ progress_report_phase_complete(prog);
+
+ progress_start_phase(prog, pd.phase_p1);
+ pcs = pcs_new_with_firstbits(pd.bitsp1, pd.firstbits, NFIRSTBITS);
+ p1 = primegen_generate(pgc, pcs, prog);
+ progress_report_phase_complete(prog);
+ }
+
+ progress_start_phase(prog, pd.phase_main);
+ pcs = pcs_new_with_firstbits(pd.bits, pd.firstbits, NFIRSTBITS);
+ pcs_avoid_residue_small(pcs, RSA_EXPONENT, 1);
+ if (pd.strong) {
+ pcs_require_residue_1_mod_prime(pcs, m1);
+ mp_int *p1_minus_1 = mp_copy(p1);
+ mp_sub_integer_into(p1_minus_1, p1, 1);
+ pcs_require_residue(pcs, p1, p1_minus_1);
+ mp_free(p1_minus_1);
+ }
+ p = primegen_generate(pgc, pcs, prog);
+ progress_report_phase_complete(prog);
+
+ if (m1m1)
+ mp_free(m1m1);
+ if (m1)
+ mp_free(m1);
+ if (p1)
+ mp_free(p1);
+
+ return p;
+}
+
+int rsa_generate(RSAKey *key, int bits, bool strong,
+ PrimeGenerationContext *pgc, ProgressReceiver *prog)
+{
+ key->sshk.vt = &ssh_rsa;
+
+ /*
+ * We don't generate e; we just use a standard one always.
+ */
+ mp_int *exponent = mp_from_integer(RSA_EXPONENT);
+
+ /*
+ * Generate p and q: primes with combined length `bits', not
+ * congruent to 1 modulo e. (Strictly speaking, we wanted (p-1)
+ * and e to be coprime, and (q-1) and e to be coprime, but in
+ * general that's slightly more fiddly to arrange. By choosing
+ * a prime e, we can simplify the criterion.)
+ *
+ * We give a min_separation of 2 to invent_firstbits(), ensuring
+ * that the two primes won't be very close to each other. (The
+ * chance of them being _dangerously_ close is negligible - even
+ * more so than an attacker guessing a whole 256-bit session key -
+ * but it doesn't cost much to make sure.)
+ */
+ int qbits = bits / 2;
+ int pbits = bits - qbits;
+ assert(pbits >= qbits);
+
+ RSAPrimeDetails pd = setup_rsa_prime(pbits, strong, pgc, prog);
+ RSAPrimeDetails qd = setup_rsa_prime(qbits, strong, pgc, prog);
+ progress_ready(prog);
+
+ invent_firstbits(&pd.firstbits, &qd.firstbits, 2);
+
+ mp_int *p = generate_rsa_prime(pd, pgc, prog);
+ mp_int *q = generate_rsa_prime(qd, pgc, prog);
+
+ /*
+ * Ensure p > q, by swapping them if not.
+ *
+ * We only need to do this if the two primes were generated with
+ * the same number of bits (i.e. if the requested key size is
+ * even) - otherwise it's already guaranteed!
+ */
+ if (pbits == qbits) {
+ mp_cond_swap(p, q, mp_cmp_hs(q, p));
+ } else {
+ assert(mp_cmp_hs(p, q));
+ }
+
+ /*
+ * Now we have p, q and e. All we need to do now is work out
+ * the other helpful quantities: n=pq, d=e^-1 mod (p-1)(q-1),
+ * and (q^-1 mod p).
+ */
+ mp_int *modulus = mp_mul(p, q);
+ mp_int *pm1 = mp_copy(p);
+ mp_sub_integer_into(pm1, pm1, 1);
+ mp_int *qm1 = mp_copy(q);
+ mp_sub_integer_into(qm1, qm1, 1);
+ mp_int *phi_n = mp_mul(pm1, qm1);
+ mp_free(pm1);
+ mp_free(qm1);
+ mp_int *private_exponent = mp_invert(exponent, phi_n);
+ mp_free(phi_n);
+ mp_int *iqmp = mp_invert(q, p);
+
+ /*
+ * Populate the returned structure.
+ */
+ key->modulus = modulus;
+ key->exponent = exponent;
+ key->private_exponent = private_exponent;
+ key->p = p;
+ key->q = q;
+ key->iqmp = iqmp;
+
+ key->bits = mp_get_nbits(modulus);
+ key->bytes = (key->bits + 7) / 8;
+
+ return 1;
+}
+
+/*
+ * Invent a pair of values suitable for use as the 'firstbits' values
+ * for the two RSA primes, such that their product is at least 2, and
+ * such that their difference is also at least min_separation.
+ *
+ * This is used for generating RSA keys which have exactly the
+ * specified number of bits rather than one fewer - if you generate an
+ * a-bit and a b-bit number completely at random and multiply them
+ * together, you could end up with either an (ab-1)-bit number or an
+ * (ab)-bit number. The former happens log(2)*2-1 of the time (about
+ * 39%) and, though actually harmless, every time it occurs it has a
+ * non-zero probability of sparking a user email along the lines of
+ * 'Hey, I asked PuTTYgen for a 2048-bit key and I only got 2047 bits!
+ * Bug!'
+ */
+static inline unsigned firstbits_b_min(
+ unsigned a, unsigned lo, unsigned hi, unsigned min_separation)
+{
+ /* To get a large enough product, b must be at least this much */
+ unsigned b_min = (2*lo*lo + a - 1) / a;
+ /* Now enforce a<b, optionally with minimum separation */
+ if (b_min < a + min_separation)
+ b_min = a + min_separation;
+ /* And cap at the upper limit */
+ if (b_min > hi)
+ b_min = hi;
+ return b_min;
+}
+
+static void invent_firstbits(unsigned *one, unsigned *two,
+ unsigned min_separation)
+{
+ /*
+ * We'll pick 12 initial bits (number selected at random) for each
+ * prime, not counting the leading 1. So we want to return two
+ * values in the range [2^12,2^13) whose product is at least 2^25.
+ *
+ * Strategy: count up all the viable pairs, then select a random
+ * number in that range and use it to pick a pair.
+ *
+ * To keep things simple, we'll ensure a < b, and randomly swap
+ * them at the end.
+ */
+ const unsigned lo = 1<<12, hi = 1<<13, minproduct = 2*lo*lo;
+ unsigned a, b;
+
+ /*
+ * Count up the number of prefixes of b that would be valid for
+ * each prefix of a.
+ */
+ mp_int *total = mp_new(32);
+ for (a = lo; a < hi; a++) {
+ unsigned b_min = firstbits_b_min(a, lo, hi, min_separation);
+ mp_add_integer_into(total, total, hi - b_min);
+ }
+
+ /*
+ * Make up a random number in the range [0,2*total).
+ */
+ mp_int *mlo = mp_from_integer(0), *mhi = mp_new(32);
+ mp_lshift_fixed_into(mhi, total, 1);
+ mp_int *randval = mp_random_in_range(mlo, mhi);
+ mp_free(mlo);
+ mp_free(mhi);
+
+ /*
+ * Use the low bit of randval as our swap indicator, leaving the
+ * rest of it in the range [0,total).
+ */
+ unsigned swap = mp_get_bit(randval, 0);
+ mp_rshift_fixed_into(randval, randval, 1);
+
+ /*
+ * Now do the same counting loop again to make the actual choice.
+ */
+ a = b = 0;
+ for (unsigned a_candidate = lo; a_candidate < hi; a_candidate++) {
+ unsigned b_min = firstbits_b_min(a_candidate, lo, hi, min_separation);
+ unsigned limit = hi - b_min;
+
+ unsigned b_candidate = b_min + mp_get_integer(randval);
+ unsigned use_it = 1 ^ mp_hs_integer(randval, limit);
+ a ^= (a ^ a_candidate) & -use_it;
+ b ^= (b ^ b_candidate) & -use_it;
+
+ mp_sub_integer_into(randval, randval, limit);
+ }
+
+ mp_free(randval);
+ mp_free(total);
+
+ /*
+ * Check everything came out right.
+ */
+ assert(lo <= a);
+ assert(a < hi);
+ assert(lo <= b);
+ assert(b < hi);
+ assert(a * b >= minproduct);
+ assert(b >= a + min_separation);
+
+ /*
+ * Last-minute optional swap of a and b.
+ */
+ unsigned diff = (a ^ b) & (-swap);
+ a ^= diff;
+ b ^= diff;
+
+ *one = a;
+ *two = b;
+}
diff --git a/keygen/smallprimes.c b/keygen/smallprimes.c
new file mode 100644
index 00000000..a43b0bde
--- /dev/null
+++ b/keygen/smallprimes.c
@@ -0,0 +1,44 @@
+/*
+ * smallprimes.c: implementation of the array of small primes defined
+ * in sshkeygen.h.
+ */
+
+#include <assert.h>
+#include "ssh.h"
+#include "sshkeygen.h"
+
+/* The real array that stores the primes. It has to be writable in
+ * this module, but outside this module, we only expose the
+ * const-qualified pointer 'smallprimes' so that nobody else can
+ * accidentally overwrite it. */
+static unsigned short smallprimes_array[NSMALLPRIMES];
+
+const unsigned short *const smallprimes = smallprimes_array;
+
+void init_smallprimes(void)
+{
+ if (smallprimes_array[0])
+ return; /* already done */
+
+ bool A[65536];
+
+ for (size_t i = 2; i < lenof(A); i++)
+ A[i] = true;
+
+ for (size_t i = 2; i < lenof(A); i++) {
+ if (!A[i])
+ continue;
+ for (size_t j = 2*i; j < lenof(A); j += i)
+ A[j] = false;
+ }
+
+ size_t pos = 0;
+ for (size_t i = 2; i < lenof(A); i++) {
+ if (A[i]) {
+ assert(pos < NSMALLPRIMES);
+ smallprimes_array[pos++] = i;
+ }
+ }
+
+ assert(pos == NSMALLPRIMES);
+}