Wire Sysio Wire Sysion 1.0.0
Loading...
Searching...
No Matches
modinv32_impl.h
Go to the documentation of this file.
1/***********************************************************************
2 * Copyright (c) 2020 Peter Dettman *
3 * Distributed under the MIT software license, see the accompanying *
4 * file COPYING or https://www.opensource.org/licenses/mit-license.php.*
5 **********************************************************************/
6
7#ifndef SECP256K1_MODINV32_IMPL_H
8#define SECP256K1_MODINV32_IMPL_H
9
10#include "modinv32.h"
11
12#include "util.h"
13
14#include <stdlib.h>
15
16/* This file implements modular inversion based on the paper "Fast constant-time gcd computation and
17 * modular inversion" by Daniel J. Bernstein and Bo-Yin Yang.
18 *
19 * For an explanation of the algorithm, see doc/safegcd_implementation.md. This file contains an
20 * implementation for N=30, using 30-bit signed limbs represented as int32_t.
21 */
22
23#ifdef VERIFY
24static const secp256k1_modinv32_signed30 SECP256K1_SIGNED30_ONE = {{1}};
25
26/* Compute a*factor and put it in r. All but the top limb in r will be in range [0,2^30). */
27static void secp256k1_modinv32_mul_30(secp256k1_modinv32_signed30 *r, const secp256k1_modinv32_signed30 *a, int alen, int32_t factor) {
28 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
29 int64_t c = 0;
30 int i;
31 for (i = 0; i < 8; ++i) {
32 if (i < alen) c += (int64_t)a->v[i] * factor;
33 r->v[i] = (int32_t)c & M30; c >>= 30;
34 }
35 if (8 < alen) c += (int64_t)a->v[8] * factor;
36 VERIFY_CHECK(c == (int32_t)c);
37 r->v[8] = (int32_t)c;
38}
39
40/* Return -1 for a<b*factor, 0 for a==b*factor, 1 for a>b*factor. A consists of alen limbs; b has 9. */
41static int secp256k1_modinv32_mul_cmp_30(const secp256k1_modinv32_signed30 *a, int alen, const secp256k1_modinv32_signed30 *b, int32_t factor) {
42 int i;
44 secp256k1_modinv32_mul_30(&am, a, alen, 1); /* Normalize all but the top limb of a. */
45 secp256k1_modinv32_mul_30(&bm, b, 9, factor);
46 for (i = 0; i < 8; ++i) {
47 /* Verify that all but the top limb of a and b are normalized. */
48 VERIFY_CHECK(am.v[i] >> 30 == 0);
49 VERIFY_CHECK(bm.v[i] >> 30 == 0);
50 }
51 for (i = 8; i >= 0; --i) {
52 if (am.v[i] < bm.v[i]) return -1;
53 if (am.v[i] > bm.v[i]) return 1;
54 }
55 return 0;
56}
57#endif
58
59/* Take as input a signed30 number in range (-2*modulus,modulus), and add a multiple of the modulus
60 * to it to bring it to range [0,modulus). If sign < 0, the input will also be negated in the
61 * process. The input must have limbs in range (-2^30,2^30). The output will have limbs in range
62 * [0,2^30). */
63static void secp256k1_modinv32_normalize_30(secp256k1_modinv32_signed30 *r, int32_t sign, const secp256k1_modinv32_modinfo *modinfo) {
64 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
65 int32_t r0 = r->v[0], r1 = r->v[1], r2 = r->v[2], r3 = r->v[3], r4 = r->v[4],
66 r5 = r->v[5], r6 = r->v[6], r7 = r->v[7], r8 = r->v[8];
67 int32_t cond_add, cond_negate;
68
69#ifdef VERIFY
70 /* Verify that all limbs are in range (-2^30,2^30). */
71 int i;
72 for (i = 0; i < 9; ++i) {
73 VERIFY_CHECK(r->v[i] >= -M30);
74 VERIFY_CHECK(r->v[i] <= M30);
75 }
76 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, -2) > 0); /* r > -2*modulus */
77 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
78#endif
79
80 /* In a first step, add the modulus if the input is negative, and then negate if requested.
81 * This brings r from range (-2*modulus,modulus) to range (-modulus,modulus). As all input
82 * limbs are in range (-2^30,2^30), this cannot overflow an int32_t. Note that the right
83 * shifts below are signed sign-extending shifts (see assumptions.h for tests that that is
84 * indeed the behavior of the right shift operator). */
85 cond_add = r8 >> 31;
86 r0 += modinfo->modulus.v[0] & cond_add;
87 r1 += modinfo->modulus.v[1] & cond_add;
88 r2 += modinfo->modulus.v[2] & cond_add;
89 r3 += modinfo->modulus.v[3] & cond_add;
90 r4 += modinfo->modulus.v[4] & cond_add;
91 r5 += modinfo->modulus.v[5] & cond_add;
92 r6 += modinfo->modulus.v[6] & cond_add;
93 r7 += modinfo->modulus.v[7] & cond_add;
94 r8 += modinfo->modulus.v[8] & cond_add;
95 cond_negate = sign >> 31;
96 r0 = (r0 ^ cond_negate) - cond_negate;
97 r1 = (r1 ^ cond_negate) - cond_negate;
98 r2 = (r2 ^ cond_negate) - cond_negate;
99 r3 = (r3 ^ cond_negate) - cond_negate;
100 r4 = (r4 ^ cond_negate) - cond_negate;
101 r5 = (r5 ^ cond_negate) - cond_negate;
102 r6 = (r6 ^ cond_negate) - cond_negate;
103 r7 = (r7 ^ cond_negate) - cond_negate;
104 r8 = (r8 ^ cond_negate) - cond_negate;
105 /* Propagate the top bits, to bring limbs back to range (-2^30,2^30). */
106 r1 += r0 >> 30; r0 &= M30;
107 r2 += r1 >> 30; r1 &= M30;
108 r3 += r2 >> 30; r2 &= M30;
109 r4 += r3 >> 30; r3 &= M30;
110 r5 += r4 >> 30; r4 &= M30;
111 r6 += r5 >> 30; r5 &= M30;
112 r7 += r6 >> 30; r6 &= M30;
113 r8 += r7 >> 30; r7 &= M30;
114
115 /* In a second step add the modulus again if the result is still negative, bringing r to range
116 * [0,modulus). */
117 cond_add = r8 >> 31;
118 r0 += modinfo->modulus.v[0] & cond_add;
119 r1 += modinfo->modulus.v[1] & cond_add;
120 r2 += modinfo->modulus.v[2] & cond_add;
121 r3 += modinfo->modulus.v[3] & cond_add;
122 r4 += modinfo->modulus.v[4] & cond_add;
123 r5 += modinfo->modulus.v[5] & cond_add;
124 r6 += modinfo->modulus.v[6] & cond_add;
125 r7 += modinfo->modulus.v[7] & cond_add;
126 r8 += modinfo->modulus.v[8] & cond_add;
127 /* And propagate again. */
128 r1 += r0 >> 30; r0 &= M30;
129 r2 += r1 >> 30; r1 &= M30;
130 r3 += r2 >> 30; r2 &= M30;
131 r4 += r3 >> 30; r3 &= M30;
132 r5 += r4 >> 30; r4 &= M30;
133 r6 += r5 >> 30; r5 &= M30;
134 r7 += r6 >> 30; r6 &= M30;
135 r8 += r7 >> 30; r7 &= M30;
136
137 r->v[0] = r0;
138 r->v[1] = r1;
139 r->v[2] = r2;
140 r->v[3] = r3;
141 r->v[4] = r4;
142 r->v[5] = r5;
143 r->v[6] = r6;
144 r->v[7] = r7;
145 r->v[8] = r8;
146
147#ifdef VERIFY
148 VERIFY_CHECK(r0 >> 30 == 0);
149 VERIFY_CHECK(r1 >> 30 == 0);
150 VERIFY_CHECK(r2 >> 30 == 0);
151 VERIFY_CHECK(r3 >> 30 == 0);
152 VERIFY_CHECK(r4 >> 30 == 0);
153 VERIFY_CHECK(r5 >> 30 == 0);
154 VERIFY_CHECK(r6 >> 30 == 0);
155 VERIFY_CHECK(r7 >> 30 == 0);
156 VERIFY_CHECK(r8 >> 30 == 0);
157 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 0) >= 0); /* r >= 0 */
158 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(r, 9, &modinfo->modulus, 1) < 0); /* r < modulus */
159#endif
160}
161
162/* Data type for transition matrices (see section 3 of explanation).
163 *
164 * t = [ u v ]
165 * [ q r ]
166 */
167typedef struct {
168 int32_t u, v, q, r;
170
171/* Compute the transition matrix and zeta for 30 divsteps.
172 *
173 * Input: zeta: initial zeta
174 * f0: bottom limb of initial f
175 * g0: bottom limb of initial g
176 * Output: t: transition matrix
177 * Return: final zeta
178 *
179 * Implements the divsteps_n_matrix function from the explanation.
180 */
181static int32_t secp256k1_modinv32_divsteps_30(int32_t zeta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
182 /* u,v,q,r are the elements of the transformation matrix being built up,
183 * starting with the identity matrix. Semantically they are signed integers
184 * in range [-2^30,2^30], but here represented as unsigned mod 2^32. This
185 * permits left shifting (which is UB for negative numbers). The range
186 * being inside [-2^31,2^31) means that casting to signed works correctly.
187 */
188 uint32_t u = 1, v = 0, q = 0, r = 1;
189 uint32_t c1, c2, f = f0, g = g0, x, y, z;
190 int i;
191
192 for (i = 0; i < 30; ++i) {
193 VERIFY_CHECK((f & 1) == 1); /* f must always be odd */
194 VERIFY_CHECK((u * f0 + v * g0) == f << i);
195 VERIFY_CHECK((q * f0 + r * g0) == g << i);
196 /* Compute conditional masks for (zeta < 0) and for (g & 1). */
197 c1 = zeta >> 31;
198 c2 = -(g & 1);
199 /* Compute x,y,z, conditionally negated versions of f,u,v. */
200 x = (f ^ c1) - c1;
201 y = (u ^ c1) - c1;
202 z = (v ^ c1) - c1;
203 /* Conditionally add x,y,z to g,q,r. */
204 g += x & c2;
205 q += y & c2;
206 r += z & c2;
207 /* In what follows, c1 is a condition mask for (zeta < 0) and (g & 1). */
208 c1 &= c2;
209 /* Conditionally change zeta into -zeta-2 or zeta-1. */
210 zeta = (zeta ^ c1) - 1;
211 /* Conditionally add g,q,r to f,u,v. */
212 f += g & c1;
213 u += q & c1;
214 v += r & c1;
215 /* Shifts */
216 g >>= 1;
217 u <<= 1;
218 v <<= 1;
219 /* Bounds on zeta that follow from the bounds on iteration count (max 20*30 divsteps). */
220 VERIFY_CHECK(zeta >= -601 && zeta <= 601);
221 }
222 /* Return data in t and return value. */
223 t->u = (int32_t)u;
224 t->v = (int32_t)v;
225 t->q = (int32_t)q;
226 t->r = (int32_t)r;
227 /* The determinant of t must be a power of two. This guarantees that multiplication with t
228 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
229 * will be divided out again). As each divstep's individual matrix has determinant 2, the
230 * aggregate of 30 of them will have determinant 2^30. */
231 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
232 return zeta;
233}
234
235/* Compute the transition matrix and eta for 30 divsteps (variable time).
236 *
237 * Input: eta: initial eta
238 * f0: bottom limb of initial f
239 * g0: bottom limb of initial g
240 * Output: t: transition matrix
241 * Return: final eta
242 *
243 * Implements the divsteps_n_matrix_var function from the explanation.
244 */
245static int32_t secp256k1_modinv32_divsteps_30_var(int32_t eta, uint32_t f0, uint32_t g0, secp256k1_modinv32_trans2x2 *t) {
246 /* inv256[i] = -(2*i+1)^-1 (mod 256) */
247 static const uint8_t inv256[128] = {
248 0xFF, 0x55, 0x33, 0x49, 0xC7, 0x5D, 0x3B, 0x11, 0x0F, 0xE5, 0xC3, 0x59,
249 0xD7, 0xED, 0xCB, 0x21, 0x1F, 0x75, 0x53, 0x69, 0xE7, 0x7D, 0x5B, 0x31,
250 0x2F, 0x05, 0xE3, 0x79, 0xF7, 0x0D, 0xEB, 0x41, 0x3F, 0x95, 0x73, 0x89,
251 0x07, 0x9D, 0x7B, 0x51, 0x4F, 0x25, 0x03, 0x99, 0x17, 0x2D, 0x0B, 0x61,
252 0x5F, 0xB5, 0x93, 0xA9, 0x27, 0xBD, 0x9B, 0x71, 0x6F, 0x45, 0x23, 0xB9,
253 0x37, 0x4D, 0x2B, 0x81, 0x7F, 0xD5, 0xB3, 0xC9, 0x47, 0xDD, 0xBB, 0x91,
254 0x8F, 0x65, 0x43, 0xD9, 0x57, 0x6D, 0x4B, 0xA1, 0x9F, 0xF5, 0xD3, 0xE9,
255 0x67, 0xFD, 0xDB, 0xB1, 0xAF, 0x85, 0x63, 0xF9, 0x77, 0x8D, 0x6B, 0xC1,
256 0xBF, 0x15, 0xF3, 0x09, 0x87, 0x1D, 0xFB, 0xD1, 0xCF, 0xA5, 0x83, 0x19,
257 0x97, 0xAD, 0x8B, 0xE1, 0xDF, 0x35, 0x13, 0x29, 0xA7, 0x3D, 0x1B, 0xF1,
258 0xEF, 0xC5, 0xA3, 0x39, 0xB7, 0xCD, 0xAB, 0x01
259 };
260
261 /* Transformation matrix; see comments in secp256k1_modinv32_divsteps_30. */
262 uint32_t u = 1, v = 0, q = 0, r = 1;
263 uint32_t f = f0, g = g0, m;
264 uint16_t w;
265 int i = 30, limit, zeros;
266
267 for (;;) {
268 /* Use a sentinel bit to count zeros only up to i. */
269 zeros = secp256k1_ctz32_var(g | (UINT32_MAX << i));
270 /* Perform zeros divsteps at once; they all just divide g by two. */
271 g >>= zeros;
272 u <<= zeros;
273 v <<= zeros;
274 eta -= zeros;
275 i -= zeros;
276 /* We're done once we've done 30 divsteps. */
277 if (i == 0) break;
278 VERIFY_CHECK((f & 1) == 1);
279 VERIFY_CHECK((g & 1) == 1);
280 VERIFY_CHECK((u * f0 + v * g0) == f << (30 - i));
281 VERIFY_CHECK((q * f0 + r * g0) == g << (30 - i));
282 /* Bounds on eta that follow from the bounds on iteration count (max 25*30 divsteps). */
283 VERIFY_CHECK(eta >= -751 && eta <= 751);
284 /* If eta is negative, negate it and replace f,g with g,-f. */
285 if (eta < 0) {
286 uint32_t tmp;
287 eta = -eta;
288 tmp = f; f = g; g = -tmp;
289 tmp = u; u = q; q = -tmp;
290 tmp = v; v = r; r = -tmp;
291 }
292 /* eta is now >= 0. In what follows we're going to cancel out the bottom bits of g. No more
293 * than i can be cancelled out (as we'd be done before that point), and no more than eta+1
294 * can be done as its sign will flip once that happens. */
295 limit = ((int)eta + 1) > i ? i : ((int)eta + 1);
296 /* m is a mask for the bottom min(limit, 8) bits (our table only supports 8 bits). */
297 VERIFY_CHECK(limit > 0 && limit <= 30);
298 m = (UINT32_MAX >> (32 - limit)) & 255U;
299 /* Find what multiple of f must be added to g to cancel its bottom min(limit, 8) bits. */
300 w = (g * inv256[(f >> 1) & 127]) & m;
301 /* Do so. */
302 g += f * w;
303 q += u * w;
304 r += v * w;
305 VERIFY_CHECK((g & m) == 0);
306 }
307 /* Return data in t and return value. */
308 t->u = (int32_t)u;
309 t->v = (int32_t)v;
310 t->q = (int32_t)q;
311 t->r = (int32_t)r;
312 /* The determinant of t must be a power of two. This guarantees that multiplication with t
313 * does not change the gcd of f and g, apart from adding a power-of-2 factor to it (which
314 * will be divided out again). As each divstep's individual matrix has determinant 2, the
315 * aggregate of 30 of them will have determinant 2^30. */
316 VERIFY_CHECK((int64_t)t->u * t->r - (int64_t)t->v * t->q == ((int64_t)1) << 30);
317 return eta;
318}
319
320/* Compute (t/2^30) * [d, e] mod modulus, where t is a transition matrix for 30 divsteps.
321 *
322 * On input and output, d and e are in range (-2*modulus,modulus). All output limbs will be in range
323 * (-2^30,2^30).
324 *
325 * This implements the update_de function from the explanation.
326 */
327static void secp256k1_modinv32_update_de_30(secp256k1_modinv32_signed30 *d, secp256k1_modinv32_signed30 *e, const secp256k1_modinv32_trans2x2 *t, const secp256k1_modinv32_modinfo* modinfo) {
328 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
329 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
330 int32_t di, ei, md, me, sd, se;
331 int64_t cd, ce;
332 int i;
333#ifdef VERIFY
334 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
335 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
336 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
337 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
338 VERIFY_CHECK((labs(u) + labs(v)) >= 0); /* |u|+|v| doesn't overflow */
339 VERIFY_CHECK((labs(q) + labs(r)) >= 0); /* |q|+|r| doesn't overflow */
340 VERIFY_CHECK((labs(u) + labs(v)) <= M30 + 1); /* |u|+|v| <= 2^30 */
341 VERIFY_CHECK((labs(q) + labs(r)) <= M30 + 1); /* |q|+|r| <= 2^30 */
342#endif
343 /* [md,me] start as zero; plus [u,q] if d is negative; plus [v,r] if e is negative. */
344 sd = d->v[8] >> 31;
345 se = e->v[8] >> 31;
346 md = (u & sd) + (v & se);
347 me = (q & sd) + (r & se);
348 /* Begin computing t*[d,e]. */
349 di = d->v[0];
350 ei = e->v[0];
351 cd = (int64_t)u * di + (int64_t)v * ei;
352 ce = (int64_t)q * di + (int64_t)r * ei;
353 /* Correct md,me so that t*[d,e]+modulus*[md,me] has 30 zero bottom bits. */
354 md -= (modinfo->modulus_inv30 * (uint32_t)cd + md) & M30;
355 me -= (modinfo->modulus_inv30 * (uint32_t)ce + me) & M30;
356 /* Update the beginning of computation for t*[d,e]+modulus*[md,me] now md,me are known. */
357 cd += (int64_t)modinfo->modulus.v[0] * md;
358 ce += (int64_t)modinfo->modulus.v[0] * me;
359 /* Verify that the low 30 bits of the computation are indeed zero, and then throw them away. */
360 VERIFY_CHECK(((int32_t)cd & M30) == 0); cd >>= 30;
361 VERIFY_CHECK(((int32_t)ce & M30) == 0); ce >>= 30;
362 /* Now iteratively compute limb i=1..8 of t*[d,e]+modulus*[md,me], and store them in output
363 * limb i-1 (shifting down by 30 bits). */
364 for (i = 1; i < 9; ++i) {
365 di = d->v[i];
366 ei = e->v[i];
367 cd += (int64_t)u * di + (int64_t)v * ei;
368 ce += (int64_t)q * di + (int64_t)r * ei;
369 cd += (int64_t)modinfo->modulus.v[i] * md;
370 ce += (int64_t)modinfo->modulus.v[i] * me;
371 d->v[i - 1] = (int32_t)cd & M30; cd >>= 30;
372 e->v[i - 1] = (int32_t)ce & M30; ce >>= 30;
373 }
374 /* What remains is limb 9 of t*[d,e]+modulus*[md,me]; store it as output limb 8. */
375 d->v[8] = (int32_t)cd;
376 e->v[8] = (int32_t)ce;
377#ifdef VERIFY
378 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, -2) > 0); /* d > -2*modulus */
379 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(d, 9, &modinfo->modulus, 1) < 0); /* d < modulus */
380 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, -2) > 0); /* e > -2*modulus */
381 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(e, 9, &modinfo->modulus, 1) < 0); /* e < modulus */
382#endif
383}
384
385/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
386 *
387 * This implements the update_fg function from the explanation.
388 */
389static void secp256k1_modinv32_update_fg_30(secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
390 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
391 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
392 int32_t fi, gi;
393 int64_t cf, cg;
394 int i;
395 /* Start computing t*[f,g]. */
396 fi = f->v[0];
397 gi = g->v[0];
398 cf = (int64_t)u * fi + (int64_t)v * gi;
399 cg = (int64_t)q * fi + (int64_t)r * gi;
400 /* Verify that the bottom 30 bits of the result are zero, and then throw them away. */
401 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
402 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
403 /* Now iteratively compute limb i=1..8 of t*[f,g], and store them in output limb i-1 (shifting
404 * down by 30 bits). */
405 for (i = 1; i < 9; ++i) {
406 fi = f->v[i];
407 gi = g->v[i];
408 cf += (int64_t)u * fi + (int64_t)v * gi;
409 cg += (int64_t)q * fi + (int64_t)r * gi;
410 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
411 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
412 }
413 /* What remains is limb 9 of t*[f,g]; store it as output limb 8. */
414 f->v[8] = (int32_t)cf;
415 g->v[8] = (int32_t)cg;
416}
417
418/* Compute (t/2^30) * [f, g], where t is a transition matrix for 30 divsteps.
419 *
420 * Version that operates on a variable number of limbs in f and g.
421 *
422 * This implements the update_fg function from the explanation in modinv64_impl.h.
423 */
424static void secp256k1_modinv32_update_fg_30_var(int len, secp256k1_modinv32_signed30 *f, secp256k1_modinv32_signed30 *g, const secp256k1_modinv32_trans2x2 *t) {
425 const int32_t M30 = (int32_t)(UINT32_MAX >> 2);
426 const int32_t u = t->u, v = t->v, q = t->q, r = t->r;
427 int32_t fi, gi;
428 int64_t cf, cg;
429 int i;
430 VERIFY_CHECK(len > 0);
431 /* Start computing t*[f,g]. */
432 fi = f->v[0];
433 gi = g->v[0];
434 cf = (int64_t)u * fi + (int64_t)v * gi;
435 cg = (int64_t)q * fi + (int64_t)r * gi;
436 /* Verify that the bottom 62 bits of the result are zero, and then throw them away. */
437 VERIFY_CHECK(((int32_t)cf & M30) == 0); cf >>= 30;
438 VERIFY_CHECK(((int32_t)cg & M30) == 0); cg >>= 30;
439 /* Now iteratively compute limb i=1..len of t*[f,g], and store them in output limb i-1 (shifting
440 * down by 30 bits). */
441 for (i = 1; i < len; ++i) {
442 fi = f->v[i];
443 gi = g->v[i];
444 cf += (int64_t)u * fi + (int64_t)v * gi;
445 cg += (int64_t)q * fi + (int64_t)r * gi;
446 f->v[i - 1] = (int32_t)cf & M30; cf >>= 30;
447 g->v[i - 1] = (int32_t)cg & M30; cg >>= 30;
448 }
449 /* What remains is limb (len) of t*[f,g]; store it as output limb (len-1). */
450 f->v[len - 1] = (int32_t)cf;
451 g->v[len - 1] = (int32_t)cg;
452}
453
454/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (constant time in x). */
455static void secp256k1_modinv32(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
456 /* Start with d=0, e=1, f=modulus, g=x, zeta=-1. */
461 int i;
462 int32_t zeta = -1; /* zeta = -(delta+1/2); delta is initially 1/2. */
463
464 /* Do 20 iterations of 30 divsteps each = 600 divsteps. 590 suffices for 256-bit inputs. */
465 for (i = 0; i < 20; ++i) {
466 /* Compute transition matrix and new zeta after 30 divsteps. */
468 zeta = secp256k1_modinv32_divsteps_30(zeta, f.v[0], g.v[0], &t);
469 /* Update d,e using that transition matrix. */
470 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
471 /* Update f,g using that transition matrix. */
472#ifdef VERIFY
473 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
474 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
475 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
476 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
477#endif
478 secp256k1_modinv32_update_fg_30(&f, &g, &t);
479#ifdef VERIFY
480 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) > 0); /* f > -modulus */
481 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) <= 0); /* f <= modulus */
482 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, -1) > 0); /* g > -modulus */
483 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &modinfo->modulus, 1) < 0); /* g < modulus */
484#endif
485 }
486
487 /* At this point sufficient iterations have been performed that g must have reached 0
488 * and (if g was not originally 0) f must now equal +/- GCD of the initial f, g
489 * values i.e. +/- 1, and d now contains +/- the modular inverse. */
490#ifdef VERIFY
491 /* g == 0 */
492 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, 9, &SECP256K1_SIGNED30_ONE, 0) == 0);
493 /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
494 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
495 secp256k1_modinv32_mul_cmp_30(&f, 9, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
496 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
497 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
498 (secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, 1) == 0 ||
499 secp256k1_modinv32_mul_cmp_30(&f, 9, &modinfo->modulus, -1) == 0)));
500#endif
501
502 /* Optionally negate d, normalize to [0,modulus), and return it. */
503 secp256k1_modinv32_normalize_30(&d, f.v[8], modinfo);
504 *x = d;
505}
506
507/* Compute the inverse of x modulo modinfo->modulus, and replace x with it (variable time). */
508static void secp256k1_modinv32_var(secp256k1_modinv32_signed30 *x, const secp256k1_modinv32_modinfo *modinfo) {
509 /* Start with d=0, e=1, f=modulus, g=x, eta=-1. */
510 secp256k1_modinv32_signed30 d = {{0, 0, 0, 0, 0, 0, 0, 0, 0}};
511 secp256k1_modinv32_signed30 e = {{1, 0, 0, 0, 0, 0, 0, 0, 0}};
514#ifdef VERIFY
515 int i = 0;
516#endif
517 int j, len = 9;
518 int32_t eta = -1; /* eta = -delta; delta is initially 1 (faster for the variable-time code) */
519 int32_t cond, fn, gn;
520
521 /* Do iterations of 30 divsteps each until g=0. */
522 while (1) {
523 /* Compute transition matrix and new eta after 30 divsteps. */
525 eta = secp256k1_modinv32_divsteps_30_var(eta, f.v[0], g.v[0], &t);
526 /* Update d,e using that transition matrix. */
527 secp256k1_modinv32_update_de_30(&d, &e, &t, modinfo);
528 /* Update f,g using that transition matrix. */
529#ifdef VERIFY
530 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
531 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
532 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
533 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
534#endif
535 secp256k1_modinv32_update_fg_30_var(len, &f, &g, &t);
536 /* If the bottom limb of g is 0, there is a chance g=0. */
537 if (g.v[0] == 0) {
538 cond = 0;
539 /* Check if all other limbs are also 0. */
540 for (j = 1; j < len; ++j) {
541 cond |= g.v[j];
542 }
543 /* If so, we're done. */
544 if (cond == 0) break;
545 }
546
547 /* Determine if len>1 and limb (len-1) of both f and g is 0 or -1. */
548 fn = f.v[len - 1];
549 gn = g.v[len - 1];
550 cond = ((int32_t)len - 2) >> 31;
551 cond |= fn ^ (fn >> 31);
552 cond |= gn ^ (gn >> 31);
553 /* If so, reduce length, propagating the sign of f and g's top limb into the one below. */
554 if (cond == 0) {
555 f.v[len - 2] |= (uint32_t)fn << 30;
556 g.v[len - 2] |= (uint32_t)gn << 30;
557 --len;
558 }
559#ifdef VERIFY
560 VERIFY_CHECK(++i < 25); /* We should never need more than 25*30 = 750 divsteps */
561 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) > 0); /* f > -modulus */
562 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) <= 0); /* f <= modulus */
563 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, -1) > 0); /* g > -modulus */
564 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &modinfo->modulus, 1) < 0); /* g < modulus */
565#endif
566 }
567
568 /* At this point g is 0 and (if g was not originally 0) f must now equal +/- GCD of
569 * the initial f, g values i.e. +/- 1, and d now contains +/- the modular inverse. */
570#ifdef VERIFY
571 /* g == 0 */
572 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&g, len, &SECP256K1_SIGNED30_ONE, 0) == 0);
573 /* |f| == 1, or (x == 0 and d == 0 and |f|=modulus) */
574 VERIFY_CHECK(secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, -1) == 0 ||
575 secp256k1_modinv32_mul_cmp_30(&f, len, &SECP256K1_SIGNED30_ONE, 1) == 0 ||
576 (secp256k1_modinv32_mul_cmp_30(x, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
577 secp256k1_modinv32_mul_cmp_30(&d, 9, &SECP256K1_SIGNED30_ONE, 0) == 0 &&
578 (secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, 1) == 0 ||
579 secp256k1_modinv32_mul_cmp_30(&f, len, &modinfo->modulus, -1) == 0)));
580#endif
581
582 /* Optionally negate d, normalize to [0,modulus), and return it. */
583 secp256k1_modinv32_normalize_30(&d, f.v[len - 1], modinfo);
584 *x = d;
585}
586
587#endif /* SECP256K1_MODINV32_IMPL_H */
const mie::Vuint & r
Definition bn.cpp:28
#define VERIFY_CHECK(cond)
Definition util.h:95
static const Reg16 di(Operand::DI)
const GenericPointer< typename T::ValueType > T2 T::AllocatorType & a
Definition pointer.h:1181
unsigned short uint16_t
Definition stdint.h:125
signed __int64 int64_t
Definition stdint.h:135
unsigned int uint32_t
Definition stdint.h:126
signed int int32_t
Definition stdint.h:123
#define UINT32_MAX
Definition stdint.h:188
unsigned char uint8_t
Definition stdint.h:124
secp256k1_modinv32_signed30 modulus
Definition modinv32.h:25
CK_ULONG d
uint16_t j
size_t len