-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgf2_32.c
175 lines (134 loc) · 5.44 KB
/
gf2_32.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#include <stdint.h>
#include "gf2_16.h"
#include "gf2_32.h"
#include <stdio.h>
// In this file we construct the Galois field GF(2^32)
// as a finite field extension of GF(2^16). The
// field is constructed using the following irreducible
// polynomial, whose coefficents should be understood
// as the binary representation of elements of GF(2^16):
//
// x^2 + x + 8192
//
// Because the constant term is the only coefficent not
// equal to 0 or 1, modular reduction is fairly fast, requiring
// only one additional multipliation in GF(2^16).
//
// Moreover, we get very lucky with this polynomial because
// the Itoh-Tsujii method for calculating multiplicative inverses
// works especially well for this polynomial. The main task of
// raising to the r = 2^16 power turns out to be almost trivial.
uint32_t gf2_32_mult( uint32_t a, uint32_t b ) {
uint16_t a0, a1, b0, b1, za0, za1, zb0, zb1;
uint32_t alog0, alog1, blog0, blog1;
a0 = (uint16_t) a;
a1 = (uint16_t) (a >> 16);
b0 = (uint16_t) b;
b1 = (uint16_t) (b >> 16);
alog0 = gf2_16_log_table[a0];
alog1 = gf2_16_log_table[a1];
blog0 = gf2_16_log_table[b0];
blog1 = gf2_16_log_table[b1];
za0 = zeroMask( a0 );
za1 = zeroMask( a1 );
zb0 = zeroMask( b0 );
zb1 = zeroMask( b1 );
uint16_t c0, c1, c2;
c0 = gf2_16_expadd( za0|zb0, alog0, blog0 );
c1 = gf2_16_expadd( za0|zb1, alog0, blog1 );
c1 ^= gf2_16_expadd( za1|zb0, alog1, blog0 );
c2 = gf2_16_expadd( za1|zb1, alog1, blog1 );
// Now reduce by the field extension irreducible
// polynomial x^2 + x + 8192
//uint16_t vlog = gf2_16_log_table[8192];
// this magic number is equal to gf2_16_log_table[8192];
const uint16_t vlog = 0x000d;
c1 ^= c2;
c0 ^= gf2_16_expadd3( zeroMask(c2), alog1, blog1, vlog );
// polynomial is now reduced, output the result
uint32_t res = (((uint32_t) c1) << 16) | ((uint32_t) c0);
return res;
}
uint32_t gf2_32_square( uint32_t a ) {
uint16_t a0, a1, b0, b2, c1, c0, za1;
uint32_t alog0, alog1;
// a1(x) + a0
a0 = (uint16_t) a;
a1 = (uint16_t) (a >> 16);
alog0 = gf2_16_log_table[a0];
alog1 = gf2_16_log_table[a1];
// (a1(x) + a0)^2 = b2(x^2) + b0
b0 = gf2_16_expadd( zeroMask(a0), alog0, alog0 );
za1 = zeroMask( a1 );
b2 = gf2_16_expadd( za1, alog1, alog1 );
// Now reduce by the field extension irreducible
// polynomial x^2 + x + 8192
//uint16_t vlog = gf2_16_log_table[8192];
// this magic number is equal to gf2_16_log_table[8192];
const uint16_t vlog = 0x000d;
c1 = b2;
c0 = b0 ^ gf2_16_expadd3( za1, alog1, alog1, vlog );
uint32_t res = (((uint32_t) c1) << 16) | ((uint32_t) c0);
return res;
}
uint32_t gf2_32_inv( uint32_t a ) {
// Let q = 2^16. let r = (q^2 - 1) / (q - 1) = 2^16 + 1
// This rather special number has to do with caluclating finite field norms.
// For all x in GF((2^16)^2), x^r yields a value in GF(2^16); that is, for which
// the high coefficent is 0. We exploit this fact to perform fast inversions
// in GF(2^32) by reducing them to inversion in GF(2^16).
//
// We are going to calculate a^(-1) = a^(-r) * a^(r-1). The algorithm below goes
// roughly as follows:
//
// s = a^(r-1) = a^(2^16)
// t = a * s = a^r
// b = t^(-1) * s = a^(-1)
uint16_t s0, s1;
uint16_t t0, t1;
// split a into high and low coefficents
uint16_t a0 = (uint16_t) a;
uint16_t a1 = (uint16_t) (a >> 16);
// This is some crazy magic! We calculate s = a^(r-1) = a^(2^16) by the
// following formula, which seems too insanely simple to be correct,
// but somehow works out.
s1 = a1;
s0 = a0 ^ a1;
// Now we calculate t = s * a = a^r. This is guaranteed, by a theorem
// of finite fields, to be a constant polynomial, i.e., the high coefficent
// of t must be equal to 0. We exploit this to do only the minimal amount
// of calculation necessary to find the low coefficent of t. This saves
// several GF(2^16) multiplies and adds.
uint16_t slog0 = gf2_16_log_table[s0];
uint16_t slog1 = gf2_16_log_table[s1];
uint16_t sz0 = zeroMask(s0);
uint16_t sz1 = zeroMask(s1);
uint16_t alog0 = gf2_16_log_table[a0];
uint16_t alog1 = gf2_16_log_table[a1];
uint16_t az0 = zeroMask(a0);
uint16_t az1 = zeroMask(a1);
t0 = gf2_16_expadd( sz0|az0, slog0, alog0 );
// Multiply the high coefficents and perform polynomial reduction all at once...
const uint16_t vlog = 0x000d;
t1 = gf2_16_expadd3( sz1|az1, slog1, alog1, vlog );
t0 ^= t1;
// Now, we can find the inverse of t by just taking the inverse of t0 in
// GF(2^16), which we can easily do via our lookup tables. However, we
// can save a few table lookups by instead just directly taking the log of t0,
// and using that to perform the necessary divisions below.
//uint16_t t0_inv = gf2_16_inv( t0 );
uint16_t t0_inv_log = Q - gf2_16_log_table[ t0 ];
// Now we calculate b = t^(-1) * s = a^(-r) * a^(r-1) = a^(-1). Because the
// high coefficent of t is 0, we simply multiply the inverse of t0 by the coefficents
// of s. We do this by using the t0_log value we calculated just above.
// We know t0 != 0, so we don't have to mask for that case.
uint16_t b0 = gf2_16_expadd( sz0, slog0, t0_inv_log );
//uint16_t b0 = gf2_16_mult( s0, t0_inv );
uint16_t b1 = gf2_16_expadd( sz1, slog1, t0_inv_log );
//uint16_t b1 = gf2_16_mult( s1, t0_inv );
// Package the result and ship it
uint32_t b = (((uint32_t) b1) << 16) | ((uint32_t) b0);
return b;
}
// r = (2^32 - 1) / (2^16 - 1) = 65537 = 2^16 + 1
// r-1 = 65536 = 2^16