-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrack-vigenere.c
246 lines (207 loc) · 6.26 KB
/
crack-vigenere.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <math.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include "str.h"
/* Smaller of two values. Function-like macro: an argument may be evaluated
 * twice, so do not pass expressions with side effects. */
#define MIN(a, b) (((a) > (b)) ? (b) : (a))
/* Larger of two values; same double-evaluation caveat as MIN. */
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
/* The cipher alphabet. The array is exactly 26 bytes, so it carries NO
 * terminating NUL: use `sizeof charset` for its length, never strlen(). */
static const char charset[26] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

/* Relative frequency of each letter in English text, indexed by
 * `letter - 'A'`. The entries sum to roughly 1. */
static const double charfreq_english[sizeof charset] = {
    ['A' - 'A'] = 0.082,
    ['B' - 'A'] = 0.015,
    ['C' - 'A'] = 0.028,
    ['D' - 'A'] = 0.043,
    ['E' - 'A'] = 0.127,
    ['F' - 'A'] = 0.022,
    ['G' - 'A'] = 0.020,
    ['H' - 'A'] = 0.061,
    ['I' - 'A'] = 0.070,
    ['J' - 'A'] = 0.0015,
    ['K' - 'A'] = 0.0077,
    ['L' - 'A'] = 0.040,
    ['M' - 'A'] = 0.024,
    ['N' - 'A'] = 0.067,
    ['O' - 'A'] = 0.075,
    ['P' - 'A'] = 0.019,
    ['Q' - 'A'] = 0.00095, /* 0.095 %: rarer than J and X, slightly above Z */
    ['R' - 'A'] = 0.060,
    ['S' - 'A'] = 0.063,
    ['T' - 'A'] = 0.091,
    ['U' - 'A'] = 0.028,
    ['V' - 'A'] = 0.0098,
    ['W' - 'A'] = 0.024,
    ['X' - 'A'] = 0.0015,
    ['Y' - 'A'] = 0.020,
    ['Z' - 'A'] = 0.00074,
};
/* Identity character map: hands back its argument untouched. It has the
 * same int -> int shape as toupper()/tolower(), so it can stand in wherever
 * such a mapping function is expected but no transformation is wanted. */
static int do_nothing(int ch)
{
    return (ch);
}
/* Membership test for the cipher alphabet: yields 1 when `ch` is one of the
 * upper-case letters 'A'..'Z' and 0 for everything else (lower-case letters
 * included). */
static int charset_contains(int ch)
{
    if (ch < 'A') {
        return 0;
    }
    return ch <= 'Z';
}
/* Map a letter to its position in the alphabet: 'A'/'a' -> 0 ... 'Z'/'z' -> 25.
 *
 * Any other character is a programming error: a diagnostic is written to
 * stderr and the process is aborted, so the function never returns an
 * out-of-range index.
 */
static size_t charset_index(char ch)
{
    /* <ctype.h> functions require a value representable as unsigned char (or
     * EOF); plain char may be negative, so convert before calling them. */
    const int upper = toupper((unsigned char)ch);

    /* Explicit range check instead of isalpha(): in a non-"C" locale isalpha
     * may accept letters outside A-Z, which would yield an index above 25. */
    if (upper >= 'A' && upper <= 'Z') {
        return (size_t)(upper - 'A');
    }
    fprintf(stderr, "%s: invalid char %d\n", __func__, ch);
    abort();
}
/* Estimate the index of coincidence of one column of `text`.
 *
 * The column is made of the characters at positions
 * offset, offset + stride, offset + 2 * stride, ... restricted to the first
 * text.len / stride complete rows. The estimate is obtained by drawing a
 * fixed number of random pairs of distinct positions from that column with
 * rand() and returning the fraction of pairs whose characters are equal.
 *
 * `map` is applied to both characters before they are compared. For example,
 * ioc(data, ..., tolower) compares the samples case-insensitively. Passing
 * NULL compares the characters unchanged.
 *
 * Requires 0 <= offset < stride (asserted).
 *
 * Returns NAN when no pair of distinct positions exists: empty text,
 * stride > text.len, or a column with only a single row.
 */
static double ioc(struct str text, int stride, int offset, int (*map)(int))
{
    assert(offset >= 0);
    assert(offset < stride);
    if (text.len < 1 || stride > text.len) {
        return NAN;
    }

    /* number of complete rows; only these positions are ever sampled */
    const size_t rows = text.len / stride;
    if (rows < 2) {
        /* with a single row both draws always coincide, so the rejection
         * loop below would never terminate */
        return NAN;
    }

    if (map == NULL) {
        map = do_nothing;
    }

    const int samples = 2048;
    int matches = 0;
    for (int i = 0; i < samples; i++) {
        const size_t rand_a = (rand() % rows) * stride + offset;
        size_t rand_b;
        /* redraw until the second position differs from the first */
        do {
            rand_b = (rand() % rows) * stride + offset;
        } while (rand_a == rand_b);

        const char a = map(text.data[rand_a]);
        const char b = map(text.data[rand_b]);
        if (a == b) {
            matches++;
        }
    }
    return (double)matches / (double)samples;
}
/* Build a letter histogram for one column of `text`.
 *
 * `output` is first cleared, then every character at positions
 * offset, offset + stride, offset + 2 * stride, ... that charset_contains()
 * accepts adds 1.0 to the slot given by charset_index(). Other characters
 * are skipped. Requires offset < stride (asserted).
 */
static void frequency_count(double output[static sizeof charset], const struct str text, size_t offset, size_t stride)
{
    size_t slot = 0;
    while (slot < sizeof charset) {
        output[slot] = 0;
        slot++;
    }

    assert(offset < stride);

    for (size_t pos = offset; pos < text.len; pos += stride) {
        const char sample = text.data[pos];
        if (charset_contains(sample)) {
            output[charset_index(sample)] += 1.0;
        }
    }
}
/* Correlate two letter-frequency tables with the second one rotated.
 *
 * Returns the sum over all alphabet positions i of
 * a[i] * b[(i + shift) mod alphabet size].
 */
static double frequency_correlation(const double a[static sizeof charset], const double b[static sizeof charset], size_t shift)
{
    const size_t letters = sizeof charset;
    double total = 0;
    size_t i = 0;

    while (i < letters) {
        const size_t rotated = (i + shift) % letters;
        total += a[i] * b[rotated];
        i++;
    }
    return total;
}
/* Debug helper: dump a frequency table to stderr as "[A] = 3, [B] = 0, ...".
 * Each value is rounded to zero decimals. Every entry, including the last,
 * is followed by ", " and no newline is written. */
static void frequency_print(const double freq[static sizeof charset])
{
    size_t letter = 0;

    while (letter < sizeof charset) {
        fprintf(stderr, "[%c] = %.0lf, ", charset[letter], freq[letter]);
        letter++;
    }
}
/* Vigenère-encrypt `text` into `output`.
 *
 * text        - input characters; `output` must have room for text.len bytes
 *               and may be the same buffer as text.data (in-place use)
 * key         - key_len shift amounts (alphabet positions, 0 = no shift), NOT
 *               letters; each entry is reduced modulo charset_len before use
 * charset     - alphabet used to look up the resulting letter
 *
 * Characters rejected by charset_contains() are copied through unchanged, so
 * every byte of `output` is defined even when it is a separate buffer.
 * Requires key_len > 0 and charset_len > 0 (asserted).
 */
static void vigenere_encode(struct str text, char* output, const char* key, size_t key_len, const char* charset, size_t charset_len)
{
    assert(key_len > 0);
    assert(charset_len > 0);

    for (size_t i = 0; i < text.len; i++) {
        const char ch = text.data[i];
        if (!charset_contains(ch)) {
            output[i] = ch;
            continue;
        }
        /* go through unsigned char so a negative key byte cannot wrap the
         * size_t arithmetic below */
        const size_t shift = (size_t)(unsigned char)key[i % key_len] % charset_len;
        output[i] = charset[(charset_index(ch) + shift) % charset_len];
    }
}
/* Vigenère-decrypt `text` into `output`.
 *
 * text        - input characters; `output` must have room for text.len bytes
 *               and may be the same buffer as text.data (in-place use)
 * key         - key_len shift amounts (alphabet positions, 0 = no shift), NOT
 *               letters; each entry is reduced modulo charset_len before use
 * charset     - alphabet used to look up the resulting letter
 *
 * Characters rejected by charset_contains() are copied through unchanged, so
 * every byte of `output` is defined even when it is a separate buffer.
 * Requires key_len > 0 and charset_len > 0 (asserted).
 */
static void vigenere_decode(struct str text, char* output, const char* key, size_t key_len, const char* charset, size_t charset_len)
{
    assert(key_len > 0);
    assert(charset_len > 0);

    for (size_t i = 0; i < text.len; i++) {
        const char ch = text.data[i];
        if (!charset_contains(ch)) {
            output[i] = ch;
            continue;
        }
        /* shift < charset_len, so adding charset_len before subtracting
         * keeps the unsigned expression from underflowing */
        const size_t shift = (size_t)(unsigned char)key[i % key_len] % charset_len;
        output[i] = charset[(charset_index(ch) + charset_len - shift) % charset_len];
    }
}
/* Recover the key of a Vigenère-enciphered text.
 *
 * Input is read from the file named by argv[1], or from stdin when no
 * argument is given. The recovered key is printed to stdout as "key: ..."
 * followed by a newline. Diagnostics and a preview of the decoded text go
 * to stderr so that stdout stays clean when piping.
 *
 * Exits with EXIT_FAILURE when the input cannot be opened or read, contains
 * no usable characters, or memory for the key cannot be allocated.
 */
int main(int argc, char** argv)
{
    /* ioc() samples with rand(); a fixed seed keeps runs reproducible */
    srand(0);

    FILE* f = argc < 2
        ? stdin
        : fopen(argv[1], "r");
    if (f == NULL) {
        fprintf(stderr, "couldn't open file %s: %s\n", argv[1], strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* NOTE(review): presumably keeps only the characters accepted by
     * charset_contains, upper-cased via toupper - confirm against str.h */
    struct str text = read_all_filter(f, charset_contains, toupper);
    if (fclose(f) != 0) {
        perror("fclose");
        /* not fatal, continue */
    }
    if (text.data == NULL) {
        exit(EXIT_FAILURE);
    }
    if (text.len < 1) {
        fprintf(stderr, "no usable characters in input\n");
        str_free(&text);
        exit(EXIT_FAILURE);
    }

    /* Find key length (stride)
     * ========================*/
    int key_len = 1;
    {
        /* values better than threshold immediately break the loop */
        const double threshold = 1.6;
        double best_score = -1.0;
        for (int stride = 1; (size_t)stride < text.len / 2; stride++) {
            /* average the index of coincidence over all columns */
            double result = 0.0;
            for (int j = 0; j < stride; j++) {
                result += ioc(text, stride, j, toupper);
            }
            result /= stride;
            /* normalization: uniformly random letters score about 1.0 */
            result *= 26.0;
            if (result > best_score) {
                best_score = result;
                key_len = stride;
                if (result > threshold) {
                    break;
                }
            }
        }
        fprintf(stderr, "best stride: %i (IOC %.2lf)\n", key_len, best_score);
    }

    /* Crack caesar ciphers column wise
     * ================================ */
    /* heap-allocated: key_len can reach text.len / 2, too large for the stack.
     * calloc leaves every entry at shift 0 until a better one is found. */
    char* key = calloc((size_t)key_len, sizeof *key);
    if (key == NULL) {
        perror("calloc");
        str_free(&text);
        exit(EXIT_FAILURE);
    }
    {
        double frequencies[sizeof charset] = { 0 };
        for (size_t col = 0; col < (size_t)key_len; col++) {
            frequency_count(frequencies, text, col, (size_t)key_len);
            double best = 0;
            for (size_t i = 0; i < sizeof charset; i++) {
                const double n = frequency_correlation(frequencies, charfreq_english, i);
                if (n > best) {
                    /* rotating the English table by i lines up with the
                     * column's histogram, so the key shift is -i mod 26 */
                    key[col] = (char)((sizeof charset - i) % sizeof charset);
                    best = n;
                }
            }
        }
    }

    /* print key to stdout (other info goes to stderr) */
    printf("key: ");
    for (size_t i = 0; i < (size_t)key_len; i++) {
        printf("%c", charset[(unsigned char)key[i]]);
    }
    printf("\n");

    /* decode in place: text.data is both input and output */
    vigenere_decode(text, text.data, key, (size_t)key_len, charset, sizeof charset);

    /* print preview to stderr to avoid clutter when piping */
    fprintf(stderr, "preview:\n");
    str_println(str_slice(text, 0, 79), stderr);

    free(key);
    str_free(&text);
    return EXIT_SUCCESS;
}