This repository has been archived by the owner on Mar 17, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathvigenere.py
171 lines (139 loc) · 6.48 KB
/
vigenere.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
import argparse
import sys
import hashlib
from math import ceil
from functools import reduce as funcreduce
from operator import iconcat
from itertools import product
# Most frequent letter in typical English plaintext and its index; the index
# (4) is subtracted from each column's top letters to guess the key letter.
# NOTE(review): assumes the user-supplied dictionary places 'E' at index 4
# (as in a plain A-Z alphabet) — confirm against the dictionary file used.
MOST_COMMON_LETTER = ('E', 4)
# Default substring length for the Kasiski repeated-word search.
WORD_LENGTH = 4
# Default number of key-length candidates to examine.
KEY_LENGTH_THRESHOLD = 4
# Default number of top letters examined per key column.
OCCURRENCES_THRESHOLD = 2
def decipher(string, key, a2i_dict, i2a_dict):
    """
    Decrypt *string* with the Vigenere *key* over an arbitrary alphabet.

    This function is BASED on https://github.com/jameslyons/pycipher

    ``a2i_dict`` maps alphabet characters to indices, ``i2a_dict`` is its
    inverse; the alphabet size is ``len(a2i_dict)``. The key is repeated
    cyclically over the input and its shift is subtracted modulo the
    alphabet size.
    """
    upper_key = [letter.upper() for letter in key]
    period = len(upper_key)
    alphabet_size = len(a2i_dict)
    plain_chars = []
    for position, symbol in enumerate(string):
        shift = a2i_dict[upper_key[position % period]]
        plain_chars.append(i2a_dict[(a2i_dict[symbol] - shift) % alphabet_size])
    return ''.join(plain_chars)
def count_word_occurrences(text, length):
    """
    Kasiski step 1: tally every substring ("word") of *length* characters
    in *text* and, for each word occurring at least 66% as often as the
    most common one, return the distances between consecutive occurrences.

    Returns a dict mapping word -> list of gaps between its start indexes
    (a word seen only once maps to an empty gap list, if it survives the
    threshold).
    """
    word_occurrences = {}
    max_occurrences = 0
    # +1 so the final window (starting at len(text) - length) is counted;
    # without it the last word of the text was silently skipped.
    for i in range(len(text) - length + 1):
        word = text[i:i+length]
        if word in word_occurrences:
            word_occurrences[word][0] += 1
            word_occurrences[word][1].append(i)
        else:
            word_occurrences.update({word: [1, [i]]})
        max_occurrences = max(word_occurrences[word][0], max_occurrences)
    # Keep only words occurring at least 66% as often as the most frequent.
    occurrences_threshold = ceil(max_occurrences*0.66)
    return {word: list(map(lambda x, y: y-x, indexes[:-1], indexes[1:]))
            for (word, [occurrences, indexes]) in word_occurrences.items()
            if occurrences >= occurrences_threshold}
def get_divs(_int):
    """
    Yield the divisors of *_int* that are candidate key lengths: the
    number itself first, then every divisor from 2 up to _int // 2.
    1 is deliberately never yielded here (the caller appends it last).
    """
    yield _int
    candidate = 2
    limit = int(_int / 2)
    while candidate <= limit:
        if _int % candidate == 0:
            yield candidate
        candidate += 1
def key_lengths(int_list):
    """
    Turn the gaps between repeated words into ranked key-length candidates.

    Every divisor (>= 2, plus the gap itself) of every gap is tallied;
    divisors seen more than once are returned from most to least frequent,
    with 1 always appended as the final fallback candidate.
    """
    tally = {}
    for distance in int_list:
        # Divisor enumeration matches get_divs: the number itself first,
        # then each divisor from 2 up to distance // 2.
        divisors = [distance] + [d for d in range(2, int(distance / 2) + 1)
                                 if distance % d == 0]
        for divisor in divisors:
            tally[divisor] = tally.get(divisor, 0) + 1
    # Stable ascending sort then reversal keeps the original tie ordering.
    ranked = sorted(tally.items(), key=lambda item: item[1])
    candidates = []
    for divisor, count in reversed(ranked):
        if count != 1:
            candidates.append(divisor)
    candidates.append(1)
    return candidates
def find_occurrences(key_length, input_text, occurences_threshold):
    """
    For a hypothesised *key_length*, split *input_text* into columns (one
    per key position, stepping through the text in key_length-sized
    blocks) and return, for each column, the *occurences_threshold* most
    frequent letters, most frequent first.

    Returns a list of key_length lists of letters.
    """
    occurrences_trimmed = []
    for index in range(key_length):
        occurrences = {}
        # +1 so the final complete block is included; the previous bound
        # (len - key_length, exclusive) dropped it. Still index-safe:
        # checked <= len - key_length, so index + checked <= len - 1.
        for checked in range(0, len(input_text)-key_length+1, key_length):
            letter = input_text[index+checked]
            if letter in occurrences:
                occurrences[letter] += 1
            else:
                occurrences.update({letter: 1})
        occurrences = sorted(occurrences.items(),
                             key=lambda item: item[1], reverse=True)
        occurrences_trimmed.append([letter
                                    for letter, _frequency in occurrences[0:occurences_threshold]
                                    ])
    return occurrences_trimmed
def main():
    """
    Command-line entry point: break a Vigenere cipher via Kasiski
    examination, printing the key whose decryption's SHA-256 matches the
    supplied hash, or exiting with an explanatory message on failure.
    """
    argparser = argparse.ArgumentParser(prog='vigenere')
    argparser.add_argument('-i', '--input', type=argparse.FileType(), help='input file')
    argparser.add_argument('-d', '--dictionary', type=argparse.FileType(),
                           help='file with the dictionary to be used')
    argparser.add_argument('--hash', type=argparse.FileType(),
                           help='file with the hash to be used for comparison')
    argparser.add_argument('-wl', '--word-length', type=int,
                           help='length of words for ocurrence count in length guessing (Kasiski)' +
                           f': defaults to {WORD_LENGTH}')
    argparser.add_argument('-klt', '--key-length-threshold', type=int,
                           help='number of key length candidates to examine (Kasiski)' +
                           f': defaults to {KEY_LENGTH_THRESHOLD}')
    argparser.add_argument('-ot', '--occurrences-threshold', type=int,
                           help='number of top letters to examine for each column (Kasiski)'+
                           f': defaults to {OCCURRENCES_THRESHOLD}')
    argparser.add_argument('-v', '--verbose', action='store_true', help='enables verbose mode')
    args = argparser.parse_args()
    # All three file arguments are mandatory; bail out with the usage text.
    if (not args.input) or (not args.dictionary) or (not args.hash):
        sys.exit(argparser.print_help())
    # Kasiski arguments (fall back to module-level defaults)
    word_length = args.word_length if args.word_length else WORD_LENGTH
    key_length_threshold = args.key_length_threshold if args.key_length_threshold else KEY_LENGTH_THRESHOLD
    occurrences_threshold = args.occurrences_threshold if args.occurrences_threshold else OCCURRENCES_THRESHOLD
    # Dictionaries generation: char -> index and index -> char lookups
    a2i_dict, i2a_dict = {}, {}
    for (index, value) in enumerate(args.dictionary.read().replace('\n', '')):
        a2i_dict.update({value: index})
        i2a_dict.update({index: value})
    if args.verbose:
        print(f'DEBUG\tDictionary: {a2i_dict.keys()}')
    # Input text preprocessing
    input_text = args.input.read().replace('\n', '')
    # Hash preprocessing
    input_hash = args.hash.read().replace('\n', '')
    # Propose key lengths from the gaps between repeated words
    most_ocurrent_words = count_word_occurrences(
        input_text, word_length)
    key_length_candidates = key_lengths(
        set(funcreduce(iconcat, most_ocurrent_words.values(), [])))
    if args.verbose:
        print(f'DEBUG\tKey length candidates: {key_length_candidates}')
    # Decipher: for each candidate length, build per-column key-letter
    # candidates from letter frequencies and brute-force their product.
    for key_length in key_length_candidates[0:key_length_threshold]:
        if args.verbose:
            print(f'DEBUG\tTrying keylength: {key_length}')
        occurrences = find_occurrences(key_length, input_text, occurrences_threshold)
        letter_key_candidates = []
        for letters in occurrences:
            common_column_letters = []
            for letter in letters:
                # Assume each frequent ciphertext letter maps to 'E'.
                common_column_letters.append(i2a_dict[
                    (a2i_dict[letter]-MOST_COMMON_LETTER[1]) % len(a2i_dict)
                ])
            letter_key_candidates.append(common_column_letters)
        for key_candidate in product(*letter_key_candidates):
            decrypted_text = decipher(
                input_text, key_candidate, a2i_dict, i2a_dict)
            if hashlib.sha256(decrypted_text.encode('utf-8')).hexdigest() == input_hash:
                print(f'{"".join(key_candidate)}')
                sys.exit()
    sys.exit('A key has not been found under the following Kasiski configuration:\n' + \
             f'\t--word-length={word_length}\n' + \
             f'\t--key-length-threshold={key_length_threshold}\n' + \
             f'\t--occurrences-threshold={occurrences_threshold}\n' + \
             'These heuristics are too restrictive. Try again changing the configuration values!')
if __name__ == "__main__":
main()