-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcompressor.py
69 lines (69 loc) · 2.15 KB
/
compressor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import math
def compress(ifile, codes, ofile):
    """Encode a text file with a symbol-to-bits table and write it packed.

    The table is read from ``codes``, one entry per line in the form
    ``<symbol> <bits>``. Each line is split at its LAST space, so a symbol
    may itself be (or contain) a space. Lines without any space are ignored.

    The text read from ``ifile`` is encoded character by character:

    * a newline is encoded with the entry whose symbol is lowercase ``'n'``
      (letters are upper-cased before lookup, so that key is only ever
      reached through this newline rule);
    * alphabetic characters are upper-cased before lookup;
    * characters without a table entry are skipped.

    The concatenated bit string is packed by ``compress_bits`` and written
    to ``ofile`` in binary mode. Finally ``entropy`` is called with the
    original text and the bit string.

    Args:
        ifile: Path of the UTF-8 text file to encode.
        codes: Path of the UTF-8 code table file.
        ofile: Path of the binary output file (overwritten).

    Raises:
        KeyError: If the text contains a newline and the table has no
            ``'n'`` entry.
        OSError: If any of the files cannot be opened.
    """
    symbol_codes = {}
    with open(codes, 'r', encoding='utf-8') as f:
        for line in f:
            # Drop the line terminator before splitting; otherwise it is
            # stored as part of the code and ends up in the bit string that
            # is handed to entropy(), where it counts as an extra symbol.
            symbol, separator, code = line.rstrip('\n').rpartition(' ')
            if separator:
                symbol_codes[symbol] = code

    with open(ifile, 'r', encoding='utf-8') as f:
        itext = f.read()

    # Collect the pieces and join once instead of repeated string "+=".
    pieces = []
    for c in itext:
        if c == '\n':
            pieces.append(symbol_codes['n'])
            continue
        if c.isalpha():
            c = c.upper()
        code = symbol_codes.get(c)
        if code is not None:
            pieces.append(code)
    compressed = ''.join(pieces)

    compressed_bytes = compress_bits(compressed)
    with open(ofile, 'wb') as f:
        f.write(compressed_bytes)
    entropy(itext, compressed)
def entropy(a, b):
    """Print the Shannon entropy of ``a`` minus the Shannon entropy of ``b``.

    Each argument is treated as a sequence of symbols; its entropy (in bits
    per symbol) is computed from the relative frequency of every distinct
    symbol. An empty sequence contributes 0.

    Args:
        a: First sequence of hashable symbols (must support ``len``).
        b: Second sequence of hashable symbols (must support ``len``).

    Returns:
        None. The difference is printed to standard output.
    """

    def shannon_bits(symbols):
        # Tally occurrences; dict order follows first appearance.
        tally = {}
        for item in symbols:
            tally[item] = tally.get(item, 0) + 1
        if not tally:
            return 0
        size = len(symbols)
        bits = 0
        for occurrences in tally.values():
            bits += (occurrences / size) * math.log2(size / occurrences)
        return bits

    first = shannon_bits(a)
    second = shannon_bits(b)
    print("The information gain from compression is: ", first - second)
def compress_bits(compressed):
    """Pack the '0'/'1' characters of ``compressed`` into bytes.

    Characters other than ``'0'`` and ``'1'`` are ignored. Bits fill each
    byte from the most significant position downwards; if the number of
    bits is not a multiple of eight, the final byte is padded with zero
    bits on the right.

    Args:
        compressed: Iterable of characters, typically a string of bits.

    Returns:
        The packed data as ``bytes`` (empty when there are no bits).
    """
    digits = ''.join(ch for ch in compressed if ch in ('0', '1'))
    # Right-pad with zeros up to the next whole byte.
    digits += '0' * (-len(digits) % 8)
    return bytes(
        int(digits[start:start + 8], 2)
        for start in range(0, len(digits), 8)
    )
# Interactive entry point: ask for the three file names, run the compression
# and report where the result was written. Guarded so that importing this
# module (e.g. to reuse compress()) does not block on the prompts.
if __name__ == "__main__":
    ifile = input("Enter your input filename: ")
    codes = input("Enter the filename where you have saved your codes: ")
    ofile = input("Enter the filename where you want your compressed document saved: ")
    compress(ifile, codes, ofile)
    print("Compressed code has been written to:", ofile)