-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathchecker.py
141 lines (116 loc) · 4.71 KB
/
checker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
'''
Miku AI is designed to support both high-quality audio and stunning visualizations.
To test its stability, you will be prompted with a random string and asked to upload an MP3.
Server has several checks to ensure that the AI is working as expected.
'''
import io
import os
import random
import time
import matplotlib.pyplot as plt
import numpy as np
import torchaudio
import transformers
from azure.cognitiveservices.vision.computervision import ComputerVisionClient
from azure.cognitiveservices.vision.computervision.models import OperationStatusCodes
from msrest.authentication import CognitiveServicesCredentials
from PIL import Image, ImageDraw, ImageFont
from pydub import AudioSegment
from scipy.io import wavfile
from transformers import pipeline
transformers.logging.set_verbosity_error()
font = ImageFont.truetype("Arial.ttf", size=60)
def generate_challenge() -> tuple[str, bytes]:
'''
Generates each challenge.
'''
text = ''.join(random.choices("ABCDEFGHIJKLMNOPQRSTUVWXYZ", k=12))
img = Image.new("RGBA", [i + 40 for i in font.getmask(text).size])
draw = ImageDraw.Draw(img)
draw_point = (0, 0)
colors = [(134,206,203,255), (19,122,127,255)] # miku
draw.multiline_text(draw_point, text, font=font, fill=random.choice(colors))
text_window = img.getbbox()
img = img.crop(text_window)
res = io.BytesIO()
img.save(res, format="PNG")
return text, res.getvalue()
def mp3_to_wav(mp3: bytes, flatten=False) -> io.BytesIO:
audio = AudioSegment.from_file(io.BytesIO(mp3), format="mp3")
if flatten:
audio = audio.set_channels(1)
wav_io = io.BytesIO()
audio.export(wav_io, format="wav")
return wav_io
def check1(mp3: bytes) -> bool:
'''
Since it's an AI app, ChatGPT usage is a must
https://chatgpt.com/share/e22ae484-6dc0-4cdf-8cab-89b7e0e49033
'''
_, data = wavfile.read(mp3_to_wav(mp3))
fft_data = np.fft.fft(data)
return 500 <= np.mean(np.abs(fft_data)) <= 1500
def check2(mp3: bytes) -> bool:
'''
Just an easy check to make sure it's like Miku voice...
PS. Original intention was speech_to_text(mp3)==generate_challenge() but I'm running out of time
'''
pipe = pipeline("audio-classification", model="alefiury/wav2vec2-large-xlsr-53-gender-recognition-librispeech")
audio, rate = torchaudio.load(mp3_to_wav(mp3))
transform = torchaudio.transforms.Resample(rate, 16000)
audio = transform(audio).numpy().reshape(-1)
prediction = pipe(audio)
prob = prediction[0]['score'] if prediction[0]['label'] == 'female' else prediction[1]['score']
return prob > 0.9727
def check3(mp3: bytes, res: str) -> bool:
'''
SuperBaldoGenni — Yesterday at 12:12 PM
is there any osint or stego
JoshO — Yesterday at 12:12 PM
yes stego
'''
sample_rate, data = wavfile.read(mp3_to_wav(mp3, True))
plt.axis('off')
plt.specgram(data, Fs=sample_rate, NFFT=512)
buf = io.BytesIO()
plt.gcf().set_size_inches(2*plt.gcf().get_size_inches()[0], 0.5*plt.gcf().get_size_inches()[1])
plt.savefig(buf, format='png', bbox_inches='tight')
buf.seek(0)
# Authenticates Azure credentials and creates a client.
subscription_key = os.environ["VISION_KEY"]
endpoint = os.environ["VISION_ENDPOINT"]
computervision_client = ComputerVisionClient(endpoint, CognitiveServicesCredentials(subscription_key))
# Start OCR
read_image = io.BufferedReader(buf)
read_response = computervision_client.read_in_stream(read_image, raw=True)
read_operation_location = read_response.headers["Operation-Location"]
operation_id = read_operation_location.split("/")[-1]
# Call the "GET" API and wait for the retrieval of the results
while True:
read_result = computervision_client.get_read_result(operation_id)
if read_result.status.lower() not in ['notstarted', 'running']:
break
time.sleep(2)
text_retrieved = ''
if read_result.status == OperationStatusCodes.succeeded:
for text_result in read_result.analyze_result.read_results:
for line in text_result.lines:
text_retrieved += line.text
return text_retrieved.strip() == res
def check(mp3: bytes, res: str) -> tuple[bool, str]:
try:
if not check1(mp3):
return False, "Check 1 failed"
if not check2(mp3):
return False, "Check 2 failed"
if not check3(mp3, res):
return False, "Check 3 failed"
return True, "All checks passed"
except Exception as e:
print(e)
return False, f"An error occurred: {e}"
if __name__ == "__main__":
text, res = generate_challenge()
print(f"Generated challenge: {text}")
mp3 = open("./miku.mp3", "rb").read()
print(check(mp3, text))