-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcrawl_from_dllME_com.py
127 lines (113 loc) · 4.48 KB
/
crawl_from_dllME_com.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import json
import os
import time,re
from bs4 import BeautifulSoup
import requests
# import query_db
#############################################################################
# Directory where the crawled DLL metadata is accumulated as JSON.
DB_COLLECTION_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'DataBase')
# Directory holding input lists (e.g. list_file_name.txt with DLL names to crawl).
CREDENTIAL_COLLECTION_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'credential')
# Scratch directory for the most recently fetched HTML page (gen_html.txt).
GEN_HTML_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'gen_html')
# Host running the FlareSolverr-style proxy listened to on port 8191 (see send_requests).
host_ip = "localhost"
#############################################################################
def write_file(text):
    """Overwrite the scratch HTML file (gen_html/gen_html.txt) with *text*.

    The file is later read back with encoding='utf-8' (see get_info_file),
    so it must be written as utf-8 too — relying on the platform default
    encoding breaks on Windows for non-ASCII page content.
    """
    with open(os.path.join(GEN_HTML_PATH, 'gen_html.txt'), 'w', encoding='utf-8') as file:
        # write(), not writelines(): writelines(str(...)) iterates the string
        # character by character for the same result at more cost.
        file.write(str(text))
def write_to_json_file(filename, version, md5, sha1, error=0):
    """Append one DLL record to the JSON collection file.

    Args:
        filename: DLL base name (without extension).
        version: file or product version string, or "Missing version".
        md5: MD5 hex digest scraped from the page.
        sha1: SHA-1 hex digest scraped from the page.
        error: unused; kept for backward compatibility with existing callers.

    The collection is a JSON array; a corrupt or missing file is treated
    as an empty collection rather than raising.
    """
    entry = {  # renamed from `dict` — don't shadow the builtin
        'File name': filename,
        'Version': version,
        'MD5': md5,
        'SHA-1': sha1
    }
    db_file = os.path.join(DB_COLLECTION_PATH, 'DataBaseCollection12.json')
    if os.path.exists(db_file):
        with open(db_file, 'r', encoding='utf-8') as file:
            try:
                data = json.load(file)
            except json.JSONDecodeError:
                # Corrupt/empty file: start a fresh collection (best effort).
                data = []
    else:
        data = []
    data.append(entry)
    # ensure_ascii=False emits raw UTF-8, so the file must be opened as utf-8
    # explicitly — the platform default encoding may not round-trip it.
    with open(db_file, 'w', encoding='utf-8') as file:
        json.dump(data, file, ensure_ascii=False, indent=4)
    print('written')
def send_requests(url):
    """Fetch *url* through the local FlareSolverr-style proxy on port 8191.

    On success the solved page HTML is written to the scratch file via
    write_file and True is returned. Returns the sentinel string "Time Out"
    on a requests timeout (callers retry once on it), and False for every
    other failure: non-200 proxy response, non-'ok' solver status, or a page
    that contains no 'MD5' marker (i.e. no more version rows to scrape).
    """
    post_body = {
        "cmd": "request.get",
        "url": url,
        "maxTimeout": 60000
    }
    try:
        response = requests.post(
            f'http://{host_ip}:8191/v1',
            headers={'Content-Type': 'application/json'},
            json=post_body,
        )
    except requests.exceptions.Timeout as e:
        print("Request timed out!\nDetails:", e)
        return "Time Out"
    except requests.exceptions.HTTPError as http_err:
        print(f"HTTP error occurred: {http_err}")
        return False  # explicit False instead of falling through to None
    except Exception as err:
        print(f"Other error occurred: {err}")
        return False  # explicit False instead of falling through to None
    if response.status_code == 200:
        json_response = response.json()
        if json_response.get('status') == 'ok':
            html = json_response['solution']['response']
            if html.find('MD5') == -1:
                # No hash table on the page -> treat as "nothing found".
                return False
            write_file(html)
            # NOTE: the original also called response.raise_for_status() here,
            # but status_code is already known to be 200 so it could never
            # raise — the dead call is dropped.
            return True
    return False
def get_info_file(url, filename):
    """Fetch one dllme.com versions page and record unknown hashes.

    Args:
        url: the paginated versions URL for one DLL.
        filename: DLL base name used in the JSON records.

    Returns True when the page was fetched and parsed (even if every hash
    was whitelisted), False when the fetch failed or the page had no data —
    the caller uses False to stop paging.

    test.txt holds one known-legitimate SHA-1 per line; those rows are
    skipped.
    """
    with open('test.txt', 'r') as f:
        # set: O(1) membership per scraped row instead of O(n) list scans.
        legitimate_sha1s = set(f.read().splitlines())
    print(url)
    res = send_requests(url)
    if res == "Time Out":
        res = send_requests(url)  # retry once on timeout
    if not res:
        print("[-] Not Found")
        return False
    with open(os.path.join(GEN_HTML_PATH, 'gen_html.txt'), 'r', encoding='utf-8') as file:
        html_content = file.read().replace('\\', '')
    version_pattern = r'Version:\s*<strong style="color:#222;">(.*?)</strong>'
    product_version_pattern = r'Product Version:\s*<span style="color:#222;">(.*?)<\/span>'
    md5_pattern = r'MD5\s*(.*?)<'
    sha1_pattern = r'SHA1\s*(.*?)<'
    _versions = re.findall(version_pattern, html_content)
    _md5 = re.findall(md5_pattern, html_content)
    _sha1 = re.findall(sha1_pattern, html_content)
    if not _versions:
        # Some pages expose only "Product Version" — fall back to it.
        _versions = re.findall(product_version_pattern, html_content)
    for pos, sha1 in enumerate(_sha1):
        _version = _versions[pos] if pos < len(_versions) else "Missing version"
        # Guard _md5 the same way as _versions: the original indexed
        # _md5[pos] unconditionally and raised IndexError when the page
        # yielded fewer MD5 matches than SHA1 matches.
        _md5_value = _md5[pos] if pos < len(_md5) else "Missing MD5"
        if sha1 not in legitimate_sha1s:
            write_to_json_file(filename, _version, _md5_value, sha1)
    return True
if __name__ == '__main__':
    # One DLL name per line; ".dll" suffixes are tolerated and stripped.
    with open(os.path.join(CREDENTIAL_COLLECTION_PATH, 'list_file_name.txt')) as file:
        list_file_name = file.read().split('\n')
    for file_name in list_file_name:
        file_name = file_name.replace(".dll", '').strip()
        if not file_name:
            # split('\n') on a file with a trailing newline yields '' —
            # the original crawled a bogus URL for that empty name.
            continue
        page = 0
        # Walk the paginated AJAX version listing until a page fails to
        # parse (get_info_file returns False), which marks the end.
        while True:
            page += 1
            url = (f"https://www.dllme.com/dll/files/{file_name}"
                   f"/versions.html?sort=version&arch=&ajax=true&page={page}")
            if not get_info_file(url, file_name):
                break