combocrawler.py
# crawls Google for anonfile.com "combo" (user:pass) files and downloads them
from googlesearch import search
from bs4 import BeautifulSoup
import requests
import sys
import os
import time
from pyunpack import Archive
from shutil import copyfile

if sys.version_info >= (3, 0):  # Python 3
    import urllib.request as request
    from urllib.parse import quote
else:  # Python 2
    import urllib as request
    from urllib import quote

import urllib3
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
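
# NOTE (assumed, not stated in the original repo): the third-party imports
# above usually come from these PyPI packages, though exact names may vary:
#   pip install beautifulsoup4 requests urllib3 pyunpack patool googlesearch-python
# pyunpack needs an archive backend such as patool to unpack .zip/.rar files.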
SEARCHFOR = ["\"combo\" site:anonfile.com", "\"Combo\" site:anonfile.com"]
DOWNLOADED = "downloaded.log"  # log of anonfile ids already fetched
DOWNPATH = "downloads"         # temporary download directory
DESTPATH = "inputbreach"       # where extracted combo lists end up
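
# Assumption: the original script expects these directories to already exist;
# this guard creates them on first run so urlretrieve/extract don't fail.
for d in (DOWNPATH, DESTPATH):
    if not os.path.isdir(d):
        os.makedirs(d)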
def extract(path):
    """Copy plain .txt combo files straight to DESTPATH; unpack archives there."""
    try:
        if path.lower().endswith(".txt"):
            copyfile(path, os.path.join(DESTPATH, os.path.basename(path)))
        else:
            Archive(path).extractall(DESTPATH)
    except Exception as e:
        print("Error extracting {}: {}".format(path, e))
done = []
# load the log of previously downloaded file ids so reruns skip them
try:
    with open(DOWNLOADED, "r") as logfile:
        done = logfile.readlines()
except IOError:
    pass

f = open(DOWNLOADED, "a+")
for s in SEARCHFOR:
    query = search(s)  # generator of result URLs for this Google query
    i = 0
    for url in query:
        dest = url.split("/")[-1].strip().strip("'")
        if dest + "\n" not in done:
            try:
                r = requests.get(url, verify=False)
                html = BeautifulSoup(r.text, "html.parser")
                down = html.find(id="download-url")
                # quote() escapes the scheme's colon too, so restore it
                down = quote(down.get("href").strip()).replace("%3A//", "://")
                print("downloading: {}".format(down))
                path = os.path.join(DOWNPATH, str(i) + dest)
                request.urlretrieve(down, path)
                extract(path)
                os.remove(path)
                f.write(dest + "\n")
                f.flush()
                done.append(dest + "\n")  # also skip duplicates within this run
                i += 1
            except Exception as e:
                print("Error in {}: {}".format(url, e))
        time.sleep(10)  # throttle requests to avoid a Google ban
f.close()