forked from bernard57/qBittorrent_search_engine
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcpasbien.py
141 lines (125 loc) · 5.01 KB
/
cpasbien.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# VERSION: 1.1
# AUTHORS: mauricci
from helpers import retrieve_url
from helpers import download_file, retrieve_url
from novaprinter import prettyPrinter
import re
try:
# python3
from html.parser import HTMLParser
except ImportError:
# python2
from HTMLParser import HTMLParser
class cpasbien(object):
    """qBittorrent search-engine plugin for cpasbien (French torrent site)."""

    url = "http://ww2.cpasbiens.co"
    name = 'Cpasbien (French)'
    # Maps qBittorrent category names to the site's URL path fragments;
    # a category may query several site sections (e.g. "games").
    supported_categories = {
        "all": [""],
        "books": ["ebook/"],
        "movies": ["films/"],
        "tv": ["series/"],
        "music": ["musique/"],
        "software": ["logiciels/"],
        "games": ["jeux-pc/", "jeux-consoles/"]
    }

    class MyHTMLParser(HTMLParser):
        """Parses one results page; each completed table row is handed to
        prettyPrinter and collected in pageRes/fullResData."""

        def __init__(self):
            HTMLParser.__init__(self)
            self.url = "http://ww2.cpasbiens.co"
            self.TABLE_INDEX = 0       # index of the <table> that holds results
            self.insideTd = False
            self.insideDataTd = False  # inside a <td> of the results table
            self.tableCount = -1
            self.tdCount = -1
            self.fullResData = []      # all results accumulated across pages
            self.pageRes = []          # results of the current page only
            self.singleResData = self.getSingleData()
            # One-shot flags raised on the size/seeds/leech <div> start tags,
            # consumed by handle_data for the following text node.
            self.sizeFound = False
            self.seedsFound = False
            self.leechFound = False

        def getSingleData(self):
            """Return a fresh result record; '-1' is the sentinel value
            novaprinter expects for unknown fields."""
            return {'name': '-1', 'seeds': '-1', 'leech': '-1', 'size': '-1',
                    'link': '-1', 'desc_link': '-1', 'engine_url': self.url}

        def handle_starttag(self, tag, attrs):
            if tag == 'table':
                self.tableCount += 1
            if tag == 'td':
                self.insideTd = True
                if self.tableCount == self.TABLE_INDEX:
                    self.insideDataTd = True
                    self.tdCount += 1
            if self.insideDataTd:
                attrs_map = dict(attrs)
                if tag == 'a' and len(attrs) > 0:
                    # Fix: the original indexed attrs_map['title'] and
                    # attrs_map['href'] directly and raised KeyError on any
                    # anchor missing either attribute; guard with .get().
                    title = attrs_map.get('title')
                    href = attrs_map.get('href')
                    if title is not None:
                        self.singleResData['name'] = title
                    if href is not None:
                        self.singleResData['desc_link'] = self.url + href
                        self.singleResData['link'] = self.singleResData['desc_link']
                elif tag == 'div' and len(attrs) > 0:
                    cls = attrs_map.get('class', '')
                    if 'poid' in cls:
                        self.sizeFound = True
                    if 'up' in cls:
                        self.seedsFound = True
                    if 'down' in cls:
                        self.leechFound = True

        def handle_endtag(self, tag):
            if tag == 'td':
                self.insideTd = False
                self.insideDataTd = False
            if tag == 'tr':
                self.tdCount = -1
                # Emit the row only when it carried a real torrent: a name
                # and at least one link were found (skips decorative rows).
                if self.singleResData['name'] != '-1':
                    if (self.singleResData['desc_link'] != '-1'
                            or self.singleResData['link'] != '-1'):
                        prettyPrinter(self.singleResData)
                        self.pageRes.append(self.singleResData)
                        self.fullResData.append(self.singleResData)
                self.singleResData = self.getSingleData()

        def handle_data(self, data):
            if self.insideDataTd:
                data = data.strip()
                if self.sizeFound:
                    # Site reports sizes as bare numbers; unit is appended
                    # here — assumes the value is in MB (TODO confirm).
                    self.singleResData['size'] = data + 'MB'
                    self.sizeFound = False
                if self.seedsFound:
                    self.singleResData['seeds'] = data
                    self.seedsFound = False
                if self.leechFound:
                    self.singleResData['leech'] = data
                    self.leechFound = False

        def feed(self, html):
            """Feed one page of HTML, then reset per-page parser state so
            the same parser instance can be reused for the next page."""
            HTMLParser.feed(self, html)
            self.insideDataTd = False
            self.tdCount = -1
            self.tableCount = -1
            self.sizeFound = False
            self.seedsFound = False
            self.leechFound = False

    # DO NOT CHANGE the name and parameters of this function
    # This function will be the one called by nova2.py
    def search(self, what, cat='all'):
        """Search entry point called by nova2.py.

        what -- already URL-encoded search string
        cat  -- one of supported_categories' keys (unknown keys fall back
                to 'all' instead of raising KeyError)
        """
        currCat = self.supported_categories.get(cat,
                                                self.supported_categories['all'])
        parser = self.MyHTMLParser()
        # Analyze the first 10 pages of results (40 entries per page).
        for currPage in range(1, 11):
            for subcat in currCat:
                path = '/search_torrent/{}/{}.html,page-{}' \
                    .format(subcat, what, currPage).replace('//', '/')
                html = retrieve_url(self.url + path)
                parser.feed(html)
            # Fix: the original `break` only exited the subcat loop and kept
            # requesting further pages; stop paging once a page is empty.
            if len(parser.pageRes) <= 0:
                break
            del parser.pageRes[:]
        parser.close()

    def download_torrent(self, info):
        """Print the torrent download link for qBittorrent to fetch.

        The .torrent file lives on a different domain than the search
        pages, with the same path.
        """
        # NOTE(review): download_file is imported at file level but unused;
        # this plugin prints the rewritten URL directly instead — confirm
        # qBittorrent handles the direct link for this site.
        print(info.replace(self.url, 'https://download.cpasbiens.co'))
if __name__ == "__main__":
    # Manual smoke test: run a sample query against the live site.
    engine = cpasbien()
    engine.search('tomb%20raider')