-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathpackt_crawler.py
70 lines (56 loc) · 2.58 KB
/
packt_crawler.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#! -*- coding: UTF-8 -*-
from bs4 import BeautifulSoup
import requests
import sys
from log import write_log
import re
import os
if sys.version_info.major >= 3:
from urllib.request import urlopen
else:
from urllib import urlopen
# Directory containing this script; the credentials file is read from it and
# it is passed to write_log as the first argument.
BASE_DIR = os.path.dirname(__file__)
class PacktFreeLearningCrawler(object):
    """Scrape Packt's free-learning offer and claim the book for an account.

    Constructing an instance downloads and parses the offer page, reads the
    account credentials from ``.packt_user.cfg`` in ``BASE_DIR`` and posts
    the login form through a ``requests`` session that the other methods
    reuse.
    """

    def __init__(self, url=None):
        """Fetch the offer page, load the credentials and log in.

        :param url: page to scrape; the default free-learning URL is used
            when this is ``None`` or empty.
        """
        self.url = url if url else 'https://www.packtpub.com/packt/offers/free-learning'
        response = urlopen(self.url)
        try:
            self.soup = BeautifulSoup(response, 'html.parser')
        finally:
            # Release the HTTP connection as soon as the page has been parsed.
            response.close()
        self.user, self.password = self._read_conf_file()
        self.create_session()

    def _read_conf_file(self):
        """Return the first two lines of ``.packt_user.cfg`` as a list.

        The first line is used as the e-mail and the second as the password.
        Lines are returned with their line endings; ``create_session``
        strips them.

        :raises ValueError: if the file has fewer than two lines.
        """
        with open(os.path.join(BASE_DIR, ".packt_user.cfg")) as conf:
            credentials = conf.readlines()[:2]
        if len(credentials) < 2:
            # Fail with a clear message instead of an unpacking error in
            # __init__ (which is also a ValueError).
            raise ValueError(
                ".packt_user.cfg must contain the e-mail on the first line "
                "and the password on the second line")
        return credentials

    def create_session(self):
        """Create ``self.session`` and post the login form with it.

        The response of the login request is not inspected here.
        """
        data = {
            'email': self.user.rstrip(),
            'password': self.password.rstrip(),
            'op': 'Login',
            # NOTE(review): hard-coded form token, presumably copied from the
            # site's login form - confirm it is still accepted.
            'form_build_id': 'form-a4d03c38ea8befcab94afc2cd4a7c8af',
            'form_id': 'packt_user_login_form',
        }
        self.session = requests.session()
        self.session.post('https://www.packtpub.com/register', data=data)

    def _clear_element(self, element):
        """Return ``element`` without its whitespace padding.

        Every match of "whitespace, optional literal dots, whitespace" is
        removed, so a single space between two words is kept.
        """
        # Raw string: same pattern as before, without invalid-escape warnings.
        return re.sub(r"\s(\.)*\s", "", element)

    def free_book_title(self):
        """Return the cleaned title of the book on the offer page.

        The title is the ``<h2>`` inside the first ``div.dotd-title``.
        """
        title_box = self.soup.find_all("div", {"class": "dotd-title"})[0]
        return self._clear_element(title_box.find('h2').string)

    def all_books_user(self):
        """Return the titles listed on the account's "my e-books" page.

        The ``' [eBook]'`` suffix is removed from every title.

        :raises ValueError: if the page cannot be fetched or does not have
            the expected structure.
        """
        try:
            page = self.session.get('https://www.packtpub.com/account/my-ebooks').text
            my_ebooks = BeautifulSoup(page, 'html.parser')
            product_list = my_ebooks.find(id='product-account-list')
            books = product_list.find_all('div', {'class': 'product-line unseen'})
            return [book['title'].replace(' [eBook]', "") for book in books]
        except Exception:
            # Not a bare ``except``: KeyboardInterrupt and SystemExit must
            # propagate instead of being reported as a login problem.
            raise ValueError("Login or Password is incorrect")

    def link_free_book(self):
        """Return the ``href`` of the claim link on the offer page."""
        claim_form = self.soup.find('div', {'class': 'dotd-main-book-form cf'})
        return claim_form.find('a', {'class': 'twelve-days-claim'})['href']

    def claim_free_book(self):
        """Claim the offered book unless the account already lists it.

        :returns: the HTTP status code of the claim request, or ``200``
            when the title is already among the account's books.
        """
        if self.free_book_title() in self.all_books_user():
            return 200
        # Drop any leading slash from the href so the joined URL does not
        # contain '//' right after the host.
        link = self.link_free_book().lstrip('/')
        return self.session.get('https://www.packtpub.com/{link}'.format(link=link)).status_code
if __name__ == '__main__':
    # Script entry point: try to claim the offered book, then log the result
    # together with the book's title.
    packt = PacktFreeLearningCrawler()
    claimed = packt.claim_free_book() == 200
    outcome = "Added" if claimed else "Error in adding the book"
    write_log(BASE_DIR, title=packt.free_book_title(), status=outcome)