-
-
Notifications
You must be signed in to change notification settings - Fork 73
/
Copy pathbiqukan.py
134 lines (121 loc) · 4.1 KB
/
biqukan.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/usr/bin/env python
# encoding: utf-8
'''
#-------------------------------------------------------------------
# CONFIDENTIAL --- CUSTOM STUDIOS
#-------------------------------------------------------------------
#
# @Project Name : 下载《笔趣看》网小说
#
# @File Name : biqukan.py
#
# @Programmer : tinygeeker
#
# @Start Date : 2022/01/10 13:14
#
# @Last Update : 2022/01/10 13:14
#
#-------------------------------------------------------------------
'''
from urllib import request
from bs4 import BeautifulSoup
import collections, re, os, sys
class biqukan:
'''
This is a main Class, the file contains all documents.
One document contains paragraphs that have several sentences
It loads the original file and converts the original file to new content
Then the new content will be saved by this class
'''
def __init__(self):
self.header = {
'User-Agent': 'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
}
def hello(self):
'''
This is a welcome speech
:return: self
'''
print('*' * 50)
print(' ' * 15 + '下载《笔趣看》网小说')
print(' ' * 5 + '作者: tinygeeker Date: 2022-01-10 13:14')
print(' ' * 5 + '主页: https://tinygeeker.blog.csdn.net')
print('*' * 50)
return self
def get_download_url(self, target_url):
'''
get download url
'''
charter = re.compile(u'[第弟](.+)章', re.IGNORECASE)
target_req = request.Request(url = target_url, headers = self.header)
target_response = request.urlopen(target_req)
target_html = target_response.read().decode('gbk','ignore')
list_main_soup = BeautifulSoup(target_html,'lxml')
chapters = list_main_soup.find_all('div',class_ = 'listmain')
download_soup = BeautifulSoup(str(chapters), 'lxml')
novel_name = str(download_soup.dl.dt).split("》")[0][5:]
flag_name = "《" + novel_name + "》" + "正文卷"
numbers = (len(download_soup.dl.contents) - 1) / 2 - 8
download_dict = collections.OrderedDict()
begin_flag = False
numbers = 1
for child in download_soup.dl.children:
if child != '\n':
if child.string == u"%s" % flag_name:
begin_flag = True
if begin_flag == True and child.a != None:
download_url = "https://www.biqukan.com" + child.a.get('href')
download_name = child.string
names = str(download_name).split('章')
name = charter.findall(names[0] + '章')
if name:
download_dict['第' + str(numbers) + '章 ' + names[1]] = download_url
numbers += 1
return novel_name + '.txt', numbers, download_dict
def downloader(self, url):
'''
download the text
'''
download_req = request.Request(url = url, headers = self.header)
download_response = request.urlopen(download_req)
download_html = download_response.read().decode('gbk','ignore')
soup_texts = BeautifulSoup(download_html, 'lxml')
texts = soup_texts.find_all(id = 'content', class_ = 'showtxt')
soup_text = BeautifulSoup(str(texts), 'lxml').div.text.replace('\xa0','')
return soup_text
def writer(self, name, path, text):
'''
write to file
'''
write_flag = True
with open(path, 'a', encoding='utf-8') as f:
f.write(name + '\n\n')
for each in text:
if each == 'h':
write_flag = False
if write_flag == True and each != ' ':
f.write(each)
if write_flag == True and each == '\r':
f.write('\n')
f.write('\n\n')
def run(self):
'''
program entry
'''
target_url = str(input("请输入小说目录下载地址:\n"))
# 实例化下载类
d = self.downloader(target_url)
name, numbers, url_dict = d.get_download_url(target_url)
if name in os.listdir():
os.remove(name)
index = 1
# 下载中
print("《%s》下载中:" % name[:-4])
for key, value in url_dict.items():
d.Writer(key, name, d.Downloader(value))
sys.stdout.write("已下载:%.3f%%" % float(index / numbers) + '\r')
sys.stdout.flush()
index += 1
print("《%s》下载完成!" % name[:-4])
if __name__ == '__main__':
biqukan().hello().run()