-
Notifications
You must be signed in to change notification settings - Fork 231
/
album_by_artist.py
52 lines (44 loc) · 2.65 KB
/
album_by_artist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
"""
Use the artist IDs collected in the previous step to fetch all of their album IDs.
"""
import requests
from bs4 import BeautifulSoup
import time
from music_163 import sql
class Album(object):
    """Collect the album IDs listed on an artist's music.163.com album page.

    ``save_albums`` downloads the album listing for one artist, extracts the
    album ID from every album link on the page, and hands each
    ``(album_id, artist_id)`` pair to ``sql.insert_album``.
    """

    # Browser-like headers sent with every album-list request.
    # NOTE(review): the Cookie value is a hard-coded browser session captured
    # at some point in the past; it looks account-specific and is probably
    # expired. Consider loading it from configuration instead of source.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Cookie': '_ntes_nnid=7eced19b27ffae35dad3f8f2bf5885cd,1476521011210; _ntes_nuid=7eced19b27ffae35dad3f8f2bf5885cd; usertrack=c+5+hlgB7TgnsAmACnXtAg==; Province=025; City=025; _ga=GA1.2.1405085820.1476521280; NTES_PASSPORT=6n9ihXhbWKPi8yAqG.i2kETSCRa.ug06Txh8EMrrRsliVQXFV_orx5HffqhQjuGHkNQrLOIRLLotGohL9s10wcYSPiQfI2wiPacKlJ3nYAXgM; [email protected]|1476523293|1|study|11&12|jis&1476511733&mail163#jis&320100#10#0#0|151889&0|g37_client_check&mailsettings&mail163&study&blog|[email protected]; JSESSIONID-WYYY=189f31767098c3bd9d03d9b968c065daf43cbd4c1596732e4dcb471beafe2bf0605b85e969f92600064a977e0b64a24f0af7894ca898b696bd58ad5f39c8fce821ec2f81f826ea967215de4d10469e9bd672e75d25f116a9d309d360582a79620b250625859bc039161c78ab125a1e9bf5d291f6d4e4da30574ccd6bbab70b710e3f358f%3A1476594130342; _iuqxldmzr_=25; __utma=94650624.1038096298.1476521011.1476588849.1476592408.6; __utmb=94650624.11.10.1476592408; __utmc=94650624; __utmz=94650624.1476521011.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
        'DNT': '1',
        'Host': 'music.163.com',
        'Pragma': 'no-cache',
        'Referer': 'http://music.163.com/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36'
    }

    def save_albums(self, artist_id, timeout=10):
        """Fetch one artist's album page and store every album ID found on it.

        Args:
            artist_id: ID of the artist, sent as the ``id`` query parameter.
            timeout: Seconds to wait for the server before giving up. Without
                a timeout a single stalled connection would block the whole
                crawl indefinitely.

        Raises:
            requests.RequestException: The request failed, timed out, or the
                server answered with an HTTP error status.
            ValueError: The response contained no ``<body>`` element to scan.
        """
        # 'limit' presumably caps the number of albums returned in one page;
        # artists with more albums than that would be truncated -- TODO confirm.
        params = {'id': artist_id, 'limit': '200'}

        # Download the artist's album listing page.
        response = requests.get(
            'http://music.163.com/artist/album',
            headers=self.headers,
            params=params,
            timeout=timeout,
        )
        # Fail loudly on 4xx/5xx instead of parsing an error page as if it
        # were an (empty) album list.
        response.raise_for_status()

        # Parse the HTML.
        soup = BeautifulSoup(response.content.decode(), 'html.parser')
        body = soup.body
        if body is None:
            # html.parser does not synthesise a <body>; report this clearly
            # rather than failing with an AttributeError on None.
            raise ValueError(
                'album page for artist %s has no <body> element' % artist_id
            )

        # Every album title on the page is an anchor carrying these classes.
        for link in body.find_all('a', attrs={'class': 'tit f-thide s-fc0'}):
            # hrefs look like '/album?id=<album id>'; keep only the ID part.
            album_id = link['href'].replace('/album?id=', '')
            sql.insert_album(album_id, artist_id)
if __name__ == '__main__':
    # Walk every stored artist and save the albums found for each one.
    crawler = Album()
    for artist in sql.get_all_artist():
        try:
            crawler.save_albums(artist['ARTIST_ID'])
        except Exception as err:
            # Log the failing artist row together with the error, then keep going.
            print(str(artist) + ': ' + str(err))
            # Back off briefly after a failure before moving on to the next artist.
            # NOTE(review): confirm the pause is meant only for the error path.
            time.sleep(5)