-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspotify.py
91 lines (87 loc) · 2.87 KB
/
spotify.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
import getpass
import json
import os
import unittest
from time import sleep

import requests
from bs4 import BeautifulSoup as BS
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
class Spotify(unittest.TestCase):
    """Scrape the signed-in user's Spotify playlists through a Chrome WebDriver.

    The class is a ``unittest.TestCase`` so that ``unittest.main()`` can drive
    it: ``setUp`` starts the browser, ``test_playlists`` performs the scrape
    and ``tearDown`` closes the browser again.  Every playlist is written as
    a JSON file into the current working directory.

    NOTE(review): the ``find_element_by_id`` calls and the positional driver
    path passed to ``webdriver.Chrome`` look like the Selenium 3 API -
    confirm the installed Selenium version before upgrading.
    """

    # Driver location used when CHROMEDRIVER_PATH is not set in the environment.
    DEFAULT_CHROME_DRIVER = r'D:\Git\spotify-webscraping\chromedriver.exe'

    # One-pass character mapping used by replace_text(): hyphens and commas
    # are dropped; apostrophes, slashes and spaces become hyphens.
    _FILE_NAME_TABLE = str.maketrans({
        '-': None,
        ',': None,
        "'": '-',
        '/': '-',
        ' ': '-',
    })

    def setUp(self):
        """Start the Chrome driver and remember the site's base URL.

        The driver binary is taken from the ``CHROMEDRIVER_PATH`` environment
        variable, falling back to ``DEFAULT_CHROME_DRIVER``.  When no file
        exists at that path the test is skipped with an explanatory message
        instead of failing inside Selenium.
        """
        chrome_driver = os.environ.get(
            'CHROMEDRIVER_PATH', self.DEFAULT_CHROME_DRIVER)
        if not os.path.isfile(chrome_driver):
            self.skipTest(
                'chromedriver not found at {!r}; set CHROMEDRIVER_PATH to '
                'its location'.format(chrome_driver))
        self.driver = webdriver.Chrome(chrome_driver)
        self.baseUrl = 'https://open.spotify.com'

    def tearDown(self):
        """Quit the browser so no Chrome/chromedriver process is left behind."""
        driver = getattr(self, 'driver', None)
        if driver is not None:
            driver.quit()

    def login(self):
        """Sign in with credentials typed on the console.

        Opens the base URL, clicks the element with id ``has-account``, fills
        in the ``login-username`` / ``login-password`` fields, submits the
        form and stores the resulting page URL in ``self.currentUrl``.
        """
        driver = self.driver
        driver.get(self.baseUrl)
        sleep(1)
        driver.find_element_by_id('has-account').click()
        sleep(2)
        username = input('Username: ')
        # getpass keeps the password from being echoed to the terminal.
        password = getpass.getpass('Password: ')
        driver.find_element_by_id('login-username').send_keys(username)
        driver.find_element_by_id('login-password').send_keys(password)
        driver.find_element_by_id('login-button').click()
        sleep(2)
        self.currentUrl = driver.current_url

    def test_playlists(self):
        """Log in, list every playlist of the collection and export each one.

        The playlists page is scrolled to the bottom repeatedly until its
        height stops growing, then each ``media-object`` entry is printed and
        handed to ``innerPlaylist``.
        """
        self.login()
        driver = self.driver
        driver.get('https://open.spotify.com/collection/playlists')

        get_height = 'return document.body.scrollHeight'
        last_height = driver.execute_script(get_height)
        while True:
            driver.execute_script(
                'window.scrollTo(0, document.body.scrollHeight)')
            # Wait BEFORE measuring so newly requested items have a chance to
            # render; measuring first can end the loop prematurely.
            sleep(1)
            new_height = driver.execute_script(get_height)
            if new_height == last_height:
                break
            last_height = new_height

        # The page is parsed once here, so navigating away inside the loop
        # below does not invalidate the entries being iterated.
        soup = BS(driver.page_source, 'html.parser')
        for entry in soup.find_all(attrs={'class': 'media-object'}):
            title = entry.find(attrs={'class': 'mo-info-name'})
            url = title.get('href') if title is not None else None
            if not url:
                # Entry without a named link: nothing to open or export.
                continue
            print('Title: {}\nLink: {}'.format(title.text, url))
            self.innerPlaylist(url)

    def replace_text(self, word):
        """Return ``word`` turned into a hyphen-separated file-name stem.

        Existing hyphens and commas are removed; apostrophes, slashes and
        spaces are each replaced by a hyphen.  Letter case is left unchanged.
        """
        return word.translate(self._FILE_NAME_TABLE)

    def innerPlaylist(self, url):
        """Open one playlist page and dump its tracks to ``<title>.json``.

        Args:
            url: Site-relative playlist link; it is appended to
                ``self.baseUrl``.

        The output file holds a JSON list of ``{'title': ..., 'artist': [...]}``
        objects, one per ``tracklist-row``.  The ``artist`` list contains the
        text of every ``link-subtle`` element in the row (presumably artist
        and album links - verify against the page markup).
        """
        driver = self.driver
        driver.get(self.baseUrl + url)
        sleep(2)
        soup = BS(driver.page_source, 'html.parser')

        header = soup.find('div', attrs={'class': 'entity-name'})
        heading = header.find('h2') if header is not None else None
        if heading is not None:
            title = heading.text
        else:
            # Header missing: fall back to the last URL segment so the
            # playlist is still exported instead of raising AttributeError.
            title = url.rstrip('/').rsplit('/', 1)[-1] or 'playlist'
        file_name = self.replace_text(title).lower() + '.json'

        tracks = []
        for row in soup.find_all(attrs={'class': 'tracklist-row'}):
            song_name = row.find(attrs={'class': 'tracklist-name'})
            if song_name is None:
                # Row without a track name cannot be described; skip it.
                continue
            credits = [
                link.text
                for link in row.find_all(attrs={'class': 'link-subtle'})
            ]
            tracks.append({
                'title': song_name.text,
                'artist': credits,
            })
        self._write_tracks(file_name, tracks)

    @staticmethod
    def _write_tracks(file_name, tracks):
        """Write ``tracks`` to ``file_name`` as one UTF-8 JSON document.

        The file is truncated first: appending a second JSON array to an
        existing export would leave a file that no JSON parser accepts.
        """
        with open(file_name, 'w', encoding='utf-8') as fo:
            json.dump(tracks, fo, ensure_ascii=False)
if __name__ == '__main__':
    # Executed as a script: let unittest's command-line runner discover and
    # run the test methods defined in this module.
    unittest.main()