scraper.py
from bs4 import BeautifulSoup
import requests
import csv

# Open the output CSV and write the header row.
csv_file = open('org_info.csv', 'w', newline='')
csv_write = csv.writer(csv_file)
csv_write.writerow(['Student Name', 'Organisation', 'Project'])

# Expect a paginated archive URL ending in a page number,
# e.g. https://summerofcode.withgoogle.com/archive/2019/projects/?page=1
base_url = input("Enter the URL: ")
base_url = base_url[:-1]          # strip the trailing page number

for j in range(1, 13):
    url = base_url + str(j)       # build the URL for page j
    source = requests.get(url).text
    soup = BeautifulSoup(source, "lxml")

    # The project cards live inside the page's 'lifted-section' element.
    orgs = soup.find('section', class_='lifted-section')

    for i in orgs.find_all('li'):
        details = i.div.text.splitlines()
        stud_name = details[2]
        project = details[4]
        # The organisation line looks like "Organization: <name>".
        org_name = details[5].split(':')[1][1:]
        print(stud_name)
        print(project)
        print(org_name)
        csv_write.writerow([stud_name, org_name, project])

csv_file.close()