-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrape.py
108 lines (82 loc) · 3.85 KB
/
scrape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from selenium import webdriver
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium import webdriver
from dotenv import load_dotenv
import time
import random
import os
def Scrape():
    """Log in to Twitter/X with credentials from the environment and scrape
    the timeline until more than 5 unique tweets are collected, saving them
    to ~/Downloads/twitter.csv.

    Reads TWIT_USERNAME / TWIT_PASSWORD from a .env file via python-dotenv.
    Returns None; side effects are the Chrome browser session and the CSV.
    """
    import pandas as pd  # kept as a local import, as in the original file

    def _type_like_human(element, text):
        """Send *text* one key at a time with random delays and an occasional
        (~2%) backspace-and-retype, to mimic human typing."""
        for letter in text:
            element.send_keys(letter)
            time.sleep(random.uniform(0.1, 0.5))
            # BUG FIX: the original compared the *list* returned by
            # random.choices([98, 2], weights=[98, 2]) against the int 1,
            # which is never true, so the backspace branch never ran.
            # A plain 2% coin flip is what was intended.
            if random.random() < 0.02:
                element.send_keys(Keys.BACKSPACE)
                time.sleep(random.uniform(0.1, 0.5))
                element.send_keys(letter)  # retype the character we deleted

    load_dotenv()
    from_user = os.getenv("TWIT_USERNAME")
    from_pass = os.getenv("TWIT_PASSWORD")

    driver = webdriver.Chrome()
    driver.get("https://twitter.com/i/flow/login")  # Switch to http://www.x.com if necessary
    time.sleep(random.uniform(1.0, 5.0))  # randomized timings for actions to avoid bot detection

    username = driver.find_element(By.XPATH, "//input[@name='text']")
    _type_like_human(username, from_user)

    time.sleep(random.uniform(1.0, 5.0))
    driver.find_element(By.XPATH, "//span[contains(text(), 'Next')]").click()
    time.sleep(random.uniform(1.0, 5.0))

    password = driver.find_element(By.XPATH, "//input[@name='password']")
    _type_like_human(password, from_pass)

    driver.find_element(By.XPATH, "//span[contains(text(), 'Log in')]").click()
    time.sleep(10)  # wait for the timeline to load

    # One row per unique tweet, keyed by tweet text. The original collected
    # five parallel lists and then converted each to a set before zipping —
    # sets are unordered and deduplicate independently, so the columns were
    # scrambled across rows. Keeping whole rows together fixes that.
    rows = {}
    time.sleep(5)
    while True:
        # Re-query after each scroll so newly loaded tweets are picked up.
        articles = driver.find_elements(By.XPATH, "//article[@data-testid='tweet']")
        for article in articles:
            # BUG FIX: the original called driver.find_element(...) inside
            # this loop, which searches the whole page and therefore re-read
            # the *first* tweet's fields on every iteration. Search within
            # each article element instead.
            try:
                tweet = article.find_element(By.XPATH, ".//div[@data-testid='tweetText']").text
                userTag = article.find_element(By.XPATH, ".//div[@data-testid='User-Name']").text
                timeStamp = article.find_element(By.XPATH, ".//time").get_attribute("datetime")
                retweet = article.find_element(By.XPATH, ".//div[@data-testid='retweet']").text
                like = article.find_element(By.XPATH, ".//div[@data-testid='like']").text
            except Exception:
                # Stale or partially rendered card (selenium raises
                # NoSuchElement/StaleElementReference here); skip it.
                continue
            print(userTag)
            print(timeStamp)
            rows[tweet] = (userTag, timeStamp, tweet, retweet, like)
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        time.sleep(2)
        if len(rows) > 5:  # Change based on how many tweets you want to scrape
            break

    df = pd.DataFrame(list(rows.values()),
                      columns=['UserTags', 'TimeStamps', 'Tweets', 'reTweets', 'Likes'])
    df.to_csv("~/Downloads/twitter.csv", index=False)  # This can be changed depending on where you want to save the file