-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
81 lines (61 loc) · 2.71 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import os
import sys
import time
from datetime import datetime

import chromedriver_autoinstaller
import mariadb
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
# Stock codes to look up on each run.
tickers = ['9988', '3690', '700', '1810', '9618']

# Translation table that deletes thousands separators and percent signs so
# the scraped figures can be passed straight to int() / float().
translation = str.maketrans("", "", ",%")

# Fetch a chromedriver build for the locally installed Chrome before the
# driver is started; no explicit driver path is passed to webdriver.Chrome.
chromedriver_autoinstaller.install()

# Run Chrome without a visible window ("new" headless mode, Chrome >= 109).
chrome_options = Options()
chrome_options.add_argument("--headless=new")

# Single shared browser session used for every ticker.
browser = webdriver.Chrome(options=chrome_options)
# Connection settings may be overridden through CCASS_DB_* environment
# variables; the literals are only fallbacks that keep existing setups
# working unchanged.
# NOTE(review): the fallback credentials are committed to source control --
# rotate them and supply the real values through the environment instead.
try:
    conn = mariadb.connect(
        user=os.environ.get("CCASS_DB_USER", "admin"),
        password=os.environ.get("CCASS_DB_PASSWORD", "K03cmErgBSz07mMfhDs6"),
        host=os.environ.get(
            "CCASS_DB_HOST",
            "db-warrants.c58gq2a6gh56.ap-southeast-1.rds.amazonaws.com",
        ),
        port=int(os.environ.get("CCASS_DB_PORT", "3306")),
        database=os.environ.get("CCASS_DB_NAME", "ccass"),
    )
    # Module-level cursor shared by save_data() and closed in main().
    cur = conn.cursor()
except mariadb.Error as e:
    print(f"Error connecting to MariaDB Platform: {e}", file=sys.stderr)
    # The headless browser is already running at this point; shut it down so
    # a failed connection does not leave a Chrome process behind.
    browser.quit()
    sys.exit(1)
def save_data(created_date, ticker, shareholding, percent):
    """Insert one row into ``ccass_summary`` and commit it.

    Uses the module-level cursor ``cur`` and connection ``conn``. The values
    are passed as bound parameters rather than formatted into the SQL text.
    The statement is ``INSERT IGNORE`` -- presumably so that re-running for
    an already stored date/ticker is a no-op; that depends on the table's
    unique keys, which are not visible here.

    Args:
        created_date: Shareholding date for the row.
        ticker: Stock code the figures belong to.
        shareholding: Total shareholding figure.
        percent: Percentage figure stored in the ``percentage`` column.
    """
    row = (created_date, ticker, shareholding, percent)
    insert_sql = "INSERT IGNORE INTO ccass_summary (created_date, ticker, shareholding, percentage) VALUES (?,?,?,?)"

    print('Saving data to DB')
    print(f"{created_date}, {ticker}, {shareholding}, {percent}")

    cur.execute(insert_sql, row)
    conn.commit()
def scrape_data(ticker):
    """Search the HKEX shareholding page for ``ticker`` and parse the totals.

    Drives the module-level ``browser``: loads the search page, types the
    stock code, presses Return, waits one second, clicks the search button
    and then reads the shareholding date and the totals row from the page.

    Args:
        ticker: Stock code typed into the ``txtStockCode`` field.

    Returns:
        A tuple ``(date, ticker, shareholding, percent)`` where ``date`` is a
        ``YYYY-MM-DD`` string, ``shareholding`` is an int and ``percent`` is
        a float, both parsed after stripping ``,`` and ``%``.
    """
    browser.get('https://www3.hkexnews.hk/sdw/search/searchsdw.aspx')

    # Submit the stock code, then trigger the search explicitly.
    code_box = browser.find_element(By.NAME, 'txtStockCode')
    code_box.send_keys(ticker)
    code_box.send_keys(Keys.RETURN)
    time.sleep(1)
    browser.find_element(By.ID, 'btnSearch').click()

    # The page shows the date as YYYY/MM/DD; convert it to YYYY-MM-DD.
    raw_date = browser.find_element(By.NAME, 'txtShareholdingDate').get_attribute('value')
    iso_date = datetime.strptime(raw_date, "%Y/%m/%d").strftime("%Y-%m-%d")

    # Both figures are read from inside the totals element.
    totals = browser.find_element(By.CLASS_NAME, 'ccass-search-total')

    shares_text = totals.find_element(By.CLASS_NAME, 'shareholding').text
    shares = int(shares_text.translate(translation))

    pct_text = totals.find_element(By.CLASS_NAME, 'percent-of-participants').text
    pct = float(pct_text.translate(translation))

    return iso_date, ticker, shares, pct
def main():
    """Scrape every configured ticker, store each result, then clean up.

    For each entry in the module-level ``tickers`` list this calls
    ``scrape_data`` and ``save_data`` and prints the stored values. Any
    exception from scraping or saving still propagates and aborts the run,
    but the cursor, the database connection and the browser are released in
    all cases.
    """
    try:
        for ticker in tickers:
            formatted_date, ticker, formatted_shareholding, formatted_percent = scrape_data(ticker)
            save_data(formatted_date, ticker, formatted_shareholding, formatted_percent)
            print(f'Date: {formatted_date}, Ticker: {ticker} ,Shareholding: {formatted_shareholding}, Percent: {formatted_percent}')
            # Brief pause between tickers before the next page is requested.
            time.sleep(1)
    finally:
        # Nested so the browser is shut down even if closing the database
        # handles fails.
        try:
            cur.close()
            conn.close()
        finally:
            browser.quit()


if __name__ == '__main__':
    main()