from urllib.parse import urlparse
from dataclasses import dataclass
from lxml import html
import configparser
import curiouscat
import threading
import services
import operator
import datetime
import requests
import twython
import pymysql
import random
import os
import re
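
# Data-access layer for the edaweb site: a thin pymysql wrapper around the
# site's MySQL tables, plus helpers for cached tweets, GitHub commits and
# curiouscat Q&As. A minimal usage sketch (table names as used by the
# queries below):
#
#     with Database() as db:
#         links = db.get_header_links()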
@dataclass
class Database:
    safeLogin: bool = True  # automatically log in as the read-only user from the config file
    user: str = None        # otherwise, log in with the given username and passwd
    passwd: str = None

    def __enter__(self):
        self.config = configparser.ConfigParser(interpolation = None)
        self.config.read("edaweb.conf")
        if self.safeLogin:
            self.__connection = pymysql.connect(
                **self.config["mysql"],
                charset = "utf8mb4"
            )
        else:
            self.__connection = pymysql.connect(
                user = self.user,
                passwd = self.passwd,
                host = self.config["mysql"]["host"],
                db = self.config["mysql"]["db"],
                charset = "utf8mb4"
            )
        return self

    def __exit__(self, type, value, traceback):
        self.__connection.commit()
        self.__connection.close()

    def get_header_links(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT name, link FROM headerLinks ORDER BY name;")
            return cursor.fetchall()

    def get_image(self, imageName):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT alt, url FROM images WHERE imageName = %s;", (imageName, ))
            return cursor.fetchone()

    def get_pfp_images(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT alt, url FROM images WHERE pfp_img = 1;")
            return cursor.fetchall()

    def get_sidebar_images(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT alt, url FROM images WHERE sidebar_image = 1;")
            return cursor.fetchall()

    def get_header_articles(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT articleName, link FROM headerArticles;")
            return cursor.fetchall()

    def get_all_categories(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT category_name FROM categories;")
            return [i[0] for i in cursor.fetchall()]

    def add_category(self, category):
        if category not in self.get_all_categories():
            with self.__connection.cursor() as cursor:
                cursor.execute("INSERT INTO categories (category_name) VALUES (%s);", (category, ))
            self.__connection.commit()
            return True
        return False

    def add_thought(self, category, title, markdown):
        with self.__connection.cursor() as cursor:
            cursor.execute("""
            INSERT INTO thoughts (category_id, title, markdown_text)
            VALUES ((
                SELECT category_id FROM categories WHERE category_name = %s
            ), %s, %s);""", (category, title, markdown))
        self.__connection.commit()

    def get_thought(self, id_):
        with self.__connection.cursor() as cursor:
            cursor.execute("""
            SELECT categories.category_name, thoughts.title, thoughts.dt, thoughts.markdown_text, thoughts.redirect
            FROM thoughts INNER JOIN categories
            ON thoughts.category_id = categories.category_id
            WHERE thought_id = %s;""", (id_, ))
            return cursor.fetchone()

    def get_similar_thoughts(self, category, id_):
        with self.__connection.cursor() as cursor:
            cursor.execute("""
            SELECT thought_id, title, dt, category_name FROM thoughts
            INNER JOIN categories ON thoughts.category_id = categories.category_id
            WHERE category_name = %s AND thought_id != %s;""",
            (category, id_))
            return cursor.fetchall()

    def get_featured_thoughts(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT thought_id, title FROM thoughts WHERE featured = 1;")
            return cursor.fetchall()

    def update_thought_markdown(self, id_, markdown):
        with self.__connection.cursor() as cursor:
            cursor.execute("UPDATE thoughts SET markdown_text = %s WHERE thought_id = %s;", (markdown, id_))
        self.__connection.commit()

    def get_categories_not(self, category_name):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT category_name FROM categories WHERE category_name != %s;", (category_name, ))
            return [i[0] for i in cursor.fetchall()]

    def get_all_thoughts(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("""
            SELECT thought_id, title, dt, category_name FROM thoughts
            INNER JOIN categories ON thoughts.category_id = categories.category_id;
            """)
            return cursor.fetchall()
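
    # Cached copies of the main account's tweets live in the `diary` table;
    # links are rebuilt against the nitter mirror configured under
    # [nitter] outsideurl in edaweb.conf.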
    def get_cached_tweets(self, numToGet = None):
        with self.__connection.cursor() as cursor:
            sql = "SELECT tweet, tweet_id, account FROM diary WHERE account = %s ORDER BY tweeted_at DESC"
            args = (self.config.get("twitter", "main_account"), )
            if numToGet is not None:
                sql += " LIMIT %s;"
                args = (self.config.get("twitter", "main_account"), numToGet)
            else:
                sql += ";"
            cursor.execute(sql, args)
            return [(i[0], "https://%s/%s/status/%d" % (self.config.get("nitter", "outsideurl"), i[2], i[1])) for i in cursor.fetchall()]

    def get_cached_commits(self, since = None, recurse = True):
        with self.__connection.cursor() as cursor:
            if since is not None:
                cursor.execute("SELECT message, url, commitTime, additions, deletions, total FROM commitCache WHERE commitTime > %s ORDER BY commitTime DESC;", (since, ))
            else:
                cursor.execute("SELECT message, url, commitTime, additions, deletions, total FROM commitCache ORDER BY commitTime DESC;")
            return [{
                "repo": urlparse(i[1]).path.split("/")[2],
                "message": i[0],
                "url": i[1],
                "datetime": i[2],
                "stats": {
                    "additions": i[3],
                    "deletions": i[4],
                    "total": i[5]
                }
            } for i in cursor.fetchall()]
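
    # `requested` is expected to be a list of commit dicts shaped like the ones
    # returned by get_cached_commits(); only commits whose URL is not already
    # cached are inserted.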
    def update_commit_cache(self, requested):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT DISTINCT url FROM commitCache;")
            urls = [i[0] for i in cursor.fetchall()]
            for commit in requested:
                if commit["url"] not in urls:
                    cursor.execute("""
                    INSERT INTO commitCache (message, url, commitTime, additions, deletions, total)
                    VALUES (%s, %s, %s, %s, %s, %s)""",
                    (commit["message"], commit["url"], commit["datetime"], commit["stats"]["additions"], commit["stats"]["deletions"], commit["stats"]["total"])
                    )
        self.__connection.commit()

    def get_last_commit_time(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT MAX(commitTime) FROM commitCache;")
            return cursor.fetchone()[0]

    def get_my_twitter(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT link FROM headerLinks WHERE name = 'twitter';")
            return cursor.fetchone()[0]

    def get_my_diary_twitter(self):
        return self.config.get("twitter", "diary_account")
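
    # Lists the .iso files in the directory configured under [cds] location,
    # keeping only files smaller than the [cds] maxsize limit.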
    def get_iso_cd_options(self):
        iso_dir = self.config.get("cds", "location")
        return [
            i
            for i in os.listdir(iso_dir)
            if os.path.splitext(i)[-1].lower() in [".iso"]
            and os.path.getsize(os.path.join(iso_dir, i)) < self.config.getint("cds", "maxsize")
        ]

    def append_cd_orders(self, iso, email, house, street, city, county, postcode, name):
        with self.__connection.cursor() as cursor:
            cursor.execute("""
            INSERT INTO cd_orders_2 (iso, email, house, street, city, county, postcode, name)
            VALUES (%s, %s, %s, %s, %s, %s, %s, %s);
            """, (iso, email, house, street, city, county, postcode, name))
            id_ = cursor.lastrowid
        self.__connection.commit()
        return id_

    def append_diary(self, tweet_id, tweeted_at, replying_to, tweet, account):
        if tweet is None:
            tweet = "(Image only)"
        with self.__connection.cursor() as cursor:
            cursor.execute("INSERT INTO diary VALUES (%s, %s, %s, %s, %s);", (tweet_id, tweeted_at, replying_to, tweet, account))
        self.__connection.commit()
        print("Appended diary with tweet " + tweet + " @ " + str(tweeted_at))

    def append_diary_images(self, tweet_id, imurl):
        with self.__connection.cursor() as cursor:
            cursor.execute("INSERT INTO diaryimages (tweet_id, link) VALUES (%s, %s);", (tweet_id, imurl))
        self.__connection.commit()
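
    # Builds the diary page data: a dict keyed by the root tweet's timestamp,
    # each value a list of the tweets in that thread (text, image links,
    # nitter link). curiouscat auto-tweets are skipped by matching against the
    # qnas table, and a background thread is started to refresh the caches.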
    def get_diary(self, account = None):
        threading.Thread(target = update_cache).start()
        out = {}
        if account is None:
            account = self.get_my_diary_twitter()

        with self.__connection.cursor() as cursor:
            # cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL ORDER BY tweeted_at DESC;")
            # attempt to ignore curiouscat automatic tweets by comparing with the q&a table
            cursor.execute("SELECT tweet_id, tweeted_at, tweet FROM diary WHERE replying_to IS NULL AND tweet_id NOT IN (SELECT tweet_id FROM diary INNER JOIN qnas ON SUBSTRING(tweet, 1, 16) = SUBSTRING(question, 1, 16)) AND account IN %s ORDER BY tweeted_at DESC;", ([account, "HONMISGENDERER"], ))

            for tweet_id, tweeted_at, tweet_text in cursor.fetchall():
                # print(tweet_id, tweeted_at, tweet_text)
                out[tweeted_at] = [{
                    "text": tweet_text,
                    "images": self.get_diary_image(tweet_id),
                    "link": "https://%s/%s/status/%d" % (
                        self.config.get("nitter", "outsideurl"),
                        self.get_my_diary_twitter(),
                        tweet_id
                    )
                }]

                next_tweet = self.get_child_tweets(tweet_id)
                while next_tweet is not None:
                    tweet, id_ = next_tweet
                    out[tweeted_at].append(tweet)
                    next_tweet = self.get_child_tweets(id_)

        return out

    def get_diary_image(self, tweet_id):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT link FROM diaryimages WHERE tweet_id = %s;", (tweet_id, ))
            return [i[0] for i in cursor.fetchall()]
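
    # Walks one step down a reply chain: returns (tweet dict, tweet_id) for the
    # first reply to parent_id, or None if the tweet has no replies.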
    def get_child_tweets(self, parent_id):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT tweet_id, tweet FROM diary WHERE replying_to = %s;", (parent_id, ))
            out = cursor.fetchall()
            if out == ():
                return None
            out = out[0]
            id_ = out[0]
            return {
                "text": out[1],
                "images": self.get_diary_image(id_),
                "link": "https://%s/%s/status/%d" % (
                    self.config.get("nitter", "outsideurl"), self.get_my_diary_twitter(), id_
                )
            }, id_

    def get_newest_diary_tweet_id(self, account = None):
        if account is None:
            account = self.get_my_diary_twitter()
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT MAX(tweet_id) FROM diary WHERE account = %s;", (account, ))
            return cursor.fetchone()[0]

    def get_curiouscat_username(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT link FROM headerLinks WHERE name = 'curiouscat';")
            return urlparse(cursor.fetchone()[0]).path.split("/")[1]

    def append_curiouscat_qnas(self, qnas):
        with self.__connection.cursor() as cursor:
            for qna in qnas:
                cursor.execute("SELECT curiouscat_id FROM qnas WHERE curiouscat_id = %s;", (qna["id"], ))
                if cursor.fetchone() is None:
                    cursor.execute("INSERT INTO `qnas` VALUES (%s, %s, %s, %s, %s);", (
                        qna["id"], qna["link"], qna["datetime"], qna["question"], qna["answer"]
                    ))
                    print("Appended question with timestamp %s" % datetime.datetime.fromtimestamp(qna["id"]).isoformat())
                else:
                    print("Skipped question with timestamp %s" % datetime.datetime.fromtimestamp(qna["id"]).isoformat())
        self.__connection.commit()

    def get_biggest_curiouscat_timestamp(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT MAX(`timestamp`) FROM `qnas`;")
            return cursor.fetchone()[0]

    def get_curiouscat_qnas(self):
        with self.__connection.cursor() as cursor:
            cursor.execute("SELECT * FROM qnas;")
            return sorted(cursor.fetchall(), key = operator.itemgetter(2), reverse = True)
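

# Refreshes every cache in one pass: new curiouscat Q&As, recent GitHub commits
# (via services.request_recent_commits), and diary/main-account tweets scraped
# through services.scrape_nitter. Run in a background thread by get_diary().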
def update_cache():
    print("Updating cache...")
    with Database() as db:
        db.append_curiouscat_qnas(
            curiouscat.get_all_curiouscat_qnas_before(
                db.get_curiouscat_username(),
                db.get_biggest_curiouscat_timestamp()
            )
        )
        print("Finished adding curiouscat...")

        db.update_commit_cache(services.request_recent_commits(since = db.get_last_commit_time()))
        print("Finished adding github commits...")

        for id_, dt, replying_to, text, username, images in services.scrape_nitter(db.get_my_diary_twitter(), db.get_newest_diary_tweet_id()):
            db.append_diary(id_, dt, replying_to, text, username)
            for image in images:
                db.append_diary_images(id_, image)
        print("Finished getting diary tweets...")

        main_account = db.config.get("twitter", "main_account")
        oldest_tweet = db.get_newest_diary_tweet_id(main_account)
        print("Fetching tweets from account '%s' older than %d" % (main_account, oldest_tweet))
        for id_, dt, replying_to, text, username, images in services.scrape_nitter(
            main_account,
            oldest_tweet
        ):
            db.append_diary(id_, dt, replying_to, text, username)
            for image in images:
                db.append_diary_images(id_, image)
        print("Finished getting main account tweets...")


if __name__ == "__main__":
    with Database() as db:
        print(db.get_diary())