forked from AlexanderFengler/tbip
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsample_authors.py
44 lines (37 loc) · 1.56 KB
/
sample_authors.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# we identified: Phrag, MrFlesh, thetimeisnow, MyaloMark, mexicodoug
author_list = [
    "MrFlesh", "oddmanout", "Phrag", "NoMoreNicksLeft", "permaculture",
    "aletoledo", "thetimeisnow", "MyaloMark", "mexicodoug", "rainman_104",
    "mutatron", "otakucode", "cuteman", "donh", "nixonrichard", "garyp714",
    "Stormflux", "seeker135", "dirtymoney", "folderol",
]
# Maximum number of comments to sample per author.
sample_num = 5


def _collect_line_ids(authors_path, authors, per_author):
    """Map each author to the first ``per_author`` line indices naming them.

    ``authors_path`` is read line by line; every line is stripped and
    compared with the names in ``authors``. Indices are 0-based line numbers
    and are stored in ascending order. Reading stops as soon as every author
    has ``per_author`` indices.
    """
    line_ids = {name: [] for name in authors}
    # Count of authors still below their quota; 0 means we can stop reading.
    remaining = len(line_ids)
    if per_author <= 0 or remaining == 0:
        return line_ids

    with open(authors_path, "r") as handle:
        for line_no, raw in enumerate(handle):
            ids = line_ids.get(raw.strip())
            if ids is None or len(ids) >= per_author:
                continue
            ids.append(line_no)
            if len(ids) == per_author:
                remaining -= 1
                if remaining == 0:
                    break
    return line_ids


def _strip_blank_lines(texts_path):
    """Rewrite ``texts_path`` in place without blank lines.

    Every kept line is stripped of surrounding whitespace and written back
    followed by a single newline. Returns the kept lines (without newline)
    in file order.
    """
    with open(texts_path, "r") as handle:
        kept = [text for text in map(str.strip, handle) if text]
    with open(texts_path, "w") as handle:
        handle.writelines(text + "\n" for text in kept)
    return kept


def _gather_texts(lines, line_ids):
    """Return ``{author: [text, ...]}`` by looking up each stored index.

    Indices past the end of ``lines`` are skipped.
    """
    total = len(lines)
    return {
        name: [lines[idx] for idx in ids if idx < total]
        for name, ids in line_ids.items()
    }


def _write_samples(sample_path, texts_by_author):
    """Write one ``<author>:`` header per author followed by their texts."""
    with open(sample_path, "w") as sample:
        for author, texts in texts_by_author.items():
            print("\n" + author + ":\n", file=sample)
            for text in texts:
                print(text, file=sample)


def main(authors_path="prolific_authors", texts_path="prolific_texts",
         sample_path="prolific_sampled_comments"):
    """Sample up to ``sample_num`` comments for every name in ``author_list``.

    Steps, in order:
      1. scan ``authors_path`` for the line indices of the listed authors;
      2. rewrite ``texts_path`` in place with blank lines removed;
      3. write the texts found at those indices to ``sample_path``.
    """
    line_ids = _collect_line_ids(authors_path, author_list, sample_num)
    # NOTE(review): indices come from the authors file but are applied to the
    # texts file *after* blank lines are dropped. This assumes line i of the
    # authors file corresponds to the i-th non-blank line of the texts file
    # -- confirm against whatever produces these two files.
    lines = _strip_blank_lines(texts_path)
    _write_samples(sample_path, _gather_texts(lines, line_ids))


if __name__ == "__main__":
    main()