-
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathcwlists.py
182 lines (159 loc) · 5.68 KB
/
cwlists.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# Module metadata for cwlists.py: file name, author, license, version,
# maintainer, contact address, release status and module group.
# NOTE(review): the __email__ value below looks like a placeholder
# substituted by the web page this file was copied from - confirm it
# against the upstream file before relying on it.
__filename__ = "cwlists.py"
__author__ = "Bob Mottram"
__license__ = "AGPL3+"
__version__ = "1.6.0"
__maintainer__ = "Bob Mottram"
__email__ = "[email protected]"
__status__ = "Production"
__module_group__ = "Core"
import os
from utils import load_json
from utils import get_content_from_post
def load_cw_lists(base_dir: str, verbose: bool) -> {}:
    """Load lists used for content warnings

    Walks base_dir/cwlists, including any subdirectories, and loads
    every file ending in '.json' via load_json. A list is kept only if
    it has a 'name' and at least one of 'words', 'hashtags' or 'domains'.

    base_dir: directory which contains the 'cwlists' directory
    verbose: if True, print the name of every list which is accepted

    Returns a dict mapping each list name to its loaded json, or an
    empty dict if the cwlists directory does not exist. If two files
    declare the same name the one visited last wins.
    """
    cwlists_dir = base_dir + '/cwlists'
    if not os.path.isdir(cwlists_dir):
        return {}
    result = {}
    # NOTE: here we do want to allow recursive walk through
    # possible subdirectories
    for dirpath, _, files in os.walk(cwlists_dir):
        for fname in files:
            if not fname.endswith('.json'):
                continue
            # Join with the directory currently being walked, not the
            # top level one, otherwise files found in subdirectories
            # would be looked up in the wrong place
            list_filename = os.path.join(dirpath, fname)
            print('list_filename: ' + list_filename)
            # presumably load_json returns something falsy when the
            # file can't be read or parsed - verify against utils
            list_json = load_json(list_filename)
            if not list_json:
                continue
            if not list_json.get('name'):
                continue
            # a list with nothing to match against is of no use
            if not list_json.get('words') and \
               not list_json.get('hashtags') and \
               not list_json.get('domains'):
                continue
            name = list_json['name']
            if verbose:
                print('List: ' + name)
            result[name] = list_json
    return result
def _add_cw_match_tags(item: {}, post_tags: {}, cw_text: str,
warning: str) -> (bool, str):
"""Updates content warning text using hashtags from within
the post content
"""
matched = False
for tag in item['hashtags']:
tag = tag.strip()
if not tag:
continue
if not tag.startswith('#'):
tag = '#' + tag
tag = tag.lower()
for tag_dict in post_tags:
if not isinstance(tag_dict, dict):
continue
if not tag_dict.get('Hashtag'):
continue
if not tag_dict.get('name'):
continue
if tag_dict['name'].lower() == tag:
if cw_text:
cw_text = warning + ' / ' + cw_text
else:
cw_text = warning
matched = True
break
if matched:
break
return matched, cw_text
def _add_cw_match_domains(item: {}, content: str, cw_text: str,
warning: str) -> (bool, str):
"""Updates content warning text using domains from within
the post content
"""
matched = False
for domain in item['domains']:
if '.' in domain:
first_section = domain.split('.')[0]
if len(first_section) < 4:
if '.' + domain in content or \
'/' + domain in content:
if cw_text:
cw_text = warning + ' / ' + cw_text
else:
cw_text = warning
matched = True
break
continue
if domain in content:
if cw_text:
cw_text = warning + ' / ' + cw_text
else:
cw_text = warning
matched = True
break
return matched, cw_text
def add_cw_from_lists(post_json_object: {}, cw_lists: {}, translate: {},
                      lists_enabled: str, system_language: str,
                      languages_understood: []) -> None:
    """Adds content warnings to a post by matching its hashtags and
    content against the enabled CW lists

    post_json_object: the post; its 'object' entry is read and, if any
                      warning text results, updated in place
    cw_lists: dict of list name to CW list
    translate: dict used to look up a translated version of a warning
    lists_enabled: a list is only used if its name is found in this
    system_language, languages_understood: passed through to
                      get_content_from_post when fetching the content

    For each enabled list the hashtags are checked first, then the
    domains, then the words. The first kind which matches puts the
    warning in front of the existing warning text, separated by ' / '.
    If the resulting text is not empty it is stored as the 'summary'
    and the post is marked as 'sensitive'.
    """
    if not lists_enabled:
        return
    post_obj = post_json_object['object']
    if 'content' not in post_obj and 'contentMap' not in post_obj:
        return

    # begin with whatever content warning the post already has
    existing_summary = post_obj.get('summary')
    cw_text = existing_summary if existing_summary else ''

    content = get_content_from_post(post_json_object, system_language,
                                    languages_understood, "content")
    if not content:
        return

    # only a non-empty list of tags is of any use for hashtag matching
    tags = post_obj.get('tag')
    post_tags = tags if tags and isinstance(tags, list) else []

    for list_name, cw_list in cw_lists.items():
        if list_name not in lists_enabled:
            continue
        warning = cw_list.get('warning')
        if not warning:
            continue

        # prefer a translated version of the warning if there is one
        translated = translate.get(warning)
        if translated:
            warning = translated

        # don't add the same warning twice
        if warning in cw_text:
            continue

        # hashtags within the post
        if post_tags and cw_list.get('hashtags'):
            hit, cw_text = \
                _add_cw_match_tags(cw_list, post_tags, cw_text, warning)
            if hit:
                continue

        # domains within the content
        if cw_list.get('domains'):
            hit, cw_text = \
                _add_cw_match_domains(cw_list, content, cw_text, warning)
            if hit:
                continue

        # words within the content, either as given or title cased
        for word in cw_list.get('words') or ():
            if word in content or word.title() in content:
                cw_text = warning + ' / ' + cw_text if cw_text else warning
                break

    if cw_text:
        post_obj['summary'] = cw_text
        post_obj['sensitive'] = True
def get_cw_list_variable(list_name: str) -> str:
    """Returns the variable name for a CW list: the text 'list'
    followed by the list name with spaces and apostrophes removed
    """
    strip_chars = str.maketrans('', '', " '")
    return 'list' + list_name.translate(strip_chars)