/**
* @(#)WMFWiki.java 0.02 28/08/2021
* Copyright (C) 2011 - 20xx MER-C and contributors
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 3
* of the License, or (at your option) any later version. Additionally
* this file is subject to the "Classpath" exception.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software Foundation,
* Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.wikipedia;
import java.io.*;
import java.util.*;
import java.util.logging.*;
import java.time.*;
import javax.security.auth.login.*;
/**
 *  Functionality specific to Wikimedia (WMF) wikis.
* @author MER-C
* @version 0.02
*/
public class WMFWiki extends Wiki
{
/**
 *  Denotes entries in [[Special:AbuseLog]]. These cannot be accessed
* through [[Special:Log]] or getLogEntries.
* @see #getAbuseLogEntries(int[], Wiki.RequestHelper)
*/
public static final String ABUSE_LOG = "abuselog";
/**
* Denotes entries in the spam blacklist log. This is a privileged log type.
* Requires extension SpamBlacklist.
* @see Wiki#getLogEntries
*/
public static final String SPAM_BLACKLIST_LOG = "spamblacklist";
/**
* Denotes the global auth log (contains (un)lock/(un)hide actions).
* Requires extension CentralAuth.
* @see Wiki#getLogEntries
*/
public static final String GLOBAL_AUTH_LOG = "globalauth";
/**
* Denotes the global (IP) block log. Requires extension GlobalBlocking.
* @see Wiki#getLogEntries
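 *  <p>A brief usage sketch; the three-argument {@code getLogEntries}
 *  overload is assumed from the reference above:
 *  <pre>{@code
 *  WMFWiki meta = WMFWiki.newSession("meta.wikimedia.org");
 *  List<Wiki.LogEntry> gblocks = meta.getLogEntries(WMFWiki.GLOBAL_BLOCK_LOG, null, null);
 *  }</pre>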
*/
public static final String GLOBAL_BLOCK_LOG = "gblblock";
/**
* Creates a new MediaWiki API client for the WMF wiki that has the given
* domain name.
 *  @param domain a WMF wiki domain name, e.g. en.wikipedia.org
*/
protected WMFWiki(String domain)
{
super(domain, "/w", "https://");
}
/**
* Creates a new MediaWiki API client for the WMF wiki that has the given
* domain name.
 *  @param domain a WMF wiki domain name, e.g. en.wikipedia.org
* @return the constructed API client object
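 *  <p>A minimal usage sketch:
 *  <pre>{@code
 *  WMFWiki enWiki = WMFWiki.newSession("en.wikipedia.org");
 *  }</pre>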
*/
public static WMFWiki newSession(String domain)
{
WMFWiki wiki = new WMFWiki(domain);
wiki.initVars();
return wiki;
}
/**
* Get the global usage for a file.
*
* @param title the title of the page (must contain "File:")
* @return the global usage of the file, including the wiki and page the file is used on
* @throws IOException if a network error occurs
* @throws IllegalArgumentException if <code>{@link Wiki#namespace(String)
* namespace(title)} != {@link Wiki#FILE_NAMESPACE}</code>
* @throws UnsupportedOperationException if the GlobalUsage extension is
* not installed
* @see <a href="https://mediawiki.org/wiki/Extension:GlobalUsage">Extension:GlobalUsage</a>
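 *  <p>A usage sketch (the file name is a hypothetical example):
 *  <pre>{@code
 *  WMFWiki commons = WMFWiki.newSession("commons.wikimedia.org");
 *  for (String[] usage : commons.getGlobalUsage("File:Example.svg"))
 *      System.out.println(usage[0] + " -> " + usage[1]); // wiki -> page title
 *  }</pre>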
*/
public List<String[]> getGlobalUsage(String title) throws IOException
{
requiresExtension("Global Usage");
if (namespace(title) != FILE_NAMESPACE)
throw new IllegalArgumentException("Cannot retrieve global usage for pages other than File pages!");
Map<String, String> getparams = new HashMap<>();
getparams.put("prop", "globalusage");
getparams.put("titles", normalize(title));
List<String[]> usage = makeListQuery("gu", getparams, null, "getGlobalUsage", -1, (line, results) ->
{
for (int i = line.indexOf("<gu"); i > 0; i = line.indexOf("<gu", ++i))
results.add(new String[] {
parseAttribute(line, "wiki", i),
parseAttribute(line, "title", i)
});
});
return usage;
}
/**
 *  Gets abuse log entries. Requires extension AbuseFilter. An abuse log
 *  entry has <var>id</var> set, <var>target</var> set to the title of the
 *  page, <var>action</var> set to the action that was attempted (e.g.
 *  "edit") and a {@code null} (parsed) comment. <var>details</var> is a Map
 *  containing <var>filter_id</var>, <var>revid</var> (if the edit was
 *  successful) and <var>result</var> (what happened).
*
* <p>
* Accepted parameters from <var>helper</var> are:
* <ul>
* <li>{@link Wiki.RequestHelper#withinDateRange(OffsetDateTime,
* OffsetDateTime) date range}
* <li>{@link Wiki.RequestHelper#byUser(String) user}
* <li>{@link Wiki.RequestHelper#byTitle(String) title}
* <li>{@link Wiki.RequestHelper#reverse(boolean) reverse}
* <li>{@link Wiki.RequestHelper#limitedTo(int) local query limit}
* </ul>
*
* @param filters fetch log entries triggered by these filters (optional,
* use null or empty list to get all filters)
* @param helper a {@link Wiki.RequestHelper} (optional, use null to not
* provide any of the optional parameters noted above)
* @return the abuse filter log entries
* @throws IOException or UncheckedIOException if a network error occurs
* @throws UnsupportedOperationException if the AbuseFilter extension
* is not installed
* @see <a href="https://mediawiki.org/wiki/Extension:AbuseFilter">Extension:AbuseFilter</a>
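 *  <p>A usage sketch (the filter ID and user are hypothetical; assumes
 *  {@code Wiki.RequestHelper} is an inner class instantiated from the
 *  session as shown):
 *  <pre>{@code
 *  WMFWiki wiki = WMFWiki.newSession("en.wikipedia.org");
 *  Wiki.RequestHelper helper = wiki.new RequestHelper()
 *      .byUser("ExampleUser")
 *      .limitedTo(50);
 *  List<Wiki.LogEntry> hits = wiki.getAbuseLogEntries(new int[] { 30 }, helper);
 *  }</pre>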
*/
public List<LogEntry> getAbuseLogEntries(int[] filters, Wiki.RequestHelper helper) throws IOException
{
requiresExtension("Abuse Filter");
int limit = -1;
Map<String, String> getparams = new HashMap<>();
getparams.put("list", "abuselog");
if (filters != null && filters.length > 0)
getparams.put("aflfilter", constructNamespaceString(filters));
if (helper != null)
{
helper.setRequestType("afl");
getparams.putAll(helper.addUserParameter());
getparams.putAll(helper.addTitleParameter());
getparams.putAll(helper.addReverseParameter());
getparams.putAll(helper.addDateRangeParameters());
limit = helper.limit();
}
List<LogEntry> filterlog = makeListQuery("afl", getparams, null, "WMFWiki.getAbuseLogEntries", limit, (line, results) ->
{
String[] items = line.split("<item ");
for (int i = 1; i < items.length; i++)
{
long id = Long.parseLong(parseAttribute(items[i], "id", 0));
OffsetDateTime timestamp = OffsetDateTime.parse(parseAttribute(items[i], "timestamp", 0));
String loguser = parseAttribute(items[i], "user", 0);
String action = parseAttribute(items[i], "action", 0);
String target = parseAttribute(items[i], "title", 0);
Map<String, String> details = new HashMap<>();
details.put("revid", parseAttribute(items[i], "revid", 0));
details.put("filter_id", parseAttribute(items[i], "filter_id", 0));
details.put("result", parseAttribute(items[i], "result", 0));
results.add(new LogEntry(id, timestamp, loguser, null, null, ABUSE_LOG, action, target, details));
}
});
log(Level.INFO, "WMFWiki.getAbuselogEntries", "Sucessfully returned abuse filter log entries (" + filterlog.size() + " entries).");
return filterlog;
}
/**
* Renders the ledes of articles (everything in the first section) as plain
* text. Requires extension CirrusSearch. Output order is the same as input
* order, and a missing page will correspond to {@code null}. The returned
* plain text has the following characteristics:
*
* <ul>
* <li>There are no paragraph breaks or new lines.
 *  <li>HTML block elements (e.g. {@code <blockquote>}), image captions,
* tables, reference markers, references and external links with no
* description are all removed.
* <li>Templates are evaluated. If a template yields inline text (as opposed
* to a table or image) the result is not removed. That means navboxes
* and hatnotes are removed, but inline warnings such as [citation
* needed] are not.
* <li>The text of list items is not removed, but bullet points are.
* </ul>
*
* @param titles a list of pages to get plain text for
* @return the first section of those pages, as plain text
* @throws IOException if a network error occurs
* @see #getPlainText(List)
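 *  <p>A usage sketch (the article titles are examples):
 *  <pre>{@code
 *  WMFWiki enWiki = WMFWiki.newSession("en.wikipedia.org");
 *  List<String> ledes = enWiki.getLedeAsPlainText(List.of("Albert Einstein", "Physics"));
 *  }</pre>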
*/
public List<String> getLedeAsPlainText(List<String> titles) throws IOException
{
requiresExtension("CirrusSearch");
Map<String, String> getparams = new HashMap<>();
getparams.put("action", "query");
getparams.put("prop", "cirrusbuilddoc"); // slightly shorter output than cirrusdoc
List<List<String>> temp = makeVectorizedQuery("cb", getparams, titles,
"getLedeAsPlainText", -1, (text, result) ->
{
result.add(parseAttribute(text, "opening_text", 0));
});
// the mapping is one to one, so flatten each entry
List<String> ret = new ArrayList<>();
for (List<String> item : temp)
ret.add(item.get(0));
return ret;
}
/**
* Renders articles as plain text. Requires extension CirrusSearch. Output
* order is the same as input order, and a missing page will correspond to
 *  {@code null}. The returned plain text has the following characteristics
 *  in addition to those mentioned in {@link #getLedeAsPlainText(List)}:
*
* <ul>
* <li>There are no sections or section headers.
* <li>References are present at the end of the text, regardless of
* how they were formatted in wikitext.
 *  <li>Some small {@code <div>} templates still remain.
* <li>See also and external link lists remain (but the external links
* themselves are removed).
* </ul>
*
* @param titles a list of pages to get plain text for
* @return those pages rendered as plain text
* @throws IOException if a network error occurs
* @see #getLedeAsPlainText(List)
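 *  <p>A usage sketch (the article title is an example; a missing page
 *  yields {@code null}):
 *  <pre>{@code
 *  WMFWiki enWiki = WMFWiki.newSession("en.wikipedia.org");
 *  String text = enWiki.getPlainText(List.of("Albert Einstein")).get(0);
 *  }</pre>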
*/
public List<String> getPlainText(List<String> titles) throws IOException
{
requiresExtension("CirrusSearch");
Map<String, String> getparams = new HashMap<>();
getparams.put("action", "query");
getparams.put("prop", "cirrusbuilddoc"); // slightly shorter output than cirrusdoc
List<List<String>> temp = makeVectorizedQuery("cb", getparams, titles,
"getPlainText", -1, (text, result) ->
{
result.add(parseAttribute(text, " text", 0)); // not to be confused with "opening_text" etc.
});
// the mapping is one to one, so flatten each entry
List<String> ret = new ArrayList<>();
for (List<String> item : temp)
ret.add(item.get(0));
return ret;
}
/**
* Patrols or unpatrols new pages using the PageTriage extension. If a page
* is not in the queue, then this method adds the page to the PageTriage
* queue, which also unpatrols it.
*
* @param pageid the page to (un)patrol
* @param reason the reason for (un)patrolling the page for the log
* @param patrol true to patrol, false to unpatrol
 *  @param skipnotif if true, do not send a notification to the author when
 *  the page is (un)patrolled; ignored for old pages
* @throws IOException if a network error occurs
* @throws SecurityException if one does not have the rights to patrol pages
* @throws CredentialExpiredException if cookies have expired
* @throws AccountLockedException if user is blocked
* @throws IllegalArgumentException if the page is not in a namespace where
* PageTriage is enabled.
* @since 0.02
* @see <a href="https://en.wikipedia.org/wiki/Wikipedia:Page_Curation">Extension
* documentation</a>
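 *  <p>A usage sketch (the page ID and credentials are hypothetical;
 *  patrolling requires a signed-in account with the patrol right):
 *  <pre>{@code
 *  WMFWiki enWiki = WMFWiki.newSession("en.wikipedia.org");
 *  enWiki.login("ExampleUser", "password".toCharArray()); // exception handling omitted
 *  enWiki.triageNewPage(12345678L, "Reviewed: no issues found", true, true);
 *  }</pre>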
*/
public void triageNewPage(long pageid, String reason, boolean patrol, boolean skipnotif) throws IOException, LoginException
{
pageTriageAction(pageid, reason, patrol, skipnotif);
}
/**
* Internal method for handling PageTriage actions.
* @param pageid the page to (un)patrol or add to the queue
* @param reason the reason for (un)patrolling the page
* @param patrol whether to (un)patrol the page, or null to add it to the queue
 *  @param skipnotif if true, do not notify the article author; only
 *  applicable if {@code patrol != null}
* @throws IOException if a network error occurs
* @throws SecurityException if one does not have the rights to patrol pages
* @throws CredentialExpiredException if cookies have expired
* @throws AccountLockedException if user is blocked
* @throws IllegalArgumentException if the page is not in a namespace where
* PageTriage is enabled.
* @since 0.02
* @see <a href="https://en.wikipedia.org/wiki/Wikipedia:Page_Curation">Extension
* documentation</a>
* @see <a href="https://en.wikipedia.org/w/api.php?action=help&modules=pagetriageaction">API
* documentation</a>
*/
protected synchronized void pageTriageAction(long pageid, String reason, Boolean patrol, boolean skipnotif) throws IOException, LoginException
{
requiresExtension("PageTriage");
checkPermissions("patrol", "patrol");
throttle();
Map<String, Object> params = new HashMap<>();
params.put("action", "pagetriageaction");
params.put("pageid", pageid);
params.put("token", getToken("csrf"));
if (reason != null)
params.put("note", reason);
if (patrol != null)
{
params.put("reviewed", patrol ? "1" : "0");
if (skipnotif)
params.put("skipnotif", "1");
}
else
params.put("enqueue", "1");
String response = makeApiCall(new HashMap<>(), params, "pageTriageAction");
// Unfortunately there is no way to tell whether a particular page is
// in a PageTriage queue, so we are left with this workaround: enqueue
// pages that aren't in the queue when an unpatrol is requested.
if (response.contains("<error code=\"bad-pagetriage-page\"") && Boolean.FALSE.equals(patrol))
pageTriageAction(pageid, reason, null, false);
else
{
checkErrorsAndUpdateStatus(response, "pageTriageAction", Map.of(
"bad-pagetriage-enqueue-invalidnamespace", desc ->
new IllegalArgumentException("Cannot (un)patrol page, PageTriage is not enabled for this namespace.")), null);
log(Level.INFO, "pageTriageAction", "Successfully (un)patrolled page " + pageid);
}
}
}