-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathfix-double-redirects.py
executable file
·75 lines (56 loc) · 2.38 KB
/
fix-double-redirects.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#! /usr/bin/env python3
import logging
import mwparserfromhell
from ws.client import API
from ws.parser_helpers.wikicode import is_redirect
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class DoubleRedirects:
    """Find and fix double redirects, i.e. redirects pointing to a redirect.

    The ``api`` object passed to the constructor is used through
    ``api.redirects.map`` (mapping of redirect source title to target
    title), ``api.redirects.resolve()``, ``api.call_api()`` and
    ``api.edit()``.
    """

    # Summary attached to every edit made by this class.
    edit_summary = "fix double redirect"

    # Number of titles sent in a single query.  MediaWiki caps multi-value
    # parameters such as ``titles`` at 50 values for regular clients (500
    # with the "apihighlimits" right), so 50 is safe for every account.
    # Must be a positive integer.
    titles_per_query = 50

    def __init__(self, api):
        self.api = api

    def update_redirect_page(self, page, target):
        """Rewrite the redirect on ``page`` so that it points to ``target``.

        ``page`` is one entry of a ``prop=revisions`` query result and must
        contain the ``title``, ``pageid`` and ``revisions`` keys (with the
        content of the ``main`` slot and the timestamp).  Pages which
        contain anything else than the redirect itself are reported with an
        error message and left untouched.  No edit is made when the new
        text equals the old one.
        """
        title = page["title"]
        revision = page["revisions"][0]
        text_old = revision["slots"]["main"]["*"]
        timestamp = revision["timestamp"]

        if not is_redirect(text_old, full_match=True):
            logger.error(
                "Double redirect page '%s' is not empty, so it cannot be fixed automatically.",
                title,
            )
            return

        logger.info("Parsing '%s'...", title)
        wikicode = mwparserfromhell.parse(text_old)

        # The regex match above should guarantee exactly three nodes with
        # the wikilink last.  This is checked explicitly instead of with
        # ``assert`` because assertions are stripped under ``python -O``,
        # which would let the code below modify an unexpected node.
        nodes = wikicode.nodes
        if len(nodes) != 3 or not isinstance(nodes[2], mwparserfromhell.nodes.wikilink.Wikilink):
            logger.error(
                "Redirect page '%s' has an unexpected structure, so it cannot be fixed automatically.",
                title,
            )
            return

        wl_target = nodes[2]
        wl_target.title = target
        # drop the text part of the link, only the new title is kept
        wl_target.text = None
        text_new = str(wikicode)

        # also add Category:Archive to the redirect
        if target.startswith("ArchWiki:Archive"):
            text_new = text_new.rstrip() + "\n[[Category:Archive]]"

        if text_old != text_new:
            self.api.edit(title, page["pageid"], text_new, timestamp, self.edit_summary, bot="")

    def findall(self):
        """Return a dict mapping each double redirect to its direct target.

        A redirect is "double" when its target, with the ``#fragment`` part
        stripped, is itself a key of ``api.redirects.map``.  The values of
        the returned dict are the stripped targets.
        """
        # read the attribute once instead of on every iteration
        redirects = self.api.redirects.map
        double = {}
        for source, target in redirects.items():
            target = target.split("#", maxsplit=1)[0]
            if target in redirects:
                double[source] = target
        return double

    def fixall(self):
        """Fix every double redirect reported by :meth:`findall`.

        The revisions are fetched in batches of at most
        ``titles_per_query`` titles, and each fetched page is passed to
        :meth:`update_redirect_page` together with its resolved target.
        Pages returned without revisions and redirects that cannot be
        resolved are skipped.
        """
        double = self.findall()
        if not double:
            logger.info("There are no double redirects.")
            return

        titles = list(double)
        for start in range(0, len(titles), self.titles_per_query):
            batch = titles[start:start + self.titles_per_query]
            # fetch all revisions of the batch at once
            result = self.api.call_api(
                action="query",
                titles="|".join(batch),
                prop="revisions",
                rvprop="content|timestamp",
                rvslots="main",
            )
            for page in result["pages"].values():
                source = page["title"]
                if "revisions" not in page:
                    # e.g. a missing or invalid title; there is nothing to edit
                    logger.warning(
                        "Page '%s' was returned without revisions, skipping.",
                        source,
                    )
                    continue
                target = self.api.redirects.resolve(source)
                if target:
                    self.update_redirect_page(page, target)
if __name__ == "__main__":
    # The configuration module is needed only when running as a script.
    import ws.config

    # Create the API object (presumably configured from the command line
    # arguments -- see ws.config.object_from_argparser) and fix all double
    # redirects in one go.
    DoubleRedirects(
        ws.config.object_from_argparser(API, description="Fix double redirects")
    ).fixall()