mediawiki-client-tools · elsiehupp · Sep 8, 2023
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,7 +9,7 @@ repos:
     rev: 1.6.0
     hooks:
     -   id: poetry-check
-    # -   id: poetry-lock
+    -   id: poetry-lock
     -   id: poetry-export
         args: ["-f", "requirements.txt", "-o", "requirements.txt"]
 -   repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -77,10 +77,18 @@ requests = "^2.31.0"
 flake8 = "^3.9.2"
 pre-commit = "^2.17.0"
 pymarkdown = "^0.1.4"
+mypy = "^1.5.1"
+types-requests = "^2.31.0.2"
+# TODO: confirm whether flake8-black is actually needed; drop it if not.
+flake8-black = "^0.3.6"
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
 build-backend = "poetry.core.masonry.api"
 
 [tool.pymarkdown]
 disable-rules = "line-length,no-inline-html"
+
+[tool.mypy]
+check_untyped_defs = true
+ignore_missing_imports = true
diff --git a/requirements.txt b/requirements.txt
@@ -217,9 +217,9 @@ requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0" \
 schema==0.7.5 ; python_version >= "3.8" and python_version < "4.0" \
     --hash=sha256:f06717112c61895cabc4707752b88716e8420a8819d71404501e114f91043197 \
     --hash=sha256:f3ffdeeada09ec34bf40d7d79996d9f7175db93b7a5065de0faa7f41083c1e6c
-setuptools==68.1.2 ; python_version >= "3.8" and python_version < "4.0" \
-    --hash=sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d \
-    --hash=sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b
+setuptools==68.2.0 ; python_version >= "3.8" and python_version < "4.0" \
+    --hash=sha256:00478ca80aeebeecb2f288d3206b0de568df5cd2b8fada1209843cc9a8d88a48 \
+    --hash=sha256:af3d5949030c3f493f550876b2fd1dd5ec66689c4ee5d5344f009746f71fd5a8
 six==1.16.0 ; python_version >= "3.8" and python_version < "4.0" \
     --hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
     --hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254

diff --git a/wikiteam3/dumpgenerator/__init__.py b/wikiteam3/dumpgenerator/__init__.py
@@ -1,26 +0,0 @@
-#!/usr/bin/env python3
-
-# DumpGenerator A generator of dumps for wikis
-# Copyright (C) 2011-2018 WikiTeam developers
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program.  If not, see <http://www.gnu.org/licenses/>.
-
-# To learn more, read the documentation:
-#     https://github.com/WikiTeam/wikiteam/wiki
-
-
-from wikiteam3.dumpgenerator.dump import DumpGenerator
-
-
-def main():
-    DumpGenerator()

diff --git a/wikiteam3/dumpgenerator/__main__.py b/wikiteam3/dumpgenerator/__main__.py
@@ -1,6 +1,32 @@
+#!/usr/bin/env python3
+
+# DumpGenerator A generator of dumps for wikis
+# Copyright (C) 2011-2018 WikiTeam developers
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# To learn more, read the documentation:
+#     https://github.com/WikiTeam/wikiteam/wiki
+
+
+from wikiteam3.dumpgenerator.dump import DumpGenerator
+
+
+def main():
+    DumpGenerator()
+
+
 if __name__ == "__main__":
     import sys
 
-    from .__init__ import main
-
     sys.exit(main())
diff --git a/wikiteam3/dumpgenerator/api/__init__.py b/wikiteam3/dumpgenerator/api/__init__.py
@@ -2,3 +2,13 @@
 from .get_json import getJSON
 from .handle_status_code import handleStatusCode
 from .wiki_check import getWikiEngine
+
+# Public API of this package. Entries must be strings naming the exports.
+__all__ = [
+    "checkAPI",
+    "checkRetryAPI",
+    "mwGetAPIAndIndex",
+    "getJSON",
+    "handleStatusCode",
+    "getWikiEngine",
+]
diff --git a/wikiteam3/dumpgenerator/api/api.py b/wikiteam3/dumpgenerator/api/api.py
@@ -1,7 +1,6 @@
 import re
-import time
-from typing import *
-from urllib.parse import urljoin, urlparse, urlunparse
+from typing import Any, Literal, Optional
+from urllib.parse import urljoin, urlparse
 
 import mwclient
 import requests
@@ -11,7 +10,8 @@
 from .get_json import getJSON
 
 
-def checkAPI(api="", session: requests.Session = None):
+# NOTE: api and session are now required (old defaults: api="", session=None).
+def checkAPI(api: str, session: requests.Session):
     """Checking API availability"""
     global cj
     # handle redirects
@@ -34,29 +34,31 @@ def checkAPI(api="", session: requests.Session = None):
                 "MediaWiki API URL not found or giving error: HTTP %d" % r.status_code
             )
             return None
-    if "MediaWiki API is not enabled for this site." in r.text:
-        return None
-    try:
-        result = getJSON(r)
-        index = None
-        if result:
-            try:
-                index = (
-                    result["query"]["general"]["server"]
-                    + result["query"]["general"]["script"]
-                )
-                return (True, index, api)
-            except KeyError:
-                print("MediaWiki API seems to work but returned no index URL")
-                return (True, None, api)
-    except ValueError:
-        print(repr(r.text))
-        print("MediaWiki API returned data we could not parse")
-        return None
+    if r is not None:
+        if "MediaWiki API is not enabled for this site." in r.text:
+            return None
+        try:
+            result = getJSON(r)
+            index = None
+            if result:
+                try:
+                    index = (
+                        result["query"]["general"]["server"]
+                        + result["query"]["general"]["script"]
+                    )
+                    return (True, index, api)
+                except KeyError:
+                    print("MediaWiki API seems to work but returned no index URL")
+                    return (True, None, api)
+        except ValueError:
+            print(repr(r.text))
+            print("MediaWiki API returned data we could not parse")
+            return None
     return None
 
 
-def mwGetAPIAndIndex(url="", session: requests.Session = None):
+# NOTE: url and session are now required (old defaults: url="", session=None).
+def mwGetAPIAndIndex(url: str, session: requests.Session):
     """Returns the MediaWiki API and Index.php"""
 
     api = ""
@@ -108,18 +110,21 @@ def mwGetAPIAndIndex(url="", session: requests.Session = None):
     return api, index
 
 
-def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
+# NOTE: arguments are now required (old: api="", apiclient=False, session=None).
+def checkRetryAPI(api: str, apiclient: bool, session: requests.Session):
     """Call checkAPI and mwclient if necessary"""
-    check = None
+    check: (tuple[Literal[True], Any, str] | tuple[Literal[True], None, str] | None)
     try:
         check = checkAPI(api, session=session)
     except requests.exceptions.ConnectionError as e:
         print(f"Connection error: {str(e)}")
+        check = None
 
     if check and apiclient:
         apiurl = urlparse(api)
         try:
-            site = mwclient.Site(
+            # Returns a value, but we're just checking for an error here
+            mwclient.Site(
                 apiurl.netloc,
                 apiurl.path.replace("api.php", ""),
                 scheme=apiurl.scheme,
@@ -138,13 +143,14 @@ def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
             )
 
             try:
-                site = mwclient.Site(
+                # Returns a value, but we're just checking for an error here
+                mwclient.Site(
                     apiurl.netloc,
                     apiurl.path.replace("api.php", ""),
                     scheme=newscheme,
                     pool=session,
                 )
             except KeyError:
-                check = False
+                check = False  # type: ignore
 
-    return check, api
+    return check, api  # type: ignore
diff --git a/wikiteam3/dumpgenerator/api/get_json.py b/wikiteam3/dumpgenerator/api/get_json.py
@@ -8,6 +8,6 @@ def getJSON(request: requests.Response):
     # request.encoding = request.apparent_encoding
     try:
         return request.json()
-    except:
+    except Exception:
         # Maybe an older API version which did not return correct JSON
         return {}
diff --git a/wikiteam3/dumpgenerator/api/index_check.py b/wikiteam3/dumpgenerator/api/index_check.py
@@ -3,9 +3,10 @@
 import requests
 
 
-def checkIndex(index="", cookies="", session: requests.Session = None):
+# NOTE: arguments are now required (old: index="", cookies="", session=None).
+def checkIndex(index: str, cookies: str, session: requests.Session):
     """Checking index.php availability"""
-    r = session.post(url=index, data={"title": "Special:Version"}, timeout=30)
+    r = session.post(url=index, data={"title": "Special:Version"}, timeout=30)  # type: ignore
     if r.status_code >= 400:
         print(f"ERROR: The wiki returned status code HTTP {r.status_code}")
         return False

diff --git a/wikiteam3/dumpgenerator/api/namespaces.py b/wikiteam3/dumpgenerator/api/namespaces.py
@@ -1,53 +1,50 @@
 import re
 
+import requests
+
 from wikiteam3.dumpgenerator.api import getJSON
 from wikiteam3.dumpgenerator.cli import Delay
 from wikiteam3.dumpgenerator.config import Config
 
 
-def getNamespacesScraper(config: Config = None, session=None):
+def getNamespacesScraper(config: Config, session: requests.Session):
     """Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages"""
     """Function called if no API is available"""
     namespaces = config.namespaces
-    namespacenames = {0: ""}  # main is 0, no prefix
+    # namespacenames = {0: ""}  # main is 0, no prefix
     if namespaces:
         r = session.post(
-            url=config.index, params={"title": "Special:Allpages"}, timeout=30
+            url=config.index, params={"title": "Special:Allpages"}, timeout=30  # type: ignore
         )
         raw = r.text
-        Delay(config=config, session=session)
+        Delay(config=config)
 
         # [^>]*? to include selected="selected"
         m = re.compile(
             r'<option [^>]*?value=[\'"](?P<namespaceid>\d+)[\'"][^>]*?>(?P<namespacename>[^<]+)</option>'
         ).finditer(raw)
         if "all" in namespaces:
-            namespaces = []
-            for i in m:
-                namespaces.append(int(i.group("namespaceid")))
-                namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
+            namespaces = [int(i.group("namespaceid")) for i in m]
+            # namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
         else:
-            # check if those namespaces really exist in this wiki
-            namespaces2 = []
-            for i in m:
-                if int(i.group("namespaceid")) in namespaces:
-                    namespaces2.append(int(i.group("namespaceid")))
-                    namespacenames[int(i.group("namespaceid"))] = i.group(
-                        "namespacename"
-                    )
+            namespaces2 = [
+                int(i.group("namespaceid"))
+                for i in m
+                if int(i.group("namespaceid")) in namespaces
+            ]
             namespaces = namespaces2
     else:
         namespaces = [0]
 
     namespaces = list(set(namespaces))  # uniques
     print("%d namespaces found" % (len(namespaces)))
-    return namespaces, namespacenames
+    return namespaces
 
 
-def getNamespacesAPI(config: Config = None, session=None):
+def getNamespacesAPI(config: Config, session: requests.Session):
     """Uses the API to get the list of namespaces names and ids"""
     namespaces = config.namespaces
-    namespacenames = {0: ""}  # main is 0, no prefix
+    # namespacenames = {0: ""}  # main is 0, no prefix
     if namespaces:
         r = session.get(
             url=config.api,
@@ -60,37 +57,34 @@ def getNamespacesAPI(config: Config = None, session=None):
             timeout=30,
         )
         result = getJSON(r)
-        Delay(config=config, session=session)
+        Delay(config=config)
         try:
             nsquery = result["query"]["namespaces"]
-        except KeyError:
+        except KeyError as ke:
             print("Error: could not get namespaces from the API request.")
             print("HTTP %d" % r.status_code)
             print(r.text)
-            return None
+            raise ke
 
         if "all" in namespaces:
-            namespaces = []
-            for i in nsquery.keys():
-                if int(i) < 0:  # -1: Special, -2: Media, excluding
-                    continue
-                namespaces.append(int(i))
-                namespacenames[int(i)] = nsquery[i]["*"]
+            namespaces = [int(i) for i in nsquery.keys() if int(i) >= 0]
+            # Negative ids (-1: Special, -2: Media) are excluded by the filter above.
+            # namespacenames[int(i)] = nsquery[i]["*"]
         else:
             # check if those namespaces really exist in this wiki
             namespaces2 = []
             for i in nsquery.keys():
-                bi = i
+                # bi = i
                 i = int(i)
                 if i < 0:  # -1: Special, -2: Media, excluding
                     continue
                 if i in namespaces:
                     namespaces2.append(i)
-                    namespacenames[i] = nsquery[bi]["*"]
+                    # namespacenames[i] = nsquery[bi]["*"]
             namespaces = namespaces2
     else:
         namespaces = [0]
 
     namespaces = list(set(namespaces))  # uniques
     print("%d namespaces found" % (len(namespaces)))
-    return namespaces, namespacenames
+    return namespaces