Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Placate mypy (redux) #197

Draft
wants to merge 1 commit into
base: python3
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ repos:
rev: 1.6.0
hooks:
- id: poetry-check
# - id: poetry-lock
- id: poetry-lock
- id: poetry-export
args: ["-f", "requirements.txt", "-o", "requirements.txt"]
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
303 changes: 231 additions & 72 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 8 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,18 @@ requests = "^2.31.0"
flake8 = "^3.9.2"
pre-commit = "^2.17.0"
pymarkdown = "^0.1.4"
mypy = "^1.5.1"
types-requests = "^2.31.0.2"
# flake8-black may be unnecessary?
flake8-black = "^0.3.6"

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pymarkdown]
disable-rules = "line-length,no-inline-html"

[tool.mypy]
check_untyped_defs = true
ignore_missing_imports = true
6 changes: 3 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,9 @@ requests==2.31.0 ; python_version >= "3.8" and python_version < "4.0" \
schema==0.7.5 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:f06717112c61895cabc4707752b88716e8420a8819d71404501e114f91043197 \
--hash=sha256:f3ffdeeada09ec34bf40d7d79996d9f7175db93b7a5065de0faa7f41083c1e6c
setuptools==68.1.2 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d \
--hash=sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b
setuptools==68.2.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:00478ca80aeebeecb2f288d3206b0de568df5cd2b8fada1209843cc9a8d88a48 \
--hash=sha256:af3d5949030c3f493f550876b2fd1dd5ec66689c4ee5d5344f009746f71fd5a8
six==1.16.0 ; python_version >= "3.8" and python_version < "4.0" \
--hash=sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926 \
--hash=sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254
Expand Down
26 changes: 0 additions & 26 deletions wikiteam3/dumpgenerator/__init__.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,26 +0,0 @@
#!/usr/bin/env python3

# DumpGenerator A generator of dumps for wikis
# Copyright (C) 2011-2018 WikiTeam developers
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# To learn more, read the documentation:
# https://github.com/WikiTeam/wikiteam/wiki


from wikiteam3.dumpgenerator.dump import DumpGenerator


def main():
DumpGenerator()
30 changes: 28 additions & 2 deletions wikiteam3/dumpgenerator/__main__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,32 @@
#!/usr/bin/env python3

# DumpGenerator A generator of dumps for wikis
# Copyright (C) 2011-2018 WikiTeam developers
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# To learn more, read the documentation:
# https://github.com/WikiTeam/wikiteam/wiki


from wikiteam3.dumpgenerator.dump import DumpGenerator


def main():
    """Entry point for the dump generator: construct a ``DumpGenerator``.

    Returns ``None`` implicitly.
    """
    DumpGenerator()


if __name__ == "__main__":
import sys

from .__init__ import main

sys.exit(main())
2 changes: 2 additions & 0 deletions wikiteam3/dumpgenerator/api/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,5 @@
from .get_json import getJSON
from .handle_status_code import handleStatusCode
from .wiki_check import getWikiEngine

# Public names re-exported by this package. ``__all__`` must be a sequence of
# *strings*; listing the objects themselves makes ``from ... import *`` raise
# TypeError.
__all__ = [
    "checkAPI",
    "checkRetryAPI",
    "mwGetAPIAndIndex",
    "getJSON",
    "handleStatusCode",
    "getWikiEngine",
]
66 changes: 36 additions & 30 deletions wikiteam3/dumpgenerator/api/api.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import re
import time
from typing import *
from urllib.parse import urljoin, urlparse, urlunparse
from typing import Any, Literal, Optional
from urllib.parse import urljoin, urlparse

import mwclient
import requests
Expand All @@ -11,7 +10,8 @@
from .get_json import getJSON


def checkAPI(api="", session: requests.Session = None):
# api="", session: requests.Session = None
def checkAPI(api: str, session: requests.Session):
"""Checking API availability"""
global cj
# handle redirects
Expand All @@ -34,29 +34,31 @@ def checkAPI(api="", session: requests.Session = None):
"MediaWiki API URL not found or giving error: HTTP %d" % r.status_code
)
return None
if "MediaWiki API is not enabled for this site." in r.text:
return None
try:
result = getJSON(r)
index = None
if result:
try:
index = (
result["query"]["general"]["server"]
+ result["query"]["general"]["script"]
)
return (True, index, api)
except KeyError:
print("MediaWiki API seems to work but returned no index URL")
return (True, None, api)
except ValueError:
print(repr(r.text))
print("MediaWiki API returned data we could not parse")
return None
if r is not None:
if "MediaWiki API is not enabled for this site." in r.text:
return None
try:
result = getJSON(r)
index = None
if result:
try:
index = (
result["query"]["general"]["server"]
+ result["query"]["general"]["script"]
)
return (True, index, api)
except KeyError:
print("MediaWiki API seems to work but returned no index URL")
return (True, None, api)
except ValueError:
print(repr(r.text))
print("MediaWiki API returned data we could not parse")
return None
return None


def mwGetAPIAndIndex(url="", session: requests.Session = None):
# url=""
def mwGetAPIAndIndex(url: str, session: requests.Session):
"""Returns the MediaWiki API and Index.php"""

api = ""
Expand Down Expand Up @@ -108,18 +110,21 @@ def mwGetAPIAndIndex(url="", session: requests.Session = None):
return api, index


def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
# api="", apiclient=False
def checkRetryAPI(api: str, apiclient: bool, session: requests.Session):
"""Call checkAPI and mwclient if necessary"""
check = None
check: (tuple[Literal[True], Any, str] | tuple[Literal[True], None, str] | None)
try:
check = checkAPI(api, session=session)
except requests.exceptions.ConnectionError as e:
print(f"Connection error: {str(e)}")
check = None

if check and apiclient:
apiurl = urlparse(api)
try:
site = mwclient.Site(
# Returns a value, but we're just checking for an error here
mwclient.Site(
apiurl.netloc,
apiurl.path.replace("api.php", ""),
scheme=apiurl.scheme,
Expand All @@ -138,13 +143,14 @@ def checkRetryAPI(api="", apiclient=False, session: requests.Session = None):
)

try:
site = mwclient.Site(
# Returns a value, but we're just checking for an error here
mwclient.Site(
apiurl.netloc,
apiurl.path.replace("api.php", ""),
scheme=newscheme,
pool=session,
)
except KeyError:
check = False
check = False # type: ignore

return check, api
return check, api # type: ignore
2 changes: 1 addition & 1 deletion wikiteam3/dumpgenerator/api/get_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,6 @@ def getJSON(request: requests.Response):
# request.encoding = request.apparent_encoding
try:
return request.json()
except:
except Exception:
# Maybe an older API version which did not return correct JSON
return {}
5 changes: 3 additions & 2 deletions wikiteam3/dumpgenerator/api/index_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
import requests


def checkIndex(index="", cookies="", session: requests.Session = None):
# index="", cookies="", session=None
def checkIndex(index: str, cookies: str, session: requests.Session):
"""Checking index.php availability"""
r = session.post(url=index, data={"title": "Special:Version"}, timeout=30)
r = session.post(url=index, data={"title": "Special:Version"}, timeout=30) # type: ignore
if r.status_code >= 400:
print(f"ERROR: The wiki returned status code HTTP {r.status_code}")
return False
Expand Down
56 changes: 25 additions & 31 deletions wikiteam3/dumpgenerator/api/namespaces.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,50 @@
import re

import requests

from wikiteam3.dumpgenerator.api import getJSON
from wikiteam3.dumpgenerator.cli import Delay
from wikiteam3.dumpgenerator.config import Config


def getNamespacesScraper(config: Config = None, session=None):
def getNamespacesScraper(config: Config, session: requests.Session):
"""Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages"""
"""Function called if no API is available"""
namespaces = config.namespaces
namespacenames = {0: ""} # main is 0, no prefix
# namespacenames = {0: ""} # main is 0, no prefix
if namespaces:
r = session.post(
url=config.index, params={"title": "Special:Allpages"}, timeout=30
url=config.index, params={"title": "Special:Allpages"}, timeout=30 # type: ignore
)
raw = r.text
Delay(config=config, session=session)
Delay(config=config)

# [^>]*? to include selected="selected"
m = re.compile(
r'<option [^>]*?value=[\'"](?P<namespaceid>\d+)[\'"][^>]*?>(?P<namespacename>[^<]+)</option>'
).finditer(raw)
if "all" in namespaces:
namespaces = []
for i in m:
namespaces.append(int(i.group("namespaceid")))
namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
namespaces = [int(i.group("namespaceid")) for i in m]
# namespacenames[int(i.group("namespaceid"))] = i.group("namespacename")
else:
# check if those namespaces really exist in this wiki
namespaces2 = []
for i in m:
if int(i.group("namespaceid")) in namespaces:
namespaces2.append(int(i.group("namespaceid")))
namespacenames[int(i.group("namespaceid"))] = i.group(
"namespacename"
)
namespaces2 = [
int(i.group("namespaceid"))
for i in m
if int(i.group("namespaceid")) in namespaces
]
namespaces = namespaces2
else:
namespaces = [0]

namespaces = list(set(namespaces)) # uniques
print("%d namespaces found" % (len(namespaces)))
return namespaces, namespacenames
return namespaces


def getNamespacesAPI(config: Config = None, session=None):
def getNamespacesAPI(config: Config, session: requests.Session):
"""Uses the API to get the list of namespaces names and ids"""
namespaces = config.namespaces
namespacenames = {0: ""} # main is 0, no prefix
# namespacenames = {0: ""} # main is 0, no prefix
if namespaces:
r = session.get(
url=config.api,
Expand All @@ -60,37 +57,34 @@ def getNamespacesAPI(config: Config = None, session=None):
timeout=30,
)
result = getJSON(r)
Delay(config=config, session=session)
Delay(config=config)
try:
nsquery = result["query"]["namespaces"]
except KeyError:
except KeyError as ke:
print("Error: could not get namespaces from the API request.")
print("HTTP %d" % r.status_code)
print(r.text)
return None
raise ke

if "all" in namespaces:
namespaces = []
for i in nsquery.keys():
if int(i) < 0: # -1: Special, -2: Media, excluding
continue
namespaces.append(int(i))
namespacenames[int(i)] = nsquery[i]["*"]
namespaces = [int(i) for i in nsquery.keys() if int(i) >= 0]
# -1: Special, -2: Media, excluding
# namespacenames[int(i)] = nsquery[i]["*"]
else:
# check if those namespaces really exist in this wiki
namespaces2 = []
for i in nsquery.keys():
bi = i
# bi = i
i = int(i)
if i < 0: # -1: Special, -2: Media, excluding
continue
if i in namespaces:
namespaces2.append(i)
namespacenames[i] = nsquery[bi]["*"]
# namespacenames[i] = nsquery[bi]["*"]
namespaces = namespaces2
else:
namespaces = [0]

namespaces = list(set(namespaces)) # uniques
print("%d namespaces found" % (len(namespaces)))
return namespaces, namespacenames
return namespaces
Loading