Skip to content

Commit

Permalink
Merge pull request #13 from bbtufty/missing-links
Browse files Browse the repository at this point in the history
Fixed case where links could be missed
  • Loading branch information
bbtufty authored Oct 26, 2024
2 parents 522d90a + f2010e4 commit 5fe89df
Show file tree
Hide file tree
Showing 5 changed files with 56 additions and 10 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
0.3 (Unreleased)
================

- Made URL de-shortening less flaky [#13]
- Simplified fetching thumbnail URL [#13]
- Fixed cases where links could be missed [#13]
- GUI updates [#12]
- Logging updates [#11]
- Update docs to reflect region preferences [#10]
Expand Down
4 changes: 4 additions & 0 deletions nxbrew_dl/configs/general.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ dl_sites:
- "FreeDL"
- "GoFile"
- "HexLoad"
- "HexUpload"
- "MegaUp"
- "MixDrop"

Expand All @@ -17,6 +18,9 @@ dl_names:
forbidden_titles:
- "Latest RAW Game Updates [17th April 2024][47 New Updates] [DISCONTINUED]"

regionless_titles:
- "Full Game"

default_selected_regions:
- "USA"
- "Europe"
Expand Down
5 changes: 5 additions & 0 deletions nxbrew_dl/nxbrew_dl/nxbrew.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def download_game(

# Pull out useful things from the config
regions = list(self.general_config["regions"].keys())
regionless_titles = self.general_config["regionless_titles"]
languages = self.general_config["languages"]
implied_languages = self.general_config["implied_languages"]
dl_sites = self.general_config["dl_sites"]
Expand All @@ -259,13 +260,17 @@ def download_game(
dl_dict = get_dl_dict(
soup,
regions=regions,
regionless_titles=regionless_titles,
languages=languages,
implied_languages=implied_languages,
dl_sites=dl_sites,
dl_names=dl_names,
)
n_releases = len(dl_dict)

if n_releases == 0:
raise ValueError("No releases found")

self.logger.info(f"Found {n_releases} release(s):")

for release in dl_dict:
Expand Down
47 changes: 42 additions & 5 deletions nxbrew_dl/util/download_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def get_dl_dict(
dl_names,
regions=None,
languages=None,
regionless_titles=None,
implied_languages=None,
):
"""For a particular page, parse out download links
Expand All @@ -41,17 +42,22 @@ def get_dl_dict(
to None, which will use an empty list
languages (dict): dict of languages to potentially parse. Defaults
to None, which will use an empty dict
regionless_titles (list): list of titles that have no region info.
Defaults to None, which will use an empty list
implied_languages (dict): Dictionary of mappings from regions
to implied languages. Defaults to None, which will use
an empty dict
"""

if implied_languages is None:
implied_languages = {}

if regions is None:
regions = []

if regionless_titles is None:
regionless_titles = []

if implied_languages is None:
implied_languages = {}

dl_dict = {}

# Find the strong tags, then start hunting
Expand Down Expand Up @@ -99,6 +105,15 @@ def get_dl_dict(

tag = tag.find_next("p")

# Alternatively, we might find something that looks like a region title,
# but doesn't contain any useful info

elif any([n in tag.text for n in regionless_titles]):

parsed_regions = ["All"]
parsed_languages = ["All"]
tag = tag.find_next("p")

else:
parsed_regions = ["All"]
parsed_languages = ["All"]
Expand Down Expand Up @@ -284,7 +299,12 @@ def parse_base_game(
finding_links = False

# Finally, hunt through to the next tag WITHOUT a link in
tag = tag.find_next("p", href=False)
found_links = True
while found_links:
tag = tag.find_next("p", href=False)
links = tag.find_all("a", href=True)
if len(links) == 0:
found_links = False

return tag, base_game_dict

Expand Down Expand Up @@ -406,7 +426,24 @@ def bypass_ouo(
break

bs4 = BeautifulSoup(res.content, "lxml")
inputs = bs4.form.findAll("input", {"name": re.compile(r"token$")})

# Try and find the token. If we don't find anything, assume
# it's broken and try again
inputs = None
try:
inputs = bs4.form.findAll("input", {"name": re.compile(r"token$")})
except AttributeError:
if logger is not None:
logger.warning(f"Page load error. Waiting then retrying")

time.sleep(10)
bypass_ouo(
url,
logger=logger,
impersonate=impersonate,
n_retry=n_retry + 1,
)

data = {i.get("name"): i.get("value") for i in inputs}
data["x-token"] = RecaptchaV3()

Expand Down
7 changes: 2 additions & 5 deletions nxbrew_dl/util/html_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,7 @@ def get_thumb_url(soup):
soup (bs4.BeautifulSoup): soup object to find languages in
"""

# Get the main content, then find the first figure which we'll take as the thumbnail
content = soup.find("div", {"id": "content"})
thumb = content.findAll("figure")[0]
img = thumb.find("img")
url = img["src"]
img = soup.find("meta", {"property": "og:image"})
url = img["content"]

return url

0 comments on commit 5fe89df

Please sign in to comment.