Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed case where links could be missed #13

Merged
merged 1 commit into from
Oct 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
0.3 (Unreleased)
================

- Made URL de-shortening less flaky [#13]
- Simplified fetching thumbnail URL [#13]
- Fixed cases where links could be missed [#13]
- GUI updates [#12]
- Logging updates [#11]
- Update docs to reflect region preferences [#10]
Expand Down
4 changes: 4 additions & 0 deletions nxbrew_dl/configs/general.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ dl_sites:
- "FreeDL"
- "GoFile"
- "HexLoad"
- "HexUpload"
- "MegaUp"
- "MixDrop"

Expand All @@ -17,6 +18,9 @@ dl_names:
forbidden_titles:
- "Latest RAW Game Updates [17th April 2024][47 New Updates] [DISCONTINUED]"

regionless_titles:
- "Full Game"

default_selected_regions:
- "USA"
- "Europe"
Expand Down
5 changes: 5 additions & 0 deletions nxbrew_dl/nxbrew_dl/nxbrew.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def download_game(

# Pull out useful things from the config
regions = list(self.general_config["regions"].keys())
regionless_titles = self.general_config["regionless_titles"]
languages = self.general_config["languages"]
implied_languages = self.general_config["implied_languages"]
dl_sites = self.general_config["dl_sites"]
Expand All @@ -259,13 +260,17 @@ def download_game(
dl_dict = get_dl_dict(
soup,
regions=regions,
regionless_titles=regionless_titles,
languages=languages,
implied_languages=implied_languages,
dl_sites=dl_sites,
dl_names=dl_names,
)
n_releases = len(dl_dict)

if n_releases == 0:
raise ValueError("No releases found")

self.logger.info(f"Found {n_releases} release(s):")

for release in dl_dict:
Expand Down
47 changes: 42 additions & 5 deletions nxbrew_dl/util/download_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def get_dl_dict(
dl_names,
regions=None,
languages=None,
regionless_titles=None,
implied_languages=None,
):
"""For a particular page, parse out download links
Expand All @@ -41,17 +42,22 @@ def get_dl_dict(
to None, which will use an empty list
languages (dict): languages to potentially parse. Defaults
to None, which will use an empty dict
regionless_titles (list): list of titles that have no region info.
Defaults to None, which will use an empty list
implied_languages (dict): Dictionary of mappings from regions
to implied languages. Defaults to None, which will use
an empty dict
"""

if implied_languages is None:
implied_languages = {}

if regions is None:
regions = []

if regionless_titles is None:
regionless_titles = []

if implied_languages is None:
implied_languages = {}

dl_dict = {}

# Find the strong tags, then start hunting
Expand Down Expand Up @@ -99,6 +105,15 @@ def get_dl_dict(

tag = tag.find_next("p")

# Alternatively, we might find something that looks like a region title,
# but doesn't contain any useful info

elif any([n in tag.text for n in regionless_titles]):

parsed_regions = ["All"]
parsed_languages = ["All"]
tag = tag.find_next("p")

else:
parsed_regions = ["All"]
parsed_languages = ["All"]
Expand Down Expand Up @@ -284,7 +299,12 @@ def parse_base_game(
finding_links = False

# Finally, hunt through to the next tag WITHOUT a link in it
tag = tag.find_next("p", href=False)
found_links = True
while found_links:
tag = tag.find_next("p", href=False)
links = tag.find_all("a", href=True)
if len(links) == 0:
found_links = False

return tag, base_game_dict

Expand Down Expand Up @@ -406,7 +426,24 @@ def bypass_ouo(
break

bs4 = BeautifulSoup(res.content, "lxml")
inputs = bs4.form.findAll("input", {"name": re.compile(r"token$")})

# Try and find the token. If we don't find anything, assume
# it's broken and try again
inputs = None
try:
inputs = bs4.form.findAll("input", {"name": re.compile(r"token$")})
except AttributeError:
if logger is not None:
logger.warning(f"Page load error. Waiting then retrying")

time.sleep(10)
bypass_ouo(
url,
logger=logger,
impersonate=impersonate,
n_retry=n_retry + 1,
)

data = {i.get("name"): i.get("value") for i in inputs}
data["x-token"] = RecaptchaV3()

Expand Down
7 changes: 2 additions & 5 deletions nxbrew_dl/util/html_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,7 @@ def get_thumb_url(soup):
soup (bs4.BeautifulSoup): soup object to find languages in
"""

# Get the main content, then find the first figure which we'll take as the thumbnail
content = soup.find("div", {"id": "content"})
thumb = content.findAll("figure")[0]
img = thumb.find("img")
url = img["src"]
img = soup.find("meta", {"property": "og:image"})
url = img["content"]

return url