Merge pull request #13 from bbtufty/missing-links

Fixed case where links could be missed
bbtufty · Oct 26, 2024 · 5fe89df · 5fe89df
2 parents 522d90a + f2010e4
commit 5fe89df
Show file tree

Hide file tree

Showing 5 changed files with 56 additions and 10 deletions.
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -1,6 +1,9 @@
 0.3 (Unreleased)
 ================
 
+- Make URL de-shortening less flaky [#13]
+- Simplified fetching thumbnail URL [#13]
+- Fixed cases where links could be missed [#13]
 - GUI updates [#12]
 - Logging updates [#11]
 - Update docs to reflect region preferences [#10]

diff --git a/nxbrew_dl/configs/general.yml b/nxbrew_dl/configs/general.yml
@@ -3,6 +3,7 @@ dl_sites:
   - "FreeDL"
   - "GoFile"
   - "HexLoad"
+  - "HexUpload"
   - "MegaUp"
   - "MixDrop"
 
@@ -17,6 +18,9 @@ dl_names:
 forbidden_titles:
   - "Latest RAW Game Updates [17th April 2024][47 New Updates] [DISCONTINUED]"
 
+regionless_titles:
+  - "Full Game"
+
 default_selected_regions:
   - "USA"
   - "Europe"

diff --git a/nxbrew_dl/nxbrew_dl/nxbrew.py b/nxbrew_dl/nxbrew_dl/nxbrew.py
@@ -251,6 +251,7 @@ def download_game(
 
         # Pull out useful things from the config
         regions = list(self.general_config["regions"].keys())
+        regionless_titles = self.general_config["regionless_titles"]
         languages = self.general_config["languages"]
         implied_languages = self.general_config["implied_languages"]
         dl_sites = self.general_config["dl_sites"]
@@ -259,13 +260,17 @@ def download_game(
         dl_dict = get_dl_dict(
             soup,
             regions=regions,
+            regionless_titles=regionless_titles,
             languages=languages,
             implied_languages=implied_languages,
             dl_sites=dl_sites,
             dl_names=dl_names,
         )
         n_releases = len(dl_dict)
 
+        if n_releases == 0:
+            raise ValueError("No releases found")
+
         self.logger.info(f"Found {n_releases} release(s):")
 
         for release in dl_dict:

diff --git a/nxbrew_dl/util/download_tools.py b/nxbrew_dl/util/download_tools.py
@@ -25,6 +25,7 @@ def get_dl_dict(
     dl_names,
     regions=None,
     languages=None,
+    regionless_titles=None,
     implied_languages=None,
 ):
     """For a particular page, parse out download links
@@ -41,17 +42,22 @@ def get_dl_dict(
             to None, which will use an empty list
         languages (dict): languages to potentially parse. Defaults
             to None, which will use an empty dict
+        regionless_titles (list): list of titles that have no region info.
+            Defaults to None, which will use an empty list
         implied_languages (dict): Dictionary of mappings from regions
             to implied languages. Defaults to None, which will use
             an empty dict
     """
 
-    if implied_languages is None:
-        implied_languages = {}
-
     if regions is None:
         regions = []
 
+    if regionless_titles is None:
+        regionless_titles = []
+
+    if implied_languages is None:
+        implied_languages = {}
+
     dl_dict = {}
 
     # Find the strong tags, then start hunting
@@ -99,6 +105,15 @@ def get_dl_dict(
 
             tag = tag.find_next("p")
 
+        # Alternatively, we might find something that looks like a region title,
+        # but doesn't contain any useful info
+
+        elif any([n in tag.text for n in regionless_titles]):
+
+            parsed_regions = ["All"]
+            parsed_languages = ["All"]
+            tag = tag.find_next("p")
+
         else:
             parsed_regions = ["All"]
             parsed_languages = ["All"]
@@ -284,7 +299,12 @@ def parse_base_game(
             finding_links = False
 
     # Finally, hunt through to the next tag WITHOUT a link in
-    tag = tag.find_next("p", href=False)
+    found_links = True
+    while found_links:
+        tag = tag.find_next("p", href=False)
+        links = tag.find_all("a", href=True)
+        if len(links) == 0:
+            found_links = False
 
     return tag, base_game_dict
 
@@ -406,7 +426,24 @@ def bypass_ouo(
             break
 
         bs4 = BeautifulSoup(res.content, "lxml")
-        inputs = bs4.form.findAll("input", {"name": re.compile(r"token$")})
+
+        # Try and find the token. If we don't find anything, assume
+        # it's broken and try again
+        inputs = None
+        try:
+            inputs = bs4.form.findAll("input", {"name": re.compile(r"token$")})
+        except AttributeError:
+            if logger is not None:
+                logger.warning(f"Page load error. Waiting then retrying")
+
+            time.sleep(10)
+            bypass_ouo(
+                url,
+                logger=logger,
+                impersonate=impersonate,
+                n_retry=n_retry + 1,
+            )
+
         data = {i.get("name"): i.get("value") for i in inputs}
         data["x-token"] = RecaptchaV3()
 

diff --git a/nxbrew_dl/util/html_tools.py b/nxbrew_dl/util/html_tools.py
@@ -127,10 +127,7 @@ def get_thumb_url(soup):
         soup (bs4.BeautifulSoup): soup object to find the thumbnail URL in
     """
 
-    # Get the main content, then find the first figure which we'll take as the thumbnail
-    content = soup.find("div", {"id": "content"})
-    thumb = content.findAll("figure")[0]
-    img = thumb.find("img")
-    url = img["src"]
+    img = soup.find("meta", {"property": "og:image"})
+    url = img["content"]
 
     return url