[cleanup] Misc (yt-dlp#11347)

Closes yt-dlp#11361
Authored by: avagordon01, bashonly, grqz, Grub4K, seproDev

Co-authored-by: Ava Gordon <[email protected]>
Co-authored-by: bashonly <[email protected]>
Co-authored-by: N/Ame <[email protected]>
Co-authored-by: Simon Sawicki <[email protected]>
nicolaasjan · Nov 4, 2024 · 197d0b0 · 197d0b0
1 parent b03267b
commit 197d0b0
Show file tree

Hide file tree

Showing 72 changed files with 239 additions and 253 deletions.
diff --git a/README.md b/README.md
@@ -479,7 +479,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
     --no-download-archive           Do not use archive file (default)
     --max-downloads NUMBER          Abort after downloading NUMBER files
     --break-on-existing             Stop the download process when encountering
-                                    a file that is in the archive
+                                    a file that is in the archive supplied with
+                                    the --download-archive option
     --no-break-on-existing          Do not stop the download process when
                                     encountering a file that is in the archive
                                     (default)

diff --git a/test/test_traversal.py b/test/test_traversal.py
@@ -490,7 +490,7 @@ def test_subs_list_to_dict(self):
             {'url': 'https://example.com/subs/en', 'name': 'en'},
         ], [..., {
             'id': 'name',
-            'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
+            'ext': ['url', {determine_ext(default_ext=None)}],
             'url': 'url',
         }, all, {subs_list_to_dict(ext='ext')}]) == {
             'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],

diff --git a/test/test_utils.py b/test/test_utils.py
@@ -2156,7 +2156,7 @@ def test_partial_application(self):
         assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
         assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
         assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
-        assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function'
+        assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'
 
         assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
         assert callable(join_nonempty()), 'varargs positional should apply partially'

diff --git a/yt_dlp/extractor/afreecatv.py b/yt_dlp/extractor/afreecatv.py
@@ -154,7 +154,7 @@ def _real_extract(self, url):
             'title': ('title', {str}),
             'uploader': ('writer_nick', {str}),
             'uploader_id': ('bj_id', {str}),
-            'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
+            'duration': ('total_file_duration', {int_or_none(scale=1000)}),
             'thumbnail': ('thumb', {url_or_none}),
         })
 
@@ -178,7 +178,7 @@ def _real_extract(self, url):
                 'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
                 'formats': formats,
                 **traverse_obj(file_element, {
-                    'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
+                    'duration': ('duration', {int_or_none(scale=1000)}),
                     'timestamp': ('file_start', {unified_timestamp}),
                 }),
             })
@@ -234,7 +234,7 @@ def _entries(data):
             'catch_list', lambda _, v: v['files'][0]['file'], {
                 'id': ('files', 0, 'file_info_key', {str}),
                 'url': ('files', 0, 'file', {url_or_none}),
-                'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
+                'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
                 'title': ('title', {str}),
                 'uploader': ('writer_nick', {str}),
                 'uploader_id': ('writer_id', {str}),

diff --git a/yt_dlp/extractor/allstar.py b/yt_dlp/extractor/allstar.py
@@ -71,7 +71,7 @@ def media_url_or_none(path):
             'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
             'duration': ('clipLength', {int_or_none}),
             'filesize': ('clipSizeBytes', {int_or_none}),
-            'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
+            'timestamp': ('createdDate', {int_or_none(scale=1000)}),
             'uploader': ('username', {str}),
             'uploader_id': ('user', '_id', {str}),
             'view_count': ('views', {int_or_none}),

diff --git a/yt_dlp/extractor/bandcamp.py b/yt_dlp/extractor/bandcamp.py
@@ -1,4 +1,3 @@
-import functools
 import json
 import random
 import re
@@ -10,7 +9,6 @@
     ExtractorError,
     extract_attributes,
     float_or_none,
-    get_element_html_by_id,
     int_or_none,
     parse_filesize,
     str_or_none,
@@ -21,7 +19,7 @@
     url_or_none,
     urljoin,
 )
-from ..utils.traversal import traverse_obj
+from ..utils.traversal import find_element, traverse_obj
 
 
 class BandcampIE(InfoExtractor):
@@ -45,6 +43,8 @@ class BandcampIE(InfoExtractor):
             'uploader_url': 'https://youtube-dl.bandcamp.com',
             'uploader_id': 'youtube-dl',
             'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
+            'artists': ['youtube-dl "\'/\\ä↭'],
+            'album_artists': ['youtube-dl "\'/\\ä↭'],
         },
         'skip': 'There is a limit of 200 free downloads / month for the test song',
     }, {
@@ -271,6 +271,18 @@ class BandcampAlbumIE(BandcampIE):  # XXX: Do not subclass from concrete IE
                     'timestamp': 1311756226,
                     'upload_date': '20110727',
                     'uploader': 'Blazo',
+                    'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
+                    'album_artists': ['Blazo'],
+                    'uploader_url': 'https://blazo.bandcamp.com',
+                    'release_date': '20110727',
+                    'release_timestamp': 1311724800.0,
+                    'track': 'Intro',
+                    'uploader_id': 'blazo',
+                    'track_number': 1,
+                    'album': 'Jazz Format Mixtape vol.1',
+                    'artists': ['Blazo'],
+                    'duration': 19.335,
+                    'track_id': '1353101989',
                 },
             },
             {
@@ -282,13 +294,26 @@ class BandcampAlbumIE(BandcampIE):  # XXX: Do not subclass from concrete IE
                     'timestamp': 1311757238,
                     'upload_date': '20110727',
                     'uploader': 'Blazo',
+                    'track': 'Kero One - Keep It Alive (Blazo remix)',
+                    'release_date': '20110727',
+                    'track_id': '38097443',
+                    'track_number': 2,
+                    'duration': 181.467,
+                    'uploader_url': 'https://blazo.bandcamp.com',
+                    'album': 'Jazz Format Mixtape vol.1',
+                    'uploader_id': 'blazo',
+                    'album_artists': ['Blazo'],
+                    'artists': ['Blazo'],
+                    'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
+                    'release_timestamp': 1311724800.0,
                 },
             },
         ],
         'info_dict': {
             'title': 'Jazz Format Mixtape vol.1',
             'id': 'jazz-format-mixtape-vol-1',
             'uploader_id': 'blazo',
+            'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
         },
         'params': {
             'playlistend': 2,
@@ -363,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE):  # XXX: Do not subclass from concrete IE
     _VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
     _TESTS = [{
         'url': 'https://bandcamp.com/?show=224',
-        'md5': 'b00df799c733cf7e0c567ed187dea0fd',
+        'md5': '61acc9a002bed93986b91168aa3ab433',
         'info_dict': {
             'id': '224',
-            'ext': 'opus',
+            'ext': 'mp3',
             'title': 'BC Weekly April 4th 2017 - Magic Moments',
             'description': 'md5:5d48150916e8e02d030623a48512c874',
             'duration': 5829.77,
@@ -376,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE):  # XXX: Do not subclass from concrete IE
             'episode_id': '224',
         },
         'params': {
-            'format': 'opus-lo',
+            'format': 'mp3-128',
         },
     }, {
         'url': 'https://bandcamp.com/?blah/blah@&show=228',
@@ -484,7 +509,7 @@ def _yield_items(self, webpage):
             or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))
 
         yield from traverse_obj(webpage, (
-            {functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
+            {find_element(id='music-grid', html=True)}, {extract_attributes},
             'data-client-items', {json.loads}, ..., 'page_url', {str}))
 
     def _real_extract(self, url):
@@ -493,4 +518,4 @@ def _real_extract(self, url):
 
         return self.playlist_from_matches(
             self._yield_items(webpage), uploader, f'Discography of {uploader}',
-            getter=functools.partial(urljoin, url))
+            getter=urljoin(url))
diff --git a/yt_dlp/extractor/bbc.py b/yt_dlp/extractor/bbc.py
@@ -1284,9 +1284,9 @@ def parse_model(model):
                 **traverse_obj(model, {
                     'title': ('title', {str}),
                     'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
-                    'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
+                    'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
                     'duration': ('versions', 0, 'duration', {int}),
-                    'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
+                    'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
                 }),
             }
 
@@ -1386,7 +1386,7 @@ def parse_media(media):
                     formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
                         'url': ('url', {url_or_none}),
                         'ext': ('format', {str}),
-                        'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
+                        'tbr': ('bitrate', {int_or_none(scale=1000)}),
                     }))
                     if formats:
                         entry = {
@@ -1398,7 +1398,7 @@ def parse_media(media):
                                 'title': ('title', {str}),
                                 'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
                                 'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
-                                'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
+                                'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
                             }),
                         }
                         done = True
@@ -1428,7 +1428,7 @@ def extract_all(pattern):
             if not entry.get('timestamp'):
                 entry['timestamp'] = traverse_obj(next_data, (
                     ..., 'contents', is_type('timestamp'), 'model',
-                    'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
+                    'timestamp', {int_or_none(scale=1000)}, any))
             entries.append(entry)
             return self.playlist_result(
                 entries, playlist_id, playlist_title, playlist_description)

diff --git a/yt_dlp/extractor/bibeltv.py b/yt_dlp/extractor/bibeltv.py
@@ -1,4 +1,3 @@
-import functools
 
 from .common import InfoExtractor
 from ..utils import (
@@ -50,7 +49,7 @@ def _extract_base_info(data):
             **traverse_obj(data, {
                 'title': 'title',
                 'description': 'description',
-                'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
+                'duration': ('duration', {int_or_none(scale=1000)}),
                 'timestamp': ('schedulingStart', {parse_iso8601}),
                 'season_number': 'seasonNumber',
                 'episode_number': 'episodeNumber',

diff --git a/yt_dlp/extractor/bilibili.py b/yt_dlp/extractor/bilibili.py
@@ -109,7 +109,7 @@ def extract_formats(self, play_info):
 
         fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
             'url': ('url', {url_or_none}),
-            'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
+            'duration': ('length', {float_or_none(scale=1000)}),
             'filesize': ('size', {int_or_none}),
         }))
         if fragments:
@@ -124,7 +124,7 @@ def extract_formats(self, play_info):
                     'quality': ('quality', {int_or_none}),
                     'format_id': ('quality', {str_or_none}),
                     'format_note': ('quality', {lambda x: format_names.get(x)}),
-                    'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
+                    'duration': ('timelength', {float_or_none(scale=1000)}),
                 }),
                 **parse_resolution(format_names.get(play_info.get('quality'))),
             })
@@ -1585,7 +1585,7 @@ def _real_extract(self, url):
                 'title': ('title', {str}),
                 'uploader': ('upper', 'name', {str}),
                 'uploader_id': ('upper', 'mid', {str_or_none}),
-                'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
+                'timestamp': ('ctime', {int_or_none}, filter),
                 'thumbnail': ('cover', {url_or_none}),
             })),
         }

diff --git a/yt_dlp/extractor/bluesky.py b/yt_dlp/extractor/bluesky.py
@@ -382,7 +382,7 @@ def _extract_videos(self, root, video_id, embed_path='embed', record_path='recor
                 'age_limit': (
                     'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
                 'description': (*record_path, 'text', {str}, filter),
-                'title': (*record_path, 'text', {lambda x: x.replace('\n', '')}, {truncate_string(left=50)}),
+                'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
             }),
         })
         return entries