Skip to content

Commit

Permalink
[cleanup] Misc (yt-dlp#11347)
Browse files (browse the repository at this point in the history)
Closes yt-dlp#11361
Authored by: avagordon01, bashonly, grqz, Grub4K, seproDev

Co-authored-by: Ava Gordon <[email protected]>
Co-authored-by: bashonly <[email protected]>
Co-authored-by: N/Ame <[email protected]>
Co-authored-by: Simon Sawicki <[email protected]>
  • Loading branch information
5 people authored Nov 4, 2024
1 parent b03267b commit 197d0b0
Show file tree
Hide file tree
Showing 72 changed files with 239 additions and 253 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -479,7 +479,8 @@ If you fork the project on GitHub, you can run your fork's [build workflow](.git
--no-download-archive Do not use archive file (default)
--max-downloads NUMBER Abort after downloading NUMBER files
--break-on-existing Stop the download process when encountering
a file that is in the archive
a file that is in the archive supplied with
the --download-archive option
--no-break-on-existing Do not stop the download process when
encountering a file that is in the archive
(default)
Expand Down
2 changes: 1 addition & 1 deletion test/test_traversal.py
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ def test_subs_list_to_dict(self):
{'url': 'https://example.com/subs/en', 'name': 'en'},
], [..., {
'id': 'name',
'ext': ['url', {lambda x: determine_ext(x, default_ext=None)}],
'ext': ['url', {determine_ext(default_ext=None)}],
'url': 'url',
}, all, {subs_list_to_dict(ext='ext')}]) == {
'de': [{'url': 'https://example.com/subs/de.ass', 'ext': 'ass'}],
Expand Down
2 changes: 1 addition & 1 deletion test/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2156,7 +2156,7 @@ def test_partial_application(self):
assert callable(int_or_none(scale=10)), 'missing positional parameter should apply partially'
assert int_or_none(10, scale=0.1) == 100, 'positionally passed argument should call function'
assert int_or_none(v=10) == 10, 'keyword passed positional should call function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial applicatino should call the function'
assert int_or_none(scale=0.1)(10) == 100, 'call after partial application should call the function'

assert callable(join_nonempty(delim=', ')), 'varargs positional should apply partially'
assert callable(join_nonempty()), 'varargs positional should apply partially'
Expand Down
6 changes: 3 additions & 3 deletions yt_dlp/extractor/afreecatv.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def _real_extract(self, url):
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('bj_id', {str}),
'duration': ('total_file_duration', {functools.partial(int_or_none, scale=1000)}),
'duration': ('total_file_duration', {int_or_none(scale=1000)}),
'thumbnail': ('thumb', {url_or_none}),
})

Expand All @@ -178,7 +178,7 @@ def _real_extract(self, url):
'title': f'{common_info.get("title") or "Untitled"} (part {file_num})',
'formats': formats,
**traverse_obj(file_element, {
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('file_start', {unified_timestamp}),
}),
})
Expand Down Expand Up @@ -234,7 +234,7 @@ def _entries(data):
'catch_list', lambda _, v: v['files'][0]['file'], {
'id': ('files', 0, 'file_info_key', {str}),
'url': ('files', 0, 'file', {url_or_none}),
'duration': ('files', 0, 'duration', {functools.partial(int_or_none, scale=1000)}),
'duration': ('files', 0, 'duration', {int_or_none(scale=1000)}),
'title': ('title', {str}),
'uploader': ('writer_nick', {str}),
'uploader_id': ('writer_id', {str}),
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/allstar.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def media_url_or_none(path):
'thumbnails': (('clipImageThumb', 'clipImageSource'), {'url': {media_url_or_none}}),
'duration': ('clipLength', {int_or_none}),
'filesize': ('clipSizeBytes', {int_or_none}),
'timestamp': ('createdDate', {functools.partial(int_or_none, scale=1000)}),
'timestamp': ('createdDate', {int_or_none(scale=1000)}),
'uploader': ('username', {str}),
'uploader_id': ('user', '_id', {str}),
'view_count': ('views', {int_or_none}),
Expand Down
41 changes: 33 additions & 8 deletions yt_dlp/extractor/bandcamp.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools
import json
import random
import re
Expand All @@ -10,7 +9,6 @@
ExtractorError,
extract_attributes,
float_or_none,
get_element_html_by_id,
int_or_none,
parse_filesize,
str_or_none,
Expand All @@ -21,7 +19,7 @@
url_or_none,
urljoin,
)
from ..utils.traversal import traverse_obj
from ..utils.traversal import find_element, traverse_obj


class BandcampIE(InfoExtractor):
Expand All @@ -45,6 +43,8 @@ class BandcampIE(InfoExtractor):
'uploader_url': 'https://youtube-dl.bandcamp.com',
'uploader_id': 'youtube-dl',
'thumbnail': 'https://f4.bcbits.com/img/a3216802731_5.jpg',
'artists': ['youtube-dl "\'/\\ä↭'],
'album_artists': ['youtube-dl "\'/\\ä↭'],
},
'skip': 'There is a limit of 200 free downloads / month for the test song',
}, {
Expand Down Expand Up @@ -271,6 +271,18 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311756226,
'upload_date': '20110727',
'uploader': 'Blazo',
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'album_artists': ['Blazo'],
'uploader_url': 'https://blazo.bandcamp.com',
'release_date': '20110727',
'release_timestamp': 1311724800.0,
'track': 'Intro',
'uploader_id': 'blazo',
'track_number': 1,
'album': 'Jazz Format Mixtape vol.1',
'artists': ['Blazo'],
'duration': 19.335,
'track_id': '1353101989',
},
},
{
Expand All @@ -282,13 +294,26 @@ class BandcampAlbumIE(BandcampIE): # XXX: Do not subclass from concrete IE
'timestamp': 1311757238,
'upload_date': '20110727',
'uploader': 'Blazo',
'track': 'Kero One - Keep It Alive (Blazo remix)',
'release_date': '20110727',
'track_id': '38097443',
'track_number': 2,
'duration': 181.467,
'uploader_url': 'https://blazo.bandcamp.com',
'album': 'Jazz Format Mixtape vol.1',
'uploader_id': 'blazo',
'album_artists': ['Blazo'],
'artists': ['Blazo'],
'thumbnail': 'https://f4.bcbits.com/img/a1721150828_5.jpg',
'release_timestamp': 1311724800.0,
},
},
],
'info_dict': {
'title': 'Jazz Format Mixtape vol.1',
'id': 'jazz-format-mixtape-vol-1',
'uploader_id': 'blazo',
'description': 'md5:38052a93217f3ffdc033cd5dbbce2989',
},
'params': {
'playlistend': 2,
Expand Down Expand Up @@ -363,10 +388,10 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
_VALID_URL = r'https?://(?:www\.)?bandcamp\.com/?\?(?:.*?&)?show=(?P<id>\d+)'
_TESTS = [{
'url': 'https://bandcamp.com/?show=224',
'md5': 'b00df799c733cf7e0c567ed187dea0fd',
'md5': '61acc9a002bed93986b91168aa3ab433',
'info_dict': {
'id': '224',
'ext': 'opus',
'ext': 'mp3',
'title': 'BC Weekly April 4th 2017 - Magic Moments',
'description': 'md5:5d48150916e8e02d030623a48512c874',
'duration': 5829.77,
Expand All @@ -376,7 +401,7 @@ class BandcampWeeklyIE(BandcampIE): # XXX: Do not subclass from concrete IE
'episode_id': '224',
},
'params': {
'format': 'opus-lo',
'format': 'mp3-128',
},
}, {
'url': 'https://bandcamp.com/?blah/blah@&show=228',
Expand Down Expand Up @@ -484,7 +509,7 @@ def _yield_items(self, webpage):
or re.findall(r'<div[^>]+trackTitle["\'][^"\']+["\']([^"\']+)', webpage))

yield from traverse_obj(webpage, (
{functools.partial(get_element_html_by_id, 'music-grid')}, {extract_attributes},
{find_element(id='music-grid', html=True)}, {extract_attributes},
'data-client-items', {json.loads}, ..., 'page_url', {str}))

def _real_extract(self, url):
Expand All @@ -493,4 +518,4 @@ def _real_extract(self, url):

return self.playlist_from_matches(
self._yield_items(webpage), uploader, f'Discography of {uploader}',
getter=functools.partial(urljoin, url))
getter=urljoin(url))
10 changes: 5 additions & 5 deletions yt_dlp/extractor/bbc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1284,9 +1284,9 @@ def parse_model(model):
**traverse_obj(model, {
'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, {lambda x: x or None}, any),
'description': ('synopses', ('long', 'medium', 'short'), {str}, filter, any),
'duration': ('versions', 0, 'duration', {int}),
'timestamp': ('versions', 0, 'availableFrom', {functools.partial(int_or_none, scale=1000)}),
'timestamp': ('versions', 0, 'availableFrom', {int_or_none(scale=1000)}),
}),
}

Expand Down Expand Up @@ -1386,7 +1386,7 @@ def parse_media(media):
formats = traverse_obj(media_data, ('playlist', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'ext': ('format', {str}),
'tbr': ('bitrate', {functools.partial(int_or_none, scale=1000)}),
'tbr': ('bitrate', {int_or_none(scale=1000)}),
}))
if formats:
entry = {
Expand All @@ -1398,7 +1398,7 @@ def parse_media(media):
'title': ('title', {str}),
'thumbnail': ('imageUrl', {lambda u: urljoin(url, u.replace('$recipe', 'raw'))}),
'description': ('synopses', ('long', 'medium', 'short'), {str}, any),
'timestamp': ('firstPublished', {functools.partial(int_or_none, scale=1000)}),
'timestamp': ('firstPublished', {int_or_none(scale=1000)}),
}),
}
done = True
Expand Down Expand Up @@ -1428,7 +1428,7 @@ def extract_all(pattern):
if not entry.get('timestamp'):
entry['timestamp'] = traverse_obj(next_data, (
..., 'contents', is_type('timestamp'), 'model',
'timestamp', {functools.partial(int_or_none, scale=1000)}, any))
'timestamp', {int_or_none(scale=1000)}, any))
entries.append(entry)
return self.playlist_result(
entries, playlist_id, playlist_title, playlist_description)
Expand Down
3 changes: 1 addition & 2 deletions yt_dlp/extractor/bibeltv.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import functools

from .common import InfoExtractor
from ..utils import (
Expand Down Expand Up @@ -50,7 +49,7 @@ def _extract_base_info(data):
**traverse_obj(data, {
'title': 'title',
'description': 'description',
'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
'duration': ('duration', {int_or_none(scale=1000)}),
'timestamp': ('schedulingStart', {parse_iso8601}),
'season_number': 'seasonNumber',
'episode_number': 'episodeNumber',
Expand Down
6 changes: 3 additions & 3 deletions yt_dlp/extractor/bilibili.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def extract_formats(self, play_info):

fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
'url': ('url', {url_or_none}),
'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
'duration': ('length', {float_or_none(scale=1000)}),
'filesize': ('size', {int_or_none}),
}))
if fragments:
Expand All @@ -124,7 +124,7 @@ def extract_formats(self, play_info):
'quality': ('quality', {int_or_none}),
'format_id': ('quality', {str_or_none}),
'format_note': ('quality', {lambda x: format_names.get(x)}),
'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
'duration': ('timelength', {float_or_none(scale=1000)}),
}),
**parse_resolution(format_names.get(play_info.get('quality'))),
})
Expand Down Expand Up @@ -1585,7 +1585,7 @@ def _real_extract(self, url):
'title': ('title', {str}),
'uploader': ('upper', 'name', {str}),
'uploader_id': ('upper', 'mid', {str_or_none}),
'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
'timestamp': ('ctime', {int_or_none}, filter),
'thumbnail': ('cover', {url_or_none}),
})),
}
Expand Down
2 changes: 1 addition & 1 deletion yt_dlp/extractor/bluesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ def _extract_videos(self, root, video_id, embed_path='embed', record_path='recor
'age_limit': (
'labels', ..., 'val', {lambda x: 18 if x in ('sexual', 'porn', 'graphic-media') else None}, any),
'description': (*record_path, 'text', {str}, filter),
'title': (*record_path, 'text', {lambda x: x.replace('\n', '')}, {truncate_string(left=50)}),
'title': (*record_path, 'text', {lambda x: x.replace('\n', ' ')}, {truncate_string(left=50)}),
}),
})
return entries
Loading

0 comments on commit 197d0b0

Please sign in to comment.