Merge pull request #170 from David-JonesDVN/channel-tabs

Add functional but preliminary channel tab support
user234683 · May 21, 2023
commit bc51240 · 2 parents: 6a44f9f + 20868b4

Showing 4 changed files with 54 additions and 22 deletions.
diff --git a/youtube/channel.py b/youtube/channel.py
@@ -32,16 +32,23 @@
 generic_cookie = (('Cookie', 'VISITOR_INFO1_LIVE=ST1Ti53r4fU'),)
 
 # added an extra nesting under the 2nd base64 compared to v4
+# added tab support: the tab name selects the field number (videos=15, shorts=10, streams=14)
 def channel_ctoken_v5(channel_id, page, sort, tab, view=1):
     new_sort = (2 if int(sort) == 1 else 1)
     offset = str(30*(int(page) - 1))
+    if tab == 'videos':
+        tab = 15
+    elif tab == 'shorts':
+        tab = 10
+    elif tab == 'streams':
+        tab = 14
     pointless_nest = proto.string(80226972,
         proto.string(2, channel_id)
         + proto.string(3,
             proto.percent_b64encode(
                 proto.string(110,
                     proto.string(3,
-                        proto.string(15,
+                        proto.string(tab,
                             proto.string(1,
                                 proto.string(1,
                                     proto.unpadded_b64encode(
@@ -198,7 +205,7 @@ def get_channel_tab(channel_id, page="1", sort=3, tab='videos', view=1,
     message = 'Got channel tab' if print_status else None
 
     if not ctoken:
-        if tab == 'videos':
+        if tab in ('videos', 'shorts', 'streams'):
             ctoken = channel_ctoken_v5(channel_id, page, sort, tab, view)
         else:
             ctoken = channel_ctoken_v3(channel_id, page, sort, tab, view)
@@ -338,11 +345,11 @@ def post_process_channel_info(info):
                 info['links'][i] = (text, util.prefix_url(url))
 
 
-def get_channel_first_page(base_url=None, channel_id=None):
+def get_channel_first_page(base_url=None, channel_id=None, tab='videos'):
     if channel_id:
         base_url = 'https://www.youtube.com/channel/' + channel_id
-    return util.fetch_url(base_url + '/videos?pbj=1&view=0', headers_desktop,
-                          debug_name='gen_channel_videos')
+    return util.fetch_url(base_url + '/' + tab + '?pbj=1&view=0',
+                          headers_desktop, debug_name='gen_channel_' + tab)
 
 
 playlist_sort_codes = {'2': "da", '3': "dd", '4': "lad"}
@@ -361,24 +368,25 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
     default_params = (page_number == 1 and sort == '3' and view == '1')
     continuation = bool(ctoken) # whether or not we're using a continuation
 
-    if tab == 'videos' and channel_id and not default_params:
+    if (tab in ('videos', 'shorts', 'streams') and channel_id and
+        not default_params):
         tasks = (
             gevent.spawn(get_number_of_videos_channel, channel_id),
             gevent.spawn(get_channel_tab, channel_id, page_number, sort,
-                         'videos', view, ctoken)
+                         tab, view, ctoken)
         )
         gevent.joinall(tasks)
         util.check_gevent_exceptions(*tasks)
         number_of_videos, polymer_json = tasks[0].value, tasks[1].value
         continuation = True
-    elif tab == 'videos':
+    elif tab in ('videos', 'shorts', 'streams'):
         if channel_id:
             num_videos_call = (get_number_of_videos_channel, channel_id)
         else:
             num_videos_call = (get_number_of_videos_general, base_url)
         tasks = (
             gevent.spawn(*num_videos_call),
-            gevent.spawn(get_channel_first_page, base_url=base_url),
+            gevent.spawn(get_channel_first_page, base_url=base_url, tab=tab),
         )
         gevent.joinall(tasks)
         util.check_gevent_exceptions(*tasks)
@@ -429,11 +437,11 @@ def get_channel_page_general_url(base_url, tab, request, channel_id=None):
     if info['error'] is not None:
         return flask.render_template('error.html', error_message = info['error'])
 
-    if tab == 'videos':
+    if tab in ('videos', 'shorts', 'streams'):
         info['number_of_videos'] = number_of_videos
         info['number_of_pages'] = math.ceil(number_of_videos/30)
         info['header_playlist_names'] = local_playlist.get_playlist_names()
-    if tab in ('videos', 'playlists'):
+    if tab in ('videos', 'shorts', 'streams', 'playlists'):
         info['current_sort'] = sort
     elif tab == 'search':
         info['search_box_value'] = query

diff --git a/youtube/templates/channel.html b/youtube/templates/channel.html
@@ -120,7 +120,7 @@ <h2 class="title">{{ channel_name }}</h2>
         </div>
     </div>
     <nav class="channel-tabs">
-        {% for tab_name in ('Videos', 'Playlists', 'About') %}
+        {% for tab_name in ('Videos', 'Shorts', 'Streams', 'Playlists', 'About') %}
             {% if tab_name.lower() == current_tab %}
                 <a class="tab page-button">{{ tab_name }}</a>
             {% else %}
@@ -159,7 +159,7 @@ <h3>Description</h3>
     {% else %}
         <div class="content {{ current_tab + '-content'}}">
             <div id="links-metadata">
-                {% if current_tab == 'videos' %}
+                {% if current_tab in ('videos', 'shorts', 'streams') %}
                     {% set sorts = [('1', 'views'), ('2', 'oldest'), ('3', 'newest')] %}
                     <div id="number-of-results">{{ number_of_videos }} videos</div>
                 {% elif current_tab == 'playlists' %}
@@ -194,11 +194,11 @@ <h2 class="page-number">No results</h2>
                 {% endfor %}
             </nav>
 
-            {% if current_tab == 'videos' and current_sort.__str__() == '2' %}
+            {% if (current_tab in ('videos', 'shorts', 'streams')) and current_sort.__str__() == '2' %}
                 <nav class="next-previous-button-row">
                     {{ common_elements.next_previous_ctoken_buttons(None, ctoken, channel_url + '/' + current_tab, parameters_dictionary) }}
                 </nav>
-            {% elif current_tab == 'videos' %}
+            {% elif current_tab in ('videos', 'shorts', 'streams') %}
                 <nav class="page-button-row">
                     {{ common_elements.page_buttons(number_of_pages, channel_url + '/' + current_tab, parameters_dictionary, include_ends=(current_sort.__str__() == '3')) }}
                 </nav>

diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
@@ -249,6 +249,9 @@ def extract_item_info(item, additional_info={}):
     primary_type = type_parts[-2]
     if primary_type == 'video':
         info['type'] = 'video'
+    elif type_parts[0] == 'reel': # shorts
+        info['type'] = 'video'
+        primary_type = 'video'
     elif primary_type in ('playlist', 'radio', 'show'):
         info['type'] = 'playlist'
         info['playlist_type'] = primary_type
@@ -295,10 +298,11 @@ def extract_item_info(item, additional_info={}):
             info['time_published'] = timestamp.group(1)
 
     if primary_type == 'video':
-        info['id'] = item.get('videoId')
-        if not info['id']:
-            info['id'] = deep_get(item,'navigationEndpoint', 'watchEndpoint',
-                                  'videoId')
+        info['id'] = multi_deep_get(item,
+            ['videoId'],
+            ['navigationEndpoint', 'watchEndpoint', 'videoId'],
+            ['navigationEndpoint', 'reelWatchEndpoint', 'videoId'] # shorts
+        )
         info['view_count'] = extract_int(item.get('viewCountText'))
 
         # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
@@ -316,17 +320,35 @@ def extract_item_info(item, additional_info={}):
         if info['view_count']:
             info['approx_view_count'] = '{:,}'.format(info['view_count'])
         else:
-            info['approx_view_count'] = extract_approx_int(item.get('shortViewCountText'))
+            info['approx_view_count'] = extract_approx_int(multi_get(item,
+                'shortViewCountText',
+                'viewCountText' # shorts
+            ))
 
         # handle case where it is "No views"
         if not info['approx_view_count']:
             if ('No views' in item.get('shortViewCountText', '')
-                    or 'no views' in accessibility_label.lower()):
+                    or 'no views' in accessibility_label.lower()
+                    or 'No views' in extract_str(item.get('viewCountText', '')) # shorts
+            ):
                 info['view_count'] = 0
                 info['approx_view_count'] = '0'
 
         info['duration'] = extract_str(item.get('lengthText'))
 
+        # dig into accessibility data to get duration for shorts
+        accessibility_label = deep_get(item,
+            'accessibility', 'accessibilityData', 'label',
+            default='')
+        duration = re.search(r'(\d+) (second|seconds|minute) - play video$',
+                            accessibility_label)
+        if duration:
+            if duration.group(2) == 'minute':
+                conservative_update(info, 'duration', '1:00')
+            else:
+                conservative_update(info,
+                    'duration', '0:' + duration.group(1).zfill(2))
+
         # if it's an item in a playlist, get its index
         if 'index' in item: # url has wrong index on playlist page
             info['index'] = extract_int(item.get('index'))
@@ -398,6 +420,8 @@ def extract_response(polymer_json):
     'gridVideoRenderer',
     'playlistVideoRenderer',
 
+    'reelItemRenderer',
+
     'playlistRenderer',
     'compactPlaylistRenderer',
     'gridPlaylistRenderer',

diff --git a/youtube/yt_data_extract/everything_else.py b/youtube/yt_data_extract/everything_else.py
@@ -73,7 +73,7 @@ def extract_channel_info(polymer_json, tab, continuation=False):
     #if 'contents' not in response and 'continuationContents' not in response:
     #    return info
 
-    if tab in ('videos', 'playlists', 'search'):
+    if tab in ('videos', 'shorts', 'streams', 'playlists', 'search'):
         items, ctoken = extract_items(response)
         additional_info = {
             'author': info['channel_name'],