Skip to content

Commit

Permalink
Fix parsing shorts
Browse files Browse the repository at this point in the history
Add check for extracting duration for shorts
Make short duration extraction stricter
Fix handling shorts with no views
  • Loading branch information
David-JonesDVN committed May 19, 2023
1 parent 35c1da7 commit af49945
Showing 1 changed file with 7 additions and 8 deletions.
15 changes: 7 additions & 8 deletions youtube/yt_data_extract/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,22 +355,21 @@ def extract_item_info(item, additional_info={}):

# handle case where it is "No views"
if not info['approx_view_count']:
- if ('No views' in item.get('shortViewCountText', '')
-         or 'no views' in accessibility_label.lower()):
+ if ('No views' in extract_str(item.get('viewCountText', ''))):
info['view_count'] = 0
info['approx_view_count'] = '0'

# dig into accessibility data to get duration for shorts
accessibility_label = multi_deep_get(item,
['accessibility', 'accessibilityData', 'label'],
default='')

- duration = re.search(r'(\d+) (second|seconds|minute) - play video',
+ duration = re.search(r'(\d+) (second|seconds|minute) - play video$',
accessibility_label)
- if duration.group(2) == 'minute':
-     info['duration'] = "1:00"
- else:
-     info['duration'] = "0:" + duration.group(1).zfill(2)
+ if duration:
+     if duration.group(2) == 'minute':
+         info['duration'] = '1:00'
+     else:
+         info['duration'] = '0:' + duration.group(1).zfill(2)

# if it's an item in a playlist, get its index
if 'index' in item: # url has wrong index on playlist page
Expand Down

0 comments on commit af49945

Please sign in to comment.