[youtube] Fix playlist and feed extraction (closes #25675)
parent
48bd042ce7
commit
d84b21b427
|
@ -70,6 +70,11 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
||||||
|
|
||||||
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
_PLAYLIST_ID_RE = r'(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}'
|
||||||
|
|
||||||
|
_YOUTUBE_CLIENT_HEADERS = {
|
||||||
|
'x-youtube-client-name': '1',
|
||||||
|
'x-youtube-client-version': '1.20200609.04.02',
|
||||||
|
}
|
||||||
|
|
||||||
def _set_language(self):
|
def _set_language(self):
|
||||||
self._set_cookie(
|
self._set_cookie(
|
||||||
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
'.youtube.com', 'PREF', 'f1=50000000&hl=en',
|
||||||
|
@ -301,7 +306,8 @@ class YoutubeEntryListBaseInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
'https://youtube.com/%s' % mobj.group('more'), playlist_id,
|
||||||
'Downloading page #%s%s'
|
'Downloading page #%s%s'
|
||||||
% (page_num, ' (retry #%d)' % count if count else ''),
|
% (page_num, ' (retry #%d)' % count if count else ''),
|
||||||
transform_source=uppercase_escape)
|
transform_source=uppercase_escape,
|
||||||
|
headers=self._YOUTUBE_CLIENT_HEADERS)
|
||||||
break
|
break
|
||||||
except ExtractorError as e:
|
except ExtractorError as e:
|
||||||
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
if isinstance(e.cause, compat_HTTPError) and e.cause.code in (500, 503):
|
||||||
|
@ -3250,7 +3256,8 @@ class YoutubeFeedsInfoExtractor(YoutubeBaseInfoExtractor):
|
||||||
more = self._download_json(
|
more = self._download_json(
|
||||||
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
'https://youtube.com/%s' % mobj.group('more'), self._PLAYLIST_TITLE,
|
||||||
'Downloading page #%s' % page_num,
|
'Downloading page #%s' % page_num,
|
||||||
transform_source=uppercase_escape)
|
transform_source=uppercase_escape,
|
||||||
|
headers=self._YOUTUBE_CLIENT_HEADERS)
|
||||||
content_html = more['content_html']
|
content_html = more['content_html']
|
||||||
more_widget_html = more['load_more_widget_html']
|
more_widget_html = more['load_more_widget_html']
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue