[dailymotion:playlist] fix extraction(closes #16894)
parent
cd3a3ff93b
commit
ec240a4369
|
@ -2,6 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
|
import functools
|
||||||
import hashlib
|
import hashlib
|
||||||
import itertools
|
import itertools
|
||||||
import json
|
import json
|
||||||
|
@ -16,11 +17,13 @@ from ..utils import (
|
||||||
error_to_compat_str,
|
error_to_compat_str,
|
||||||
ExtractorError,
|
ExtractorError,
|
||||||
int_or_none,
|
int_or_none,
|
||||||
|
mimetype2ext,
|
||||||
|
OnDemandPagedList,
|
||||||
parse_iso8601,
|
parse_iso8601,
|
||||||
sanitized_Request,
|
sanitized_Request,
|
||||||
str_to_int,
|
str_to_int,
|
||||||
unescapeHTML,
|
unescapeHTML,
|
||||||
mimetype2ext,
|
urlencode_postdata,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -343,17 +346,93 @@ class DailymotionIE(DailymotionBaseInfoExtractor):
|
||||||
|
|
||||||
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||||
IE_NAME = 'dailymotion:playlist'
|
IE_NAME = 'dailymotion:playlist'
|
||||||
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>[^/?#&]+)'
|
_VALID_URL = r'(?:https?://)?(?:www\.)?dailymotion\.[a-z]{2,3}/playlist/(?P<id>x[0-9a-z]+)'
|
||||||
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
|
||||||
_PAGE_TEMPLATE = 'https://www.dailymotion.com/playlist/%s/%s'
|
|
||||||
_TESTS = [{
|
_TESTS = [{
|
||||||
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
'url': 'http://www.dailymotion.com/playlist/xv4bw_nqtv_sport/1#video=xl8v3q',
|
||||||
'info_dict': {
|
'info_dict': {
|
||||||
'title': 'SPORT',
|
'title': 'SPORT',
|
||||||
'id': 'xv4bw_nqtv_sport',
|
'id': 'xv4bw',
|
||||||
},
|
},
|
||||||
'playlist_mincount': 20,
|
'playlist_mincount': 20,
|
||||||
}]
|
}]
|
||||||
|
_PAGE_SIZE = 100
|
||||||
|
|
||||||
|
def _fetch_page(self, playlist_id, authorizaion, page):
|
||||||
|
page += 1
|
||||||
|
videos = self._download_json(
|
||||||
|
'https://graphql.api.dailymotion.com',
|
||||||
|
playlist_id, 'Downloading page %d' % page,
|
||||||
|
data=json.dumps({
|
||||||
|
'query': '''{
|
||||||
|
collection(xid: "%s") {
|
||||||
|
videos(first: %d, page: %d) {
|
||||||
|
pageInfo {
|
||||||
|
hasNextPage
|
||||||
|
nextPage
|
||||||
|
}
|
||||||
|
edges {
|
||||||
|
node {
|
||||||
|
xid
|
||||||
|
url
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}''' % (playlist_id, self._PAGE_SIZE, page)
|
||||||
|
}).encode(), headers={
|
||||||
|
'Authorization': authorizaion,
|
||||||
|
'Origin': 'https://www.dailymotion.com',
|
||||||
|
})['data']['collection']['videos']
|
||||||
|
for edge in videos['edges']:
|
||||||
|
node = edge['node']
|
||||||
|
yield self.url_result(
|
||||||
|
node['url'], DailymotionIE.ie_key(), node['xid'])
|
||||||
|
|
||||||
|
def _real_extract(self, url):
|
||||||
|
playlist_id = self._match_id(url)
|
||||||
|
webpage = self._download_webpage(url, playlist_id)
|
||||||
|
api = self._parse_json(self._search_regex(
|
||||||
|
r'__PLAYER_CONFIG__\s*=\s*({.+?});',
|
||||||
|
webpage, 'player config'), playlist_id)['context']['api']
|
||||||
|
auth = self._download_json(
|
||||||
|
api.get('auth_url', 'https://graphql.api.dailymotion.com/oauth/token'),
|
||||||
|
playlist_id, data=urlencode_postdata({
|
||||||
|
'client_id': api.get('client_id', 'f1a362d288c1b98099c7'),
|
||||||
|
'client_secret': api.get('client_secret', 'eea605b96e01c796ff369935357eca920c5da4c5'),
|
||||||
|
'grant_type': 'client_credentials',
|
||||||
|
}))
|
||||||
|
authorizaion = '%s %s' % (auth.get('token_type', 'Bearer'), auth['access_token'])
|
||||||
|
entries = OnDemandPagedList(functools.partial(
|
||||||
|
self._fetch_page, playlist_id, authorizaion), self._PAGE_SIZE)
|
||||||
|
return self.playlist_result(
|
||||||
|
entries, playlist_id,
|
||||||
|
self._og_search_title(webpage))
|
||||||
|
|
||||||
|
|
||||||
|
class DailymotionUserIE(DailymotionBaseInfoExtractor):
|
||||||
|
IE_NAME = 'dailymotion:user'
|
||||||
|
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
||||||
|
_MORE_PAGES_INDICATOR = r'(?s)<div class="pages[^"]*">.*?<a\s+class="[^"]*?icon-arrow_right[^"]*?"'
|
||||||
|
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
||||||
|
_TESTS = [{
|
||||||
|
'url': 'https://www.dailymotion.com/user/nqtv',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'nqtv',
|
||||||
|
'title': 'Rémi Gaillard',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 100,
|
||||||
|
}, {
|
||||||
|
'url': 'http://www.dailymotion.com/user/UnderProject',
|
||||||
|
'info_dict': {
|
||||||
|
'id': 'UnderProject',
|
||||||
|
'title': 'UnderProject',
|
||||||
|
},
|
||||||
|
'playlist_mincount': 1800,
|
||||||
|
'expected_warnings': [
|
||||||
|
'Stopped at duplicated page',
|
||||||
|
],
|
||||||
|
'skip': 'Takes too long time',
|
||||||
|
}]
|
||||||
|
|
||||||
def _extract_entries(self, id):
|
def _extract_entries(self, id):
|
||||||
video_ids = set()
|
video_ids = set()
|
||||||
|
@ -379,43 +458,6 @@ class DailymotionPlaylistIE(DailymotionBaseInfoExtractor):
|
||||||
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
if re.search(self._MORE_PAGES_INDICATOR, webpage) is None:
|
||||||
break
|
break
|
||||||
|
|
||||||
def _real_extract(self, url):
|
|
||||||
mobj = re.match(self._VALID_URL, url)
|
|
||||||
playlist_id = mobj.group('id')
|
|
||||||
webpage = self._download_webpage(url, playlist_id)
|
|
||||||
|
|
||||||
return {
|
|
||||||
'_type': 'playlist',
|
|
||||||
'id': playlist_id,
|
|
||||||
'title': self._og_search_title(webpage),
|
|
||||||
'entries': self._extract_entries(playlist_id),
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
class DailymotionUserIE(DailymotionPlaylistIE):
|
|
||||||
IE_NAME = 'dailymotion:user'
|
|
||||||
_VALID_URL = r'https?://(?:www\.)?dailymotion\.[a-z]{2,3}/(?!(?:embed|swf|#|video|playlist)/)(?:(?:old/)?user/)?(?P<user>[^/]+)'
|
|
||||||
_PAGE_TEMPLATE = 'http://www.dailymotion.com/user/%s/%s'
|
|
||||||
_TESTS = [{
|
|
||||||
'url': 'https://www.dailymotion.com/user/nqtv',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'nqtv',
|
|
||||||
'title': 'Rémi Gaillard',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 100,
|
|
||||||
}, {
|
|
||||||
'url': 'http://www.dailymotion.com/user/UnderProject',
|
|
||||||
'info_dict': {
|
|
||||||
'id': 'UnderProject',
|
|
||||||
'title': 'UnderProject',
|
|
||||||
},
|
|
||||||
'playlist_mincount': 1800,
|
|
||||||
'expected_warnings': [
|
|
||||||
'Stopped at duplicated page',
|
|
||||||
],
|
|
||||||
'skip': 'Takes too long time',
|
|
||||||
}]
|
|
||||||
|
|
||||||
def _real_extract(self, url):
|
def _real_extract(self, url):
|
||||||
mobj = re.match(self._VALID_URL, url)
|
mobj = re.match(self._VALID_URL, url)
|
||||||
user = mobj.group('user')
|
user = mobj.group('user')
|
||||||
|
|
Loading…
Reference in New Issue