[younow] Fix issues and improve extraction (closes #9255, closes #9432, closes #12436)

2017-10-29 04:16:07 +07:00 · 2017-10-29 04:16:07 +07:00 · eb4b5818e2
parent 47a8587915
commit eb4b5818e2
2 changed files with 114 additions and 109 deletions
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -1336,7 +1336,7 @@ from .youku import (
    YoukuShowIE,
 )
 from .younow import (
-    YouNowIE,
+    YouNowLiveIE,
    YouNowChannelIE,
    YouNowMomentIE,
 )
--- a/youtube_dl/extractor/younow.py
+++ b/youtube_dl/extractor/younow.py
@@ -1,17 +1,22 @@
 # coding: utf-8
 from __future__ import unicode_literals
-from datetime import date, datetime
+
 import itertools
 from .common import InfoExtractor
 from ..compat import compat_str
-from ..utils import int_or_none, UnsupportedError
+from ..utils import (
    ExtractorError,
    int_or_none,
    try_get,
 )
-MOMENT_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/fetch/id=%s'
+CDN_API_BASE = 'https://cdn.younow.com/php/api'
-STREAM_URL_FORMAT = 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
+MOMENT_URL_FORMAT = '%s/moment/fetch/id=%%s' % CDN_API_BASE
-class YouNowIE(InfoExtractor):
+class YouNowLiveIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)'
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.younow.com/AmandaPadeezy',
        'info_dict': {
@@ -19,179 +24,179 @@ class YouNowIE(InfoExtractor):
            'ext': 'mp4',
            'is_live': True,
            'title': 'March 26, 2017',
-            'description': 'YouNow is the best way to broadcast live and get an audience to watch you.',
+            'thumbnail': r're:^https?://.*\.jpg$',
            'thumbnail': 'https://ynassets.s3.amazonaws.com/broadcast/live/157869188/157869188.jpg',
            'tags': ['girls'],
            'categories': ['girls'],
            'uploader': 'AmandaPadeezy',
            'uploader_id': '6716501',
            'uploader_url': 'https://www.younow.com/AmandaPadeezy',
            'creator': 'AmandaPadeezy',
-            'formats': [{
+        },
-                'url': 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=157869188/channelId=6716501',
+        'skip': True,
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
        }
    }
    @classmethod
    def suitable(cls, url):
        return (False
                if YouNowChannelIE.suitable(url) or YouNowMomentIE.suitable(url)
                else super(YouNowLiveIE, cls).suitable(url))
    def _real_extract(self, url):
        username = self._match_id(url)
        data = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username)
-        if data.get('media'):
+        data = self._download_json(
-            stream_url = 'https://cdn.younow.com/php/api/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s' % (
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
-                data.get('broadcastId'),
+            % username, username)
                data.get('userId'),
            )
        else:
            raise UnsupportedError('Unsupported stream or user is not streaming at this time')
-        webpage = self._download_webpage(url, username)
+        if data.get('errorCode') != 0:
-        try:
+            raise ExtractorError(data['errorMsg'], expected=True)
-            uploader = data['user']['profileUrlString']
+
-        except KeyError:
+        uploader = try_get(
-            uploader = username
+            data, lambda x: x['user']['profileUrlString'],
-        try:
+            compat_str) or username
            title = data['title']
        except KeyError:
            title = date.today().strftime('%B %d, %Y')
        return {
            'id': uploader,
            'is_live': True,
-            'title': title,
+            'title': self._live_title(uploader),
            'description': self._og_search_description(webpage),
            'thumbnail': data.get('awsUrl'),
            'tags': data.get('tags'),
            'categories': data.get('tags'),
            'uploader': uploader,
            'uploader_id': data.get('userId'),
-            'uploader_url': 'https://www.younow.com/%s' % (data['user']['profileUrlString'],),
+            'uploader_url': 'https://www.younow.com/%s' % username,
            'creator': uploader,
            'view_count': int_or_none(data.get('viewers')),
            'like_count': int_or_none(data.get('likes')),
            'formats': [{
-                'url': stream_url,
+                'url': '%s/broadcast/videoPath/hls=1/broadcastId=%s/channelId=%s'
                       % (CDN_API_BASE, data['broadcastId'], data['userId']),
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
        }
-def _moment_to_entry(item):
+def _extract_moment(item, fatal=True):
    moment_id = item.get('momentId')
    if not moment_id:
        if not fatal:
            return
        raise ExtractorError('Unable to extract moment id')
    moment_id = compat_str(moment_id)
    title = item.get('text')
    title_type = item.get('titleType')
    if not title:
-        if title_type:
+        title = 'YouNow %s' % (
-            title = 'YouNow %s' % item.get('titleType')
+            item.get('momentType') or item.get('titleType') or 'moment')
-        else:
+
-            title = 'YouNow moment'
+    uploader = try_get(item, lambda x: x['owner']['name'], compat_str)
    uploader_id = try_get(item, lambda x: x['owner']['userId'])
    uploader_url = 'https://www.younow.com/%s' % uploader if uploader else None
    entry = {
-        'id': compat_str(item['momentId']),
+        'extractor_key': 'YouNowMoment',
        'id': moment_id,
        'title': title,
        'view_count': int_or_none(item.get('views')),
        'like_count': int_or_none(item.get('likes')),
        'timestamp': int_or_none(item.get('created')),
        'creator': uploader,
        'uploader': uploader,
        'uploader_id': uploader_id,
        'uploader_url': uploader_url,
        'formats': [{
-            'url': STREAM_URL_FORMAT % (item['momentId'], item['momentId']),
+            'url': 'https://hls.younow.com/momentsplaylists/live/%s/%s.m3u8'
                   % (moment_id, moment_id),
            'ext': 'mp4',
-            'protocol': 'm3u8',
+            'protocol': 'm3u8_native',
        }],
    }
    try:
        entry['uploader'] = entry['creator'] = item['owner']['name']
        entry['uploader_url'] = 'https://www.younow.com/%s' % (item['owner']['name'],)
        entry['uploader_id'] = item['owner']['userId']
    except KeyError:
        pass
    return entry
 class YouNowChannelIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?younow\.com/(?P<id>[^/]+)/channel'
    _TEST = {
-        'url': 'https://www.younow.com/Kate_Swiz/channel',
+        'url': 'https://www.younow.com/its_Kateee_/channel',
        'info_dict': {
-            'title': 'Kate_Swiz moments'
+            'id': '14629760',
            'title': 'its_Kateee_ moments'
        },
-        'playlist_count': 6,
+        'playlist_mincount': 8,
    }
-    MOMENTS_URL_FORMAT = 'https://cdn.younow.com/php/api/moment/profile/channelId=%s/createdBefore=%d/records=20'
+    def _entries(self, username, channel_id):
        created_before = 0
        for page_num in itertools.count(1):
            if created_before is None:
                break
            info = self._download_json(
                '%s/moment/profile/channelId=%s/createdBefore=%d/records=20'
                % (CDN_API_BASE, channel_id, created_before), username,
                note='Downloading moments page %d' % page_num)
            items = info.get('items')
            if not items or not isinstance(items, list):
                break
            for item in items:
                if not isinstance(item, dict):
                    continue
                item_type = item.get('type')
                if item_type == 'moment':
                    entry = _extract_moment(item, fatal=False)
                    if entry:
                        yield entry
                elif item_type == 'collection':
                    moments = item.get('momentsIds')
                    if isinstance(moments, list):
                        for moment_id in moments:
                            m = self._download_json(
                                MOMENT_URL_FORMAT % moment_id, username,
                                note='Downloading %s moment JSON' % moment_id,
                                fatal=False)
                            if m and isinstance(m, dict) and m.get('item'):
                                entry = _extract_moment(m['item'])
                                if entry:
                                    yield entry
                created_before = int_or_none(item.get('created'))
    def _real_extract(self, url):
        entries = []
        username = self._match_id(url)
-        user_info = self._download_json('https://api.younow.com/php/api/broadcast/info/curId=0/user=%s' % (username), username, note='Downloading user information')
+        channel_id = compat_str(self._download_json(
-        channel_id = user_info['userId']
+            'https://api.younow.com/php/api/broadcast/info/curId=0/user=%s'
-        created_before = 0
+            % username, username, note='Downloading user information')['userId'])
-        moment_ids = []
+        return self.playlist_result(
-        moment_ids_processed = []
+            self._entries(username, channel_id), channel_id,
-        err = False
+            '%s moments' % username)
        while True:
            if created_before:
                cb = datetime.fromtimestamp(created_before)
            else:
                cb = datetime.now()
            info = self._download_json(self.MOMENTS_URL_FORMAT % (channel_id, created_before), username, note='Downloading moments data (created before %s)' % (cb))
            for item in info['items']:
                if item['type'] == 'moment':
                    entry = _moment_to_entry(item)
                    moment_ids_processed.append(entry['id'])
                    entries.append(entry)
                elif item['type'] == 'collection':
                    moment_ids += [compat_str(x) for x in item['momentsIds']]
                try:
                    created_before = int_or_none(item['created'])
                except KeyError:
                    err = True
                    break
            if (err or
                    not info['hasMore'] or
                    'items' not in info or
                    not info['items']):
                break
        for mid in set(moment_ids):
            if mid in moment_ids_processed:
                continue
            item = self._download_json(MOMENT_URL_FORMAT % (mid), mid)
            entries.append(_moment_to_entry(item['item']))
        return self.playlist_result(entries, playlist_title='%s moments' % (username))
 class YouNowMomentIE(InfoExtractor):
-    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/]+)/[^/]+'
+    _VALID_URL = r'https?://(?:www\.)?younow\.com/[^/]+/(?P<id>[^/?#&]+)'
    _TEST = {
        'url': 'https://www.younow.com/GABO.../20712117/36319236/3b316doc/m',
        'md5': 'a30c70eadb9fb39a1aa3c8c0d22a0807',
        'info_dict': {
            'id': '20712117',
            'ext': 'mp4',
            'title': 'YouNow capture',
-            'view_count': 19,
+            'view_count': int,
-            'like_count': 0,
+            'like_count': int,
            'timestamp': 1490432040,
            'formats': [{
                'url': 'https://hls.younow.com/momentsplaylists/live/20712117/20712117.m3u8',
                'ext': 'mp4',
                'protocol': 'm3u8',
            }],
            'upload_date': '20170325',
            'uploader': 'GABO...',
            'uploader_id': 35917228,
        },
    }
    @classmethod
    def suitable(cls, url):
        return (False
                if YouNowChannelIE.suitable(url)
                else super(YouNowMomentIE, cls).suitable(url))
    def _real_extract(self, url):
-        mid = self._match_id(url)
+        video_id = self._match_id(url)
-        item = self._download_json(MOMENT_URL_FORMAT % (mid), mid)
+        item = self._download_json(MOMENT_URL_FORMAT % video_id, video_id)
-        return _moment_to_entry(item['item'])
+        return _extract_moment(item['item'])