[discovery] add support for Discovery-related sites

- investigationdiscovery.com
- discoverylife.com
- animalplanet.com
- ahctv.com
- destinationamerica.com
- sciencechannel.com
- tlc.com
- velocity.com
2015-12-31 17:29:37 +01:00 · 2015-12-31 17:29:37 +01:00 · fec040e754
parent 34a9da136f
commit fec040e754
3 changed files with 30 additions and 63 deletions
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@ -19,7 +19,6 @@ from .aftonbladet import AftonbladetIE
 from .airmozilla import AirMozillaIE
 from .aljazeera import AlJazeeraIE
 from .alphaporno import AlphaPornoIE
 from .animalplanet import AnimalPlanetIE
 from .anitube import AnitubeIE
 from .anysex import AnySexIE
 from .aol import AolIE
--- a/youtube_dl/extractor/animalplanet.py
+++ b/youtube_dl/extractor/animalplanet.py
@ -1,53 +0,0 @@
 # coding: utf-8
 from __future__ import unicode_literals
 from .common import InfoExtractor
 from ..compat import compat_str
 from ..utils import (
    parse_duration,
    parse_iso8601,
 )
 class AnimalPlanetIE(InfoExtractor):
    # Extractor for animalplanet.com pages. The last path segment of the URL
    # is captured as the display id; the numeric video id comes from the
    # page's embedded JSON.
    _VALID_URL = r'https?://(?:www\.)?animalplanet\.com/([^/]+/)*(?P<id>[^/\?#]+)'
    _TESTS = [{
        'url': 'http://www.animalplanet.com/tv-shows/i-shouldnt-be-alive/videos/dog-saves-injured-owner/',
        'info_dict': {
            'id': '10608',
            'ext': 'mp4',
            'title': 'Dog Saves Injured Owner',
            'description': 'A world class athlete is put to the test when she falls into a canyon and breaks her hip. Her only companion is her dog, Taz, who is on a mission to save her!',
            'upload_date': '20100410',
            'timestamp': 1270857727,
            'duration': 220,
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        }
    }, {
        'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
        'only_matching': True,
    }]
    def _real_extract(self, url):
        # Fetch the page and pull out the JSON object assigned to
        # ``initialVideoData``; only the first playlist entry is used.
        slug = self._match_id(url)
        page = self._download_webpage(url, slug)
        raw_json = self._search_regex(
            r'initialVideoData\s*=\s*({.+?});',
            page, 'initialVideoData')
        clip = self._parse_json(raw_json, slug)['playlist'][0]
        # Metadata is gathered first and the m3u8 formats last, so lookups
        # and the manifest download happen in the same order as before.
        result = {
            'id': compat_str(clip['id']),
            'display_id': slug,
            'title': clip['title'],
            'description': clip.get('description'),
            'thumbnail': clip.get('thumbnailURL'),
            'duration': parse_duration(clip.get('video_length')),
            'timestamp': parse_iso8601(clip.get('publishedDate')),
        }
        result['formats'] = self._extract_m3u8_formats(
            clip['src'], slug, 'mp4',
            'm3u8_native', m3u8_id='hls')
        return result
--- a/youtube_dl/extractor/discovery.py
+++ b/youtube_dl/extractor/discovery.py
@ -9,7 +9,17 @@ from ..compat import compat_str
 class DiscoveryIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.discovery\.com\/[a-zA-Z0-9\-]*/[a-zA-Z0-9\-]*/videos/(?P<id>[a-zA-Z0-9_\-]*)(?:\.htm)?'
+    # Verbose-mode pattern shared by all listed sites. The scheme is
+    # `https?` so that https links match as well as http ones; the last
+    # path segment, minus any extension, is captured as `id`.
+    _VALID_URL = r'''(?x)https?://(?:www\.)?(?:
            discovery|
            investigationdiscovery|
            discoverylife|
            animalplanet|
            ahctv|
            destinationamerica|
            sciencechannel|
            tlc|
            velocity
        )\.com/([^/]+/)*(?P<id>[^\./\?#]+)'''
    _TESTS = [{
        'url': 'http://www.discovery.com/tv-shows/mythbusters/videos/mission-impossible-outtakes.htm',
        'info_dict': {
@ -21,8 +31,8 @@ class DiscoveryIE(InfoExtractor):
                            'don\'t miss Adam moon-walking as Jamie ... behind Jamie\'s'
                            ' back.'),
            'duration': 156,
-            'timestamp': 1303099200,
+            'timestamp': 1302032462,
-            'upload_date': '20110418',
+            'upload_date': '20110405',
        },
        'params': {
            'skip_download': True,  # requires ffmpeg
@ -33,27 +43,38 @@ class DiscoveryIE(InfoExtractor):
            'id': 'mythbusters-the-simpsons',
            'title': 'MythBusters: The Simpsons',
        },
-        'playlist_count': 9,
+        'playlist_mincount': 10,
    }, {
        'url': 'http://www.animalplanet.com/longfin-eels-maneaters/',
        'info_dict': {
            'id': '78326',
            'ext': 'mp4',
            'title': 'Longfin Eels: Maneaters?',
            'description': 'Jeremy Wade tests whether or not New Zealand\'s longfin eels are man-eaters by covering himself in fish guts and getting in the water with them.',
            'upload_date': '20140725',
            'timestamp': 1406246400,
            'duration': 116,
        },
    }]
    def _real_extract(self, url):
-        video_id = self._match_id(url)
+        display_id = self._match_id(url)
-        info = self._download_json(url + '?flat=1', video_id)
+        info = self._download_json(url + '?flat=1', display_id)
        video_title = info.get('playlist_title') or info.get('video_title')
        entries = [{
            'id': compat_str(video_info['id']),
            'formats': self._extract_m3u8_formats(
-                video_info['src'], video_id, ext='mp4',
+                video_info['src'], display_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                note='Download m3u8 information for video %d' % (idx + 1)),
            'title': video_info['title'],
            'description': video_info.get('description'),
            'duration': parse_duration(video_info.get('video_length')),
-            'webpage_url': video_info.get('href'),
+            'webpage_url': video_info.get('href') or video_info.get('url'),
            'thumbnail': video_info.get('thumbnailURL'),
            'alt_title': video_info.get('secondary_title'),
            'timestamp': parse_iso8601(video_info.get('publishedDate')),
        } for idx, video_info in enumerate(info['playlist'])]
-        return self.playlist_result(entries, video_id, video_title)
+        return self.playlist_result(entries, display_id, video_title)