youtube-dl/youtube_dl/extractor/aenetworks.py

from __future__ import unicode_literals

import re

from .common import InfoExtractor
from ..utils import (
    smuggle_url,
    update_url_query,
    unescapeHTML,
)


class AENetworksIE(InfoExtractor):
    """Extractor for video pages on the A+E Networks sites.

    Matches pages on history.com, aetv.com, mylifetime.com and fyi.tv. The
    page is scraped for a media/release URL, which is handed on as a
    ``url_transparent`` result together with any JSON-LD metadata found in
    the page.
    """

    IE_NAME = 'aenetworks'
    IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'
    # Named groups: 'type' is the first path segment (e.g. 'shows', 'topics'),
    # 'id' is the last path segment before any query string or fragment.
    _VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'

    _TESTS = [{
        'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',
        'info_dict': {
            'id': 'g12m5Gyt3fdR',
            'ext': 'mp4',
            'title': "Bet You Didn't Know: Valentine's Day",
            'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',
        },
        'params': {
            # m3u8 download
            'skip_download': True,
        },
        'add_ie': ['ThePlatform'],
        'expected_warnings': ['JSON-LD'],
    }, {
        'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',
        'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',
        'info_dict': {
            'id': 'eg47EERs_JsZ',
            'ext': 'mp4',
            'title': 'Winter Is Coming',
            'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',
        },
        'add_ie': ['ThePlatform'],
    }, {
        'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',
        'only_matching': True
    }, {
        'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',
        'only_matching': True
    }, {
        'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',
        'only_matching': True
    }]

    def _real_extract(self, url):
        """Build a ``url_transparent`` result for the video page at ``url``.

        The page is downloaded and searched for the media URL, first via a
        ``data-release-url`` attribute tied to this page's id, then via a
        ``media_url = '...'`` assignment. Query parameters are added to that
        URL and it is returned, with smuggled signing data, merged into the
        JSON-LD metadata of the page (if any).

        ``url`` must match ``_VALID_URL``.
        """
        mobj = re.match(self._VALID_URL, url)
        # Read the groups by name so the code does not depend on their
        # position inside _VALID_URL.
        page_type = mobj.group('type')
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        video_url_re = [
            # video_id is taken verbatim from the URL, so it may contain regex
            # metacharacters ('.', '+', '(' ...); escape it so that it is
            # matched literally and cannot produce an invalid pattern.
            r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % re.escape(video_id),
            r"media_url\s*=\s*'([^']+)'"
        ]
        # The URL is scraped from HTML source, hence the entity unescaping.
        video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))
        query = {'mbr': 'true'}
        if page_type == 'shows':
            query['assetTypes'] = 'medium_video_s3'
        if 'switch=hds' in video_url:
            # Ask for HLS where the scraped URL requests HDS.
            # NOTE(review): relies on update_url_query overriding the existing
            # 'switch' parameter - confirm against ..utils.
            query['switch'] = 'hls'

        # fatal=False: pages without usable JSON-LD only produce a warning
        # (see 'expected_warnings' in the first test).
        info = self._search_json_ld(webpage, video_id, fatal=False)
        info.update({
            '_type': 'url_transparent',
            # 'sig' and 'force_smil_url' are smuggled along for the extractor
            # that handles the resulting URL - presumably ThePlatform, per
            # 'add_ie' in _TESTS.
            'url': smuggle_url(update_url_query(
                video_url, query), {
                    'sig': {
                        'key': 'crazyjava',
                        'secret': 's3cr3t'},
                    'force_smil_url': True
                }),
        })
        return info
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00			`from __future__ import unicode_literals`

[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`import re`

[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00			`from .common import InfoExtractor`
[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`from ..utils import (`
			`smuggle_url,`
			`update_url_query,`
			`unescapeHTML,`
			`)`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00

[ae] Rename to aenetworks and clarify extractor name and description 2016-01-17 08:02:45 +11:00			`class AENetworksIE(InfoExtractor):`
			`IE_NAME = 'aenetworks'`
			`IE_DESC = 'A+E Networks: A&E, Lifetime, History.com, FYI Network'`
[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`_VALID_URL = r'https?://(?:www\.)?(?:(?:history|aetv|mylifetime)\.com|fyi\.tv)/(?P<type>[^/]+)/(?:[^/]+/)+(?P<id>[^/]+?)(?:$|[?#])'`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00
			`_TESTS = [{`
			`'url': 'http://www.history.com/topics/valentines-day/history-of-valentines-day/videos/bet-you-didnt-know-valentines-day?m=528e394da93ae&s=undefined&f=1&free=false',`
			`'info_dict': {`
[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`'id': 'g12m5Gyt3fdR',`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00			`'ext': 'mp4',`
			`'title': "Bet You Didn't Know: Valentine's Day",`
			`'description': 'md5:7b57ea4829b391995b405fa60bd7b5f7',`
			`},`
[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`'params': {`
			`# m3u8 download`
			`'skip_download': True,`
			`},`
			`'add_ie': ['ThePlatform'],`
[ae] Fix _TESTS 2016-01-16 23:56:53 +11:00			`'expected_warnings': ['JSON-LD'],`
[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`}, {`
			`'url': 'http://www.history.com/shows/mountain-men/season-1/episode-1',`
[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`'md5': '8ff93eb073449f151d6b90c0ae1ef0c7',`
[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`'info_dict': {`
			`'id': 'eg47EERs_JsZ',`
			`'ext': 'mp4',`
[refactor] Single quotes consistency 2016-02-14 20:37:17 +11:00			`'title': 'Winter Is Coming',`
[ae] Fix _TESTS 2016-01-16 23:56:53 +11:00			`'description': 'md5:641f424b7a19d8e24f26dea22cf59d74',`
[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`},`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00			`'add_ie': ['ThePlatform'],`
[ae] Add support for fyi.tv, aetv.com and mylifetime.com(closes #3599) 2016-01-16 02:18:07 +11:00			`}, {`
			`'url': 'http://www.aetv.com/shows/duck-dynasty/video/inlawful-entry',`
			`'only_matching': True`
			`}, {`
			`'url': 'http://www.fyi.tv/shows/tiny-house-nation/videos/207-sq-ft-minnesota-prairie-cottage',`
			`'only_matching': True`
			`}, {`
			`'url': 'http://www.mylifetime.com/shows/project-runway-junior/video/season-1/episode-6/superstar-clients',`
			`'only_matching': True`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00			`}]`

			`def _real_extract(self, url):`
[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`page_type, video_id = re.match(self._VALID_URL, url).groups()`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00
			`webpage = self._download_webpage(url, video_id)`

[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`video_url_re = [`
Revert "[history] fix signature and media url extraction(fixes #8240)" This reverts commit ffbc0baf72f456b1f4f4ac989e4a7b75b69c7a54. 2016-01-16 00:54:39 +11:00			`r'data-href="[^"]*/%s"[^>]+data-release-url="([^"]+)"' % video_id,`
[history] add support for episode pages(fixes #8240) 2016-01-16 01:16:57 +11:00			`r"media_url\s*=\s*'([^']+)'"`
			`]`
[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`video_url = unescapeHTML(self._search_regex(video_url_re, webpage, 'video url'))`
			`query = {'mbr': 'true'}`
			`if page_type == 'shows':`
			`query['assetTypes'] = 'medium_video_s3'`
			`if 'switch=hds' in video_url:`
			`query['switch'] = 'hls'`
[history] Add extractor (Closes #4934) 2015-02-15 09:57:52 +11:00
[ae] Use JSON-LD for TV series metadata 2016-01-16 05:36:49 +11:00			`info = self._search_json_ld(webpage, video_id, fatal=False)`
			`info.update({`
			`'_type': 'url_transparent',`
[aenetworks] improve format extraction 2016-04-01 19:56:18 +11:00			`'url': smuggle_url(update_url_query(`
			`video_url, query), {`
			`'sig': {`
			`'key': 'crazyjava',`
			`'secret': 's3cr3t'},`
			`'force_smil_url': True`
			`}),`
[ae] Use JSON-LD for TV series metadata 2016-01-16 05:36:49 +11:00			`})`
			`return info`