From 6f4ad0db34dc9251d52fe1ca1db6e8d52862af14 Mon Sep 17 00:00:00 2001 From: Petr Novak Date: Sun, 28 Jan 2018 13:28:58 +0100 Subject: [PATCH] [dvtv] Fix live streams extraction (also fixed test broken by changed og tags on the site) --- youtube_dl/extractor/dvtv.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/youtube_dl/extractor/dvtv.py b/youtube_dl/extractor/dvtv.py index e85c58bd5..5decd9951 100644 --- a/youtube_dl/extractor/dvtv.py +++ b/youtube_dl/extractor/dvtv.py @@ -32,7 +32,7 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/dvtv/dvtv-16-12-2014-utok-talibanu-boj-o-kliniku-uprchlici/r~973eb3bc854e11e498be002590604f2e/', 'info_dict': { - 'title': 'DVTV 16. 12. 2014: útok Talibanu, boj o kliniku, uprchlíci', + 'title': r're:^DVTV 16\. 12\. 2014: útok Talibanu, boj o kliniku, uprchlíci', 'id': '973eb3bc854e11e498be002590604f2e', }, 'playlist': [{ @@ -91,10 +91,21 @@ class DVTVIE(InfoExtractor): }, { 'url': 'http://video.aktualne.cz/v-cechach-poprve-zazni-zelenkova-zrestaurovana-mse/r~45b4b00483ec11e4883b002590604f2e/', 'only_matching': True, + }, { + 'url': 'https://video.aktualne.cz/dvtv/babis-a-zeman-nesou-vinu-za-to-ze-nemame-jasno-v-tom-kdo-bud/r~026afb54fad711e79704ac1f6b220ee8/', + 'md5': '87defe16681b1429c91f7a74809823c6', + 'info_dict': { + 'id': 'f5ae72f6fad611e794dbac1f6b220ee8', + 'ext': 'mp4', + 'title': 'Babiš a Zeman nesou vinu za to, že nemáme jasno v tom, kdo bude vládnout, říká Pekarová Adamová', + }, }] - def _parse_video_metadata(self, js, video_id): + def _parse_video_metadata(self, js, video_id, live_js=None): data = self._parse_json(js, video_id, transform_source=js_to_json) + if live_js: + data.update( + self._parse_json(live_js, video_id, transform_source=js_to_json)) title = unescapeHTML(data['title']) @@ -142,13 +153,18 @@ class DVTVIE(InfoExtractor): webpage = self._download_webpage(url, video_id) + # live content + live_item = self._search_regex( + r'(?s)embedData[0-9a-f]{32}\.asset\.liveStarter\s*=\s*(\{.+?\});', + webpage, 'video', default=None, fatal=False) + # single video item = self._search_regex( r'(?s)embedData[0-9a-f]{32}\[["\']asset["\']\]\s*=\s*(\{.+?\});', webpage, 'video', default=None, fatal=False) if item: - return self._parse_video_metadata(item, video_id) + return self._parse_video_metadata(item, video_id, live_item) # playlist items = re.findall(