[radiojavan] Simplify and extract upload date

2015-04-04 16:45:41 +06:00 · 2015-04-04 16:45:41 +06:00 · 7cf97daf77
parent cce23e43a9
commit 7cf97daf77
1 changed files with 35 additions and 40 deletions
--- a/youtube_dl/extractor/radiojavan.py
+++ b/youtube_dl/extractor/radiojavan.py
@@ -1,12 +1,14 @@
-# coding: utf-8
 from __future__ import unicode_literals

+import re
+
 from .common import InfoExtractor
 from ..utils import(
-    parse_duration,
-    str_to_int
+    unified_strdate,
+    str_to_int,
 )

+
 class RadioJavanIE(InfoExtractor):
    _VALID_URL = r'https?://(?:www\.)?radiojavan\.com/videos/video/(?P<id>[^/]+)/?'
    _TEST = {
@@ -16,56 +18,49 @@ class RadioJavanIE(InfoExtractor):
            'id': 'chaartaar-ashoobam',
            'ext': 'mp4',
            'title': 'Chaartaar - Ashoobam',
-            'description': 'Chaartaar - Ashoobam',
            'thumbnail': 're:^https?://.*\.jpe?g$',
+            'upload_date': '20150215',
+            'view_count': int,
+            'like_count': int,
+            'dislike_count': int,
        }
    }

    def _real_extract(self, url):
-        display_id = self._match_id(url)
-        webpage = self._download_webpage(url, display_id)
-        urls = list()
-        prefix = 'https://media.rdjavan.com/media/music_video/'
+        video_id = self._match_id(url)

-        video_url_480 = self._search_regex(
-            r'RJ\.video480p = \'([^\']+)\'', webpage, '480 video url', fatal= False)
-        video_url_720 = self._search_regex(
-            r'RJ\.video720p = \'([^\']+)\'', webpage, '720 video url', fatal= False)
-        video_url_1080 = self._search_regex(
-            r'RJ\.video1080p = \'([^\']+)\'', webpage, '1080 video url', fatal= False)
+        webpage = self._download_webpage(url, video_id)

-        if video_url_480:
-            urls.append({'url': prefix + video_url_480, 'format': '480p'})
-        if video_url_720:
-            urls.append({'url': prefix + video_url_720, 'format': '720p'})
-        if video_url_1080:
-            urls.append({'url': prefix + video_url_1080, 'format': '1080p'})
+        formats = [{
+            'url': 'https://media.rdjavan.com/media/music_video/%s' % video_path,
+            'format_id': '%sp' % height,
+            'height': height,
+        } for height, video_path in re.findall(r"RJ\.video(\d+)p\s*=\s*'/?([^']+)'", webpage)]

        title = self._og_search_title(webpage)
        thumbnail = self._og_search_thumbnail(webpage)
-        formats = [{
-            'url': url['url'],
-            'format': url['format']
-        } for url in urls]

-        likes = self._search_regex(
-            r'<span class="rating">([\d,]+)\s*likes</span>', webpage, 'Likes Count', fatal=False )
-        likes = likes.replace(',', '')
-        dislikes = self._search_regex(
-            r'<span class="rating">([\d,]+)\s*dislikes</span>', webpage, 'Dislikes Count', fatal=False )
-        dislikes = dislikes.replace(',', '')
+        upload_date = unified_strdate(self._search_regex(
+            r'class="date_added">Date added: ([^<]+)<',
+            webpage, 'upload date', fatal=False))

-        plays = self._search_regex(
-            r'views_publish[">\s]*<span[^>]+class="views">Plays: ([\d,]+)</span>', webpage, 'Play Count', fatal=False )
-        plays = plays.replace(',', '')
+        view_count = str_to_int(self._search_regex(
+            r'class="views">Plays: ([\d,]+)',
+            webpage, 'view count', fatal=False))
+        like_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) likes',
+            webpage, 'like count', fatal=False))
+        dislike_count = str_to_int(self._search_regex(
+            r'class="rating">([\d,]+) dislikes',
+            webpage, 'dislike count', fatal=False))

        return {
-            'formats': formats,
-            'id': display_id,
+            'id': video_id,
            'title': title,
-            'description': title, # no description provided in RadioJavan
            'thumbnail': thumbnail,
-            'like_count': str_to_int(likes),
-            'dislike_count': str_to_int(dislikes),
-            'viewCount': str_to_int(plays)
-        }
+            'upload_date': upload_date,
+            'view_count': view_count,
+            'like_count': like_count,
+            'dislike_count': dislike_count,
+            'formats': formats,
+        }