2016-10-02 22:39:18 +11:00
# coding: utf-8
2014-01-17 13:29:41 +11:00
from __future__ import unicode_literals
2013-09-15 05:41:49 +10:00
import itertools
2017-09-01 18:08:24 +10:00
import re
2013-06-24 04:57:44 +10:00
2015-10-18 03:23:46 +11:00
from . common import (
InfoExtractor ,
SearchInfoExtractor
)
2014-12-13 22:24:42 +11:00
from . . compat import (
2013-06-24 04:57:44 +10:00
compat_str ,
2013-08-22 01:06:37 +10:00
compat_urlparse ,
2016-03-26 06:46:57 +11:00
compat_urllib_parse_urlencode ,
2014-12-13 22:24:42 +11:00
)
from . . utils import (
2013-06-24 04:57:44 +10:00
ExtractorError ,
2014-06-07 23:51:01 +10:00
int_or_none ,
2019-02-11 03:44:08 +11:00
try_get ,
unified_timestamp ,
2017-09-03 19:18:24 +10:00
update_url_query ,
2019-02-03 03:40:06 +11:00
url_or_none ,
2017-09-03 19:18:24 +10:00
)
2013-06-24 04:57:44 +10:00
class SoundcloudIE(InfoExtractor):
    """Information extractor for single soundcloud.com tracks.

    Handles regular track pages (optionally carrying a private-link token),
    ``api.soundcloud.com/tracks/<id>`` URLs and embedded player URLs.
    Track metadata is downloaded as JSON from the SoundCloud API and the
    stream URLs are then requested from the track's ``streams`` endpoint.
    """

    _VALID_URL = r'''(?x)^(?:https?://)?
                    (?:(?:(?:www\.|m\.)?soundcloud\.com/
                            (?!stations/track)
                            (?P<uploader>[\w\d-]+)/
                            (?!(?:tracks|albums|sets(?:/.+?)?|reposts|likes|spotlight)/?(?:$|[?#]))
                            (?P<title>[\w\d-]+)/?
                            (?P<token>[^?]+?)?(?:[?].*)?$)
                       |(?:api\.soundcloud\.com/tracks/(?P<track_id>\d+)
                          (?:/?\?secret_token=(?P<secret_token>[^&]+))?)
                       |(?P<player>(?:w|player|p.)\.soundcloud\.com/player/?.*?url=.*)
                    )
                    '''
    IE_NAME = 'soundcloud'
    _TESTS = [
        {
            'url': 'http://soundcloud.com/ethmusic/lostin-powers-she-so-heavy',
            'md5': 'ebef0a451b909710ed1d7787dddbf0d7',
            'info_dict': {
                'id': '62986583',
                'ext': 'mp3',
                'title': 'Lostin Powers - She so Heavy (SneakPreview) Adrian Ackers Blueprint 1',
                'description': 'No Downloads untill we record the finished version this weekend, i was too pumped n i had to post it , earl is prolly gonna b hella p.o\'d',
                'uploader': 'E.T. ExTerrestrial Music',
                'timestamp': 1349920598,
                'upload_date': '20121011',
                'duration': 143,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            }
        },
        # not streamable song
        {
            'url': 'https://soundcloud.com/the-concept-band/goldrushed-mastered?in=the-concept-band/sets/the-royal-concept-ep',
            'info_dict': {
                'id': '47127627',
                'ext': 'mp3',
                'title': 'Goldrushed',
                'description': 'From Stockholm Sweden\r\nPovel / Magnus / Filip / David\r\nwww.theroyalconcept.com',
                'uploader': 'The Royal Concept',
                'timestamp': 1337635207,
                'upload_date': '20120521',
                'duration': 30,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'params': {
                # rtmp
                'skip_download': True,
            },
        },
        # private link
        {
            'url': 'https://soundcloud.com/jaimemf/youtube-dl-test-video-a-y-baw/s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars:  \"\'/\\ä↭',
                'uploader': 'jaimeMF',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # private link (alt format)
        {
            'url': 'https://api.soundcloud.com/tracks/123998367?secret_token=s-8Pjrp',
            'md5': 'aa0dd32bfea9b0c5ef4f02aacd080604',
            'info_dict': {
                'id': '123998367',
                'ext': 'mp3',
                'title': 'Youtube - Dl Test Video \'\' Ä↭',
                'description': 'test chars:  \"\'/\\ä↭',
                'uploader': 'jaimeMF',
                'timestamp': 1386604920,
                'upload_date': '20131209',
                'duration': 9,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # downloadable song
        {
            'url': 'https://soundcloud.com/oddsamples/bus-brakes',
            'md5': '7624f2351f8a3b2e7cd51522496e7631',
            'info_dict': {
                'id': '128590877',
                'ext': 'mp3',
                'title': 'Bus Brakes',
                'description': 'md5:0053ca6396e8d2fd7b7e1595ef12ab66',
                'uploader': 'oddsamples',
                'timestamp': 1389232924,
                'upload_date': '20140109',
                'duration': 17,
                'license': 'cc-by-sa',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # private link, downloadable format
        {
            'url': 'https://soundcloud.com/oriuplift/uponly-238-no-talking-wav/s-AyZUd',
            'md5': '64a60b16e617d41d0bef032b7f55441e',
            'info_dict': {
                'id': '340344461',
                'ext': 'wav',
                'title': 'Uplifting Only 238 [No Talking] (incl. Alex Feed Guestmix) (Aug 31, 2017) [wav]',
                'description': 'md5:fa20ee0fca76a3d6df8c7e57f3715366',
                'uploader': 'Ori Uplift Music',
                'timestamp': 1504206263,
                'upload_date': '20170831',
                'duration': 7449,
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
        },
        # no album art, use avatar pic for thumbnail
        {
            'url': 'https://soundcloud.com/garyvee/sideways-prod-mad-real',
            'md5': '59c7872bc44e5d99b7211891664760c2',
            'info_dict': {
                'id': '309699954',
                'ext': 'mp3',
                'title': 'Sideways (Prod. Mad Real)',
                'description': 'md5:d41d8cd98f00b204e9800998ecf8427e',
                'uploader': 'garyvee',
                'timestamp': 1488152409,
                'upload_date': '20170226',
                'duration': 207,
                'thumbnail': r're:https?://.*\.jpg',
                'license': 'all-rights-reserved',
                'view_count': int,
                'like_count': int,
                'comment_count': int,
                'repost_count': int,
            },
            'params': {
                'skip_download': True,
            },
        },
    ]

    _CLIENT_ID = 'NmW1FlPaiL94ueEu7oziOWjYEzZzQDcK'

    @staticmethod
    def _extract_urls(webpage):
        """Return the ``src`` URLs of all SoundCloud player iframes in *webpage*."""
        return [m.group('url') for m in re.finditer(
            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?://)?(?:w\.)?soundcloud\.com/player.+?)\1',
            webpage)]

    def report_resolve(self, video_id):
        """Report information extraction."""
        self.to_screen('%s: Resolving id' % video_id)

    @classmethod
    def _resolv_url(cls, url):
        """Build the ``resolve.json`` API URL that maps a public page *url* to its API object."""
        return 'https://api.soundcloud.com/resolve.json?url=' + url + '&client_id=' + cls._CLIENT_ID

    def _extract_info_dict(self, info, full_title=None, quiet=False, secret_token=None):
        """Build the result dict (metadata and formats) for one track.

        info         -- track JSON object as returned by the API; must contain
                        'id' and 'title'
        full_title   -- name used when reporting the extraction (defaults to
                        the track id)
        quiet        -- when true, do not report the extraction on screen
        secret_token -- private-link token, forwarded to every API request
        """
        track_id = compat_str(info['id'])
        title = info['title']
        name = full_title or track_id
        # The condition used to be inverted, so the report was only shown
        # when the caller explicitly asked for quiet operation.
        if not quiet:
            self.report_extraction(name)

        # 'user' may be missing or null in the JSON; never call .get on None.
        user = info.get('user')
        if not isinstance(user, dict):
            user = {}
        thumbnail = info.get('artwork_url') or user.get('avatar_url')
        if isinstance(thumbnail, compat_str):
            # Ask for the 500x500 rendition instead of the default "large" one.
            thumbnail = thumbnail.replace('-large', '-t500x500')
        username = try_get(info, lambda x: x['user']['username'], compat_str)

        def extract_count(key):
            return int_or_none(info.get('%s_count' % key))

        result = {
            'id': track_id,
            'uploader': username,
            'timestamp': unified_timestamp(info.get('created_at')),
            'title': title,
            'description': info.get('description'),
            'thumbnail': thumbnail,
            # int_or_none scales the API value down by 1000
            'duration': int_or_none(info.get('duration'), 1000),
            'webpage_url': info.get('permalink_url'),
            'license': info.get('license'),
            'view_count': extract_count('playback'),
            'like_count': extract_count('favoritings'),
            'comment_count': extract_count('comment'),
            'repost_count': extract_count('reposts'),
            'genre': info.get('genre'),
        }

        formats = []
        query = {'client_id': self._CLIENT_ID}
        if secret_token is not None:
            query['secret_token'] = secret_token
        if info.get('downloadable', False):
            # We can build a direct link to the song
            format_url = update_url_query(
                'https://api.soundcloud.com/tracks/%s/download' % track_id, query)
            formats.append({
                'format_id': 'download',
                'ext': info.get('original_format', 'mp3'),
                'url': format_url,
                'vcodec': 'none',
                'preference': 10,
            })

        # We have to retrieve the url
        format_dict = self._download_json(
            'https://api.soundcloud.com/i1/tracks/%s/streams' % track_id,
            track_id, 'Downloading track url', query=query)

        for key, stream_url in format_dict.items():
            # Skip null / non-URL values instead of crashing further down.
            stream_url = url_or_none(stream_url)
            if not stream_url:
                continue
            # Keys look like "<protocol>_<ext>_<abr>_url"
            ext, abr = 'mp3', None
            mobj = re.search(r'_([^_]+)_(\d+)_url', key)
            if mobj:
                ext, abr = mobj.groups()
                abr = int(abr)
            if key.startswith('http'):
                stream_formats = [{
                    'format_id': key,
                    'ext': ext,
                    'url': stream_url,
                }]
            elif key.startswith('rtmp'):
                # The url doesn't have an rtmp app, we have to extract the playpath
                if 'mp3:' not in stream_url:
                    # Unpacking the split below would raise ValueError
                    continue
                url, path = stream_url.split('mp3:', 1)
                stream_formats = [{
                    'format_id': key,
                    'url': url,
                    'play_path': 'mp3:' + path,
                    'ext': 'flv',
                }]
            elif key.startswith('hls'):
                stream_formats = self._extract_m3u8_formats(
                    stream_url, track_id, ext, entry_protocol='m3u8_native',
                    m3u8_id=key, fatal=False)
            else:
                continue

            if abr:
                for f in stream_formats:
                    f['abr'] = abr

            formats.extend(stream_formats)

        if not formats:
            # We fallback to the stream_url in the original info, this
            # cannot be always used, sometimes it can give an HTTP 404 error
            formats.append({
                'format_id': 'fallback',
                'url': update_url_query(info['stream_url'], query),
                'ext': 'mp3',
            })

        # SoundCloud only serves audio
        for f in formats:
            f['vcodec'] = 'none'

        self._check_formats(formats, track_id)
        self._sort_formats(formats)
        result['formats'] = formats

        return result

    def _real_extract(self, url):
        """Resolve *url* to a track JSON object and extract it.

        Player URLs are not extracted here; they are redirected to the URL
        given in their ``url`` query parameter.
        """
        mobj = re.match(self._VALID_URL, url, flags=re.VERBOSE)
        if mobj is None:
            raise ExtractorError('Invalid URL: %s' % url)

        track_id = mobj.group('track_id')

        if track_id is not None:
            info_json_url = 'https://api.soundcloud.com/tracks/' + track_id + '.json?client_id=' + self._CLIENT_ID
            full_title = track_id
            token = mobj.group('secret_token')
            if token:
                info_json_url += '&secret_token=' + token
        elif mobj.group('player'):
            query = compat_urlparse.parse_qs(compat_urlparse.urlparse(url).query)
            real_url = query['url'][0]
            # If the token is in the query of the original url we have to
            # manually add it
            if 'secret_token' in query:
                real_url += '?secret_token=' + query['secret_token'][0]
            return self.url_result(real_url)
        else:
            # extract uploader (which is in the url)
            uploader = mobj.group('uploader')
            # extract simple title (uploader + slug of song title)
            slug_title = mobj.group('title')
            token = mobj.group('token')
            full_title = resolve_title = '%s/%s' % (uploader, slug_title)
            if token:
                resolve_title += '/%s' % token

            self.report_resolve(full_title)

            url = 'https://soundcloud.com/%s' % resolve_title
            info_json_url = self._resolv_url(url)
        info = self._download_json(info_json_url, full_title, 'Downloading info JSON')

        return self._extract_info_dict(info, full_title, secret_token=token)
2013-06-24 04:57:44 +10:00
2014-05-05 11:12:41 +10:00
2016-09-24 22:29:49 +10:00
class SoundcloudPlaylistBaseIE(SoundcloudIE):
    """Helpers shared by the extractors that turn track lists into playlists."""

    @staticmethod
    def _extract_id(e):
        """Return ``e['id']`` as a string, or None when it is missing or falsy."""
        if not e.get('id'):
            return None
        return compat_str(e['id'])

    def _extract_track_entries(self, tracks):
        """Return one url_result per track that has a 'permalink_url'."""
        entries = []
        for track in tracks:
            permalink_url = track.get('permalink_url')
            if not permalink_url:
                continue
            entries.append(self.url_result(
                permalink_url, SoundcloudIE.ie_key(),
                video_id=self._extract_id(track)))
        return entries
2016-09-24 22:29:49 +10:00
class SoundcloudSetIE(SoundcloudPlaylistBaseIE):
    """Extractor for ``soundcloud.com/<uploader>/sets/<slug>`` pages."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/(?P<uploader>[\w\d-]+)/sets/(?P<slug_title>[\w\d-]+)(?:/(?P<token>[^?/]+))?'
    IE_NAME = 'soundcloud:set'
    _TESTS = [{
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep',
        'info_dict': {
            'id': '2284613',
            'title': 'The Royal Concept EP',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/the-concept-band/sets/the-royal-concept-ep/token',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve the set through the API and return it as a playlist."""
        # uploader and set slug (and the optional token) all come from the URL
        uploader, slug_title, token = re.match(
            self._VALID_URL, url).group('uploader', 'slug_title', 'token')

        full_title = '%s/sets/%s' % (uploader, slug_title)
        if token:
            full_title += '/' + token
        # The canonical page URL carries exactly the same path as full_title
        url = 'https://soundcloud.com/' + full_title

        self.report_resolve(full_title)

        info = self._download_json(self._resolv_url(url), full_title)

        if 'errors' in info:
            msgs = (compat_str(err['error_message']) for err in info['errors'])
            raise ExtractorError('unable to download video webpage: %s' % ','.join(msgs))

        return {
            '_type': 'playlist',
            'entries': self._extract_track_entries(info['tracks']),
            'id': '%s' % info['id'],
            'title': info['title'],
        }
2013-09-15 05:41:49 +10:00
2017-07-29 21:41:42 +10:00
class SoundcloudPagedPlaylistBaseIE(SoundcloudPlaylistBaseIE):
    """Base class for playlists served page by page from the v2 API."""

    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _resolve_entry(self, candidates):
        """Return a url_result for the first candidate with a usable permalink.

        Candidates that are not dicts, or whose 'permalink_url' is not a
        URL, are skipped. Returns None when no candidate qualifies.
        """
        for cand in candidates:
            if not isinstance(cand, dict):
                continue
            permalink_url = url_or_none(cand.get('permalink_url'))
            if not permalink_url:
                continue
            return self.url_result(
                permalink_url,
                # Only pin the track extractor when it actually matches;
                # otherwise let the URL be dispatched normally.
                ie=SoundcloudIE.ie_key() if SoundcloudIE.suitable(permalink_url) else None,
                video_id=self._extract_id(cand),
                video_title=cand.get('title'))
        return None

    def _extract_playlist(self, base_url, playlist_id, playlist_title):
        """Download every page starting at *base_url* and return a playlist dict.

        Pages are followed through the 'next_href' field of each response
        until a response no longer provides one.
        """
        COMMON_QUERY = {
            'limit': 50,
            'client_id': self._CLIENT_ID,
            'linked_partitioning': '1',
        }

        query = COMMON_QUERY.copy()
        query['offset'] = 0

        next_href = base_url + '?' + compat_urllib_parse_urlencode(query)

        entries = []
        for i in itertools.count():
            response = self._download_json(
                next_href, playlist_id, 'Downloading track page %s' % (i + 1))

            collection = response.get('collection')
            if not isinstance(collection, list):
                collection = []

            # Empty collection may be returned, in this case we proceed
            # straight to next_href

            for e in collection:
                # Items may be null or otherwise malformed; e.get() on them
                # would raise before the candidates are even inspected.
                if not isinstance(e, dict):
                    continue
                entry = self._resolve_entry((e, e.get('track'), e.get('playlist')))
                if entry:
                    entries.append(entry)

            next_href = response.get('next_href')
            if not next_href:
                break

            # Re-apply the common parameters (including client_id) on top of
            # whatever query the API put into next_href.
            parsed_next_href = compat_urlparse.urlparse(next_href)
            qs = compat_urlparse.parse_qs(parsed_next_href.query)
            qs.update(COMMON_QUERY)
            next_href = compat_urlparse.urlunparse(
                parsed_next_href._replace(query=compat_urllib_parse_urlencode(qs, True)))

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist_title,
            'entries': entries,
        }
class SoundcloudUserIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for a user's page or one of its resource sub-pages."""

    _VALID_URL = r'''(?x)
                        https?://
                            (?:(?:www|m)\.)?soundcloud\.com/
                            (?P<user>[^/]+)
                            (?:/
                                (?P<rsrc>tracks|albums|sets|reposts|likes|spotlight)
                            )?
                            /?(?:[?#].*)?$
                    '''
    IE_NAME = 'soundcloud:user'
    _TESTS = [{
        'url': 'https://soundcloud.com/soft-cell-official',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (All)',
        },
        'playlist_mincount': 28,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/tracks',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Tracks)',
        },
        'playlist_mincount': 27,
    }, {
        'url': 'https://soundcloud.com/soft-cell-official/albums',
        'info_dict': {
            'id': '207965082',
            'title': 'Soft Cell (Albums)',
        },
        'playlist_mincount': 1,
    }, {
        'url': 'https://soundcloud.com/jcv246/sets',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Playlists)',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://soundcloud.com/jcv246/reposts',
        'info_dict': {
            'id': '12982173',
            'title': 'Jordi / cv (Reposts)',
        },
        'playlist_mincount': 6,
    }, {
        'url': 'https://soundcloud.com/clalberg/likes',
        'info_dict': {
            'id': '11817582',
            'title': 'clalberg (Likes)',
        },
        'playlist_mincount': 5,
    }, {
        'url': 'https://soundcloud.com/grynpyret/spotlight',
        'info_dict': {
            'id': '7098329',
            'title': 'Grynpyret (Spotlight)',
        },
        'playlist_mincount': 1,
    }]

    # Resource name -> API endpoint template; the remaining %s takes the user id
    _BASE_URL_MAP = {
        'all': '%s/stream/users/%%s' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'tracks': '%s/users/%%s/tracks' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'albums': '%s/users/%%s/albums' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'sets': '%s/users/%%s/playlists' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'reposts': '%s/stream/users/%%s/reposts' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'likes': '%s/users/%%s/likes' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
        'spotlight': '%s/users/%%s/spotlight' % SoundcloudPagedPlaylistBaseIE._API_V2_BASE,
    }

    # Resource name -> suffix shown in the playlist title
    _TITLE_MAP = {
        'all': 'All',
        'tracks': 'Tracks',
        'albums': 'Albums',
        'sets': 'Playlists',
        'reposts': 'Reposts',
        'likes': 'Likes',
        'spotlight': 'Spotlight',
    }

    def _real_extract(self, url):
        """Resolve the user, then page through the requested resource."""
        uploader, resource = re.match(self._VALID_URL, url).group('user', 'rsrc')
        # A bare profile URL means "everything in the user's stream"
        if not resource:
            resource = 'all'

        user = self._download_json(
            self._resolv_url('https://soundcloud.com/%s/' % uploader),
            uploader, 'Downloading user info')

        user_id = user['id']
        playlist_title = '%s (%s)' % (user['username'], self._TITLE_MAP[resource])

        return self._extract_playlist(
            self._BASE_URL_MAP[resource] % user_id, compat_str(user_id),
            playlist_title)
2016-01-08 06:54:31 +11:00
2013-09-15 05:41:49 +10:00
2017-07-29 21:41:42 +10:00
class SoundcloudTrackStationIE(SoundcloudPagedPlaylistBaseIE):
    """Extractor for ``soundcloud.com/stations/track/<user>/<track>`` pages."""

    _VALID_URL = r'https?://(?:(?:www|m)\.)?soundcloud\.com/stations/track/[^/]+/(?P<id>[^/?#&]+)'
    IE_NAME = 'soundcloud:trackstation'
    _TESTS = [{
        'url': 'https://soundcloud.com/stations/track/officialsundial/your-text',
        'info_dict': {
            'id': '286017854',
            'title': 'Track station: your-text',
        },
        'playlist_mincount': 47,
    }]

    def _real_extract(self, url):
        """Find the station's numeric id in the web page and page through its tracks."""
        track_name = self._match_id(url)

        # The numeric station id is only available in the page markup
        station_page = self._download_webpage(url, track_name)
        track_id = self._search_regex(
            r'soundcloud:track-stations:(\d+)', station_page, 'track id')

        tracks_url = '%s/stations/soundcloud:track-stations:%s/tracks' % (
            self._API_V2_BASE, track_id)
        playlist_title = 'Track station: %s' % track_name

        return self._extract_playlist(tracks_url, track_id, playlist_title)
2014-05-05 11:12:41 +10:00
2016-09-24 22:29:49 +10:00
class SoundcloudPlaylistIE(SoundcloudPlaylistBaseIE):
    """Extractor for ``api.soundcloud.com/playlists/<id>`` URLs."""

    _VALID_URL = r'https?://api\.soundcloud\.com/playlists/(?P<id>[0-9]+)(?:/?\?secret_token=(?P<token>[^&]+?))?$'
    IE_NAME = 'soundcloud:playlist'
    _TESTS = [{
        'url': 'https://api.soundcloud.com/playlists/4110309',
        'info_dict': {
            'id': '4110309',
            'title': 'TILT Brass - Bowery Poetry Club, August \'03 [Non-Site SCR 02]',
            'description': 're:.*?TILT Brass - Bowery Poetry Club',
        },
        'playlist_count': 6,
    }]

    def _real_extract(self, url):
        """Download the playlist JSON and return its tracks as a playlist."""
        playlist_id, token = re.match(self._VALID_URL, url).group('id', 'token')

        base_url = '%s//api.soundcloud.com/playlists/%s.json?' % (self.http_scheme(), playlist_id)

        api_query = {'client_id': self._CLIENT_ID}
        # Private playlists additionally need their secret token
        if token:
            api_query['secret_token'] = token

        playlist = self._download_json(
            base_url + compat_urllib_parse_urlencode(api_query),
            playlist_id, 'Downloading playlist')

        return {
            '_type': 'playlist',
            'id': playlist_id,
            'title': playlist.get('title'),
            'description': playlist.get('description'),
            'entries': self._extract_track_entries(playlist['tracks']),
        }
2015-10-18 03:23:46 +11:00
class SoundcloudSearchIE(SearchInfoExtractor, SoundcloudIE):
    """Search extractor for the ``scsearch`` key, backed by the v2 search API."""

    IE_NAME = 'soundcloud:search'
    IE_DESC = 'Soundcloud search'
    _MAX_RESULTS = float('inf')
    _TESTS = [{
        'url': 'scsearch15:post-avant jazzcore',
        'info_dict': {
            'title': 'post-avant jazzcore',
        },
        'playlist_count': 15,
    }]

    _SEARCH_KEY = 'scsearch'
    _MAX_RESULTS_PER_PAGE = 200
    _DEFAULT_RESULTS_PER_PAGE = 50
    _API_V2_BASE = 'https://api-v2.soundcloud.com'

    def _get_collection(self, endpoint, collection_id, **query):
        """Yield url_results for the items of a paged API collection.

        endpoint      -- API path appended to _API_V2_BASE
        collection_id -- id used when reporting the downloads
        query         -- extra query parameters; 'limit' is the total number
                         of results wanted (defaults to
                         _DEFAULT_RESULTS_PER_PAGE)
        """
        # Keep the total that was asked for separate from the page size.
        # Using the capped page size as the stop condition silently
        # truncated every search for more than _MAX_RESULTS_PER_PAGE results.
        wanted = query.get('limit', self._DEFAULT_RESULTS_PER_PAGE)
        query['limit'] = min(wanted, self._MAX_RESULTS_PER_PAGE)
        query['client_id'] = self._CLIENT_ID
        query['linked_partitioning'] = '1'
        query['offset'] = 0
        data = compat_urllib_parse_urlencode(query)
        next_url = '{0}{1}?{2}'.format(self._API_V2_BASE, endpoint, data)

        collected_results = 0

        for i in itertools.count(1):
            response = self._download_json(
                next_url, collection_id, 'Downloading page {0}'.format(i),
                'Unable to download API page')

            collection = response.get('collection', [])
            if not collection:
                break

            # Drop null/empty items
            collection = list(filter(bool, collection))

            for item in collection:
                # Never hand out more results than were asked for
                if collected_results >= wanted:
                    return
                yield self.url_result(item['uri'], SoundcloudIE.ie_key())
                collected_results += 1

            if not collection or collected_results >= wanted:
                break

            next_url = response.get('next_href')
            if not next_url:
                break
            # Make sure the follow-up request is authenticated, the same way
            # the paged playlist extractor re-adds client_id to next_href.
            next_url = update_url_query(next_url, {'client_id': self._CLIENT_ID})

    def _get_n_results(self, query, n):
        """Return a playlist with up to *n* track results for *query*."""
        tracks = self._get_collection('/search/tracks', query, limit=n, q=query)
        return self.playlist_result(tracks, playlist_title=query)