0
0
Fork 0

[utils] Share JSON-LD regex

main
Sergey M․ 2018-07-09 23:43:05 +07:00
parent e06632e3fe
commit 0685d9727b
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D
2 changed files with 3 additions and 2 deletions

View File

@@ -52,6 +52,7 @@ from ..utils import (
GeoUtils, GeoUtils,
int_or_none, int_or_none,
js_to_json, js_to_json,
JSON_LD_RE,
mimetype2ext, mimetype2ext,
orderedSet, orderedSet,
parse_codecs, parse_codecs,
@@ -1149,8 +1150,7 @@ class InfoExtractor(object):
def _search_json_ld(self, html, video_id, expected_type=None, **kwargs): def _search_json_ld(self, html, video_id, expected_type=None, **kwargs):
json_ld = self._search_regex( json_ld = self._search_regex(
r'(?s)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>', JSON_LD_RE, html, 'JSON-LD', group='json_ld', **kwargs)
html, 'JSON-LD', group='json_ld', **kwargs)
default = kwargs.get('default', NO_DEFAULT) default = kwargs.get('default', NO_DEFAULT)
if not json_ld: if not json_ld:
return default if default is not NO_DEFAULT else {} return default if default is not NO_DEFAULT else {}

View File

@@ -184,6 +184,7 @@ DATE_FORMATS_MONTH_FIRST.extend([
]) ])
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)" PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\'])application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
def preferredencoding(): def preferredencoding():