diff --git a/test/tests.json b/test/tests.json
index c39d1d9c1..82da27d5b 100644
--- a/test/tests.json
+++ b/test/tests.json
@@ -325,7 +325,7 @@
"file": "wshh6a7q1ny0G34ZwuIO.mp4",
"md5": "9d04de741161603bf7071bbf4e883186",
"info_dict": {
- "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick! "
+ "title": "Video: KO Of The Week: MMA Fighter Gets Knocked Out By Swift Head Kick!"
}
},
{
diff --git a/youtube_dl/InfoExtractors.py b/youtube_dl/InfoExtractors.py
index 86cc7c748..6060a5988 100755
--- a/youtube_dl/InfoExtractors.py
+++ b/youtube_dl/InfoExtractors.py
@@ -222,6 +222,16 @@ class InfoExtractor(object):
u'please report this issue on GitHub.' % _name)
return None
+ def _html_search_regex(self, pattern, string, name, default=None, fatal=True, flags=0):
+ """
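+ Like _search_regex, but a non-empty result is passed through clean_html() (strips HTML tags, unescapes entities) and stripped of surrounding whitespace; an empty or None result (e.g. the default) is returned unchanged.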
+ """
+ res = self._search_regex(pattern, string, name, default, fatal, flags)
+ if res:
+ return clean_html(res).strip()
+ else:
+ return res
+
class SearchInfoExtractor(InfoExtractor):
"""
Base class for paged search queries extractors.
@@ -1923,9 +1933,8 @@ class FacebookIE(InfoExtractor):
video_duration = int(video_data['video_duration'])
thumbnail = video_data['thumbnail_src']
- video_title = self._search_regex('