0
0
Fork 0

[extractor/common] Skip html comment tags (Closes #6822)

main
Sergey M․ 2015-09-11 21:07:32 +06:00
parent 73eb13dfc7
commit 586f1cc532
1 changed file with 1 addition and 0 deletions

View File

@@ -731,6 +731,7 @@ class InfoExtractor(object):
@staticmethod @staticmethod
def _hidden_inputs(html): def _hidden_inputs(html):
html = re.sub(r'<!--(?:(?!<!--).)*-->', '', html)
hidden_inputs = {} hidden_inputs = {}
for input in re.findall(r'(?i)<input([^>]+)>', html): for input in re.findall(r'(?i)<input([^>]+)>', html):
if not re.search(r'type=(["\'])(?:hidden|submit)\1', input): if not re.search(r'type=(["\'])(?:hidden|submit)\1', input):