[extractor/common] Allow angle brackets in attributes in _og_regexes (#7215)
This commit is contained in:
parent 49941c4e4f
commit 448ef1f31c
@@ -37,12 +37,16 @@ class TestInfoExtractor(unittest.TestCase):
|
|||||||
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
<meta property='og:image' content='http://domain.com/pic.jpg?key1=val1&key2=val2'/>
|
||||||
<meta content='application/x-shockwave-flash' property='og:video:type'>
|
<meta content='application/x-shockwave-flash' property='og:video:type'>
|
||||||
<meta content='Foo' property=og:foobar>
|
<meta content='Foo' property=og:foobar>
|
||||||
|
<meta name="og:test1" content='foo > < bar'/>
|
||||||
|
<meta name="og:test2" content="foo >//< bar"/>
|
||||||
'''
|
'''
|
||||||
self.assertEqual(ie._og_search_title(html), 'Foo')
|
self.assertEqual(ie._og_search_title(html), 'Foo')
|
||||||
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
self.assertEqual(ie._og_search_description(html), 'Some video\'s description ')
|
||||||
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
self.assertEqual(ie._og_search_thumbnail(html), 'http://domain.com/pic.jpg?key1=val1&key2=val2')
|
||||||
self.assertEqual(ie._og_search_video_url(html, default=None), None)
|
self.assertEqual(ie._og_search_video_url(html, default=None), None)
|
||||||
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
self.assertEqual(ie._og_search_property('foobar', html), 'Foo')
|
||||||
|
self.assertEqual(ie._og_search_property('test1', html), 'foo > < bar')
|
||||||
|
self.assertEqual(ie._og_search_property('test2', html), 'foo >//< bar')
|
||||||
|
|
||||||
def test_html_search_meta(self):
|
def test_html_search_meta(self):
|
||||||
ie = self.ie
|
ie = self.ie
|
||||||
|
@@ -645,7 +645,7 @@ class InfoExtractor(object):
|
|||||||
# Helper functions for extracting OpenGraph info
|
# Helper functions for extracting OpenGraph info
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _og_regexes(prop):
|
def _og_regexes(prop):
|
||||||
content_re = r'content=(?:"([^>]+?)"|\'([^>]+?)\'|\s*([^\s"\'=<>`]+?))'
|
content_re = r'content=(?:"([^"]+?)"|\'([^\']+?)\'|\s*([^\s"\'=<>`]+?))'
|
||||||
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
property_re = (r'(?:name|property)=(?:\'og:%(prop)s\'|"og:%(prop)s"|\s*og:%(prop)s\b)'
|
||||||
% {'prop': re.escape(prop)})
|
% {'prop': re.escape(prop)})
|
||||||
template = r'<meta[^>]+?%s[^>]+?%s'
|
template = r'<meta[^>]+?%s[^>]+?%s'
|
||||||
|
Loading…
Reference in New Issue
Block a user