[ytsearch] Fix flat title extraction (closes #11260, closes #15681)

This commit is contained in:
Sergey M․ 2018-02-23 03:34:55 +07:00
parent 3d977fe4d2
commit 870f3bfc63
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -2583,7 +2583,11 @@ class YoutubePlaylistsIE(YoutubePlaylistsBaseInfoExtractor):
}] }]
class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE): class YoutubeSearchBaseInfoExtractor(YoutubePlaylistBaseInfoExtractor):
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
class YoutubeSearchIE(SearchInfoExtractor, YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com searches' IE_DESC = 'YouTube.com searches'
# there doesn't appear to be a real limit, for example if you search for # there doesn't appear to be a real limit, for example if you search for
# 'python' you get more than 8.000.000 results # 'python' you get more than 8.000.000 results
@@ -2617,8 +2621,7 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubePlaylistIE):
raise ExtractorError( raise ExtractorError(
'[youtube] No video results', expected=True) '[youtube] No video results', expected=True)
new_videos = self._ids_to_results(orderedSet(re.findall( new_videos = list(self._process_page(html_content))
r'href="/watch\?v=(.{11})', html_content)))
videos += new_videos videos += new_videos
if not new_videos or len(videos) > limit: if not new_videos or len(videos) > limit:
break break
@@ -2641,11 +2644,10 @@ class YoutubeSearchDateIE(YoutubeSearchIE):
_EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'} _EXTRA_QUERY_ARGS = {'search_sort': 'video_date_uploaded'}
class YoutubeSearchURLIE(YoutubePlaylistBaseInfoExtractor): class YoutubeSearchURLIE(YoutubeSearchBaseInfoExtractor):
IE_DESC = 'YouTube.com search URLs' IE_DESC = 'YouTube.com search URLs'
IE_NAME = 'youtube:search_url' IE_NAME = 'youtube:search_url'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)' _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?P<query>[^&]+)(?:[&]|$)'
_VIDEO_RE = r'href="\s*/watch\?v=(?P<id>[0-9A-Za-z_-]{11})(?:[^"]*"[^>]+\btitle="(?P<title>[^"]+))?'
_TESTS = [{ _TESTS = [{
'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video', 'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
'playlist_mincount': 5, 'playlist_mincount': 5,