[slutload] Fix and improve extraction (closes #17001)

This commit is contained in:
Sergey M․ 2018-07-19 01:59:00 +07:00
parent 38f1eb0ac3
commit c63f5fb863
No known key found for this signature in database
GPG Key ID: 2C393E0F18A9236D

View File

@@ -1,12 +1,10 @@
from __future__ import unicode_literals from __future__ import unicode_literals
import re
from .common import InfoExtractor from .common import InfoExtractor
class SlutloadIE(InfoExtractor): class SlutloadIE(InfoExtractor):
_VALID_URL = r'^https?://(?:\w+\.)?slutload\.com/video/[^/]+/(?P<id>[^/]+)/?$' _VALID_URL = r'https?://(?:\w+\.)?slutload\.com/(?:video/[^/]+|embed_player|watch)/(?P<id>[^/]+)'
_TESTS = [{ _TESTS = [{
'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/', 'url': 'http://www.slutload.com/video/virginie-baisee-en-cam/TD73btpBqSxc/',
'md5': '868309628ba00fd488cf516a113fd717', 'md5': '868309628ba00fd488cf516a113fd717',
@@ -16,33 +14,52 @@ class SlutloadIE(InfoExtractor):
'title': 'virginie baisee en cam', 'title': 'virginie baisee en cam',
'age_limit': 18, 'age_limit': 18,
'thumbnail': r're:https?://.*?\.jpg' 'thumbnail': r're:https?://.*?\.jpg'
} },
}, { }, {
# mobile site # mobile site
'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/', 'url': 'http://mobile.slutload.com/video/masturbation-solo/fviFLmc6kzJ/',
'only_matching': True, 'only_matching': True,
}, {
'url': 'http://www.slutload.com/embed_player/TD73btpBqSxc/',
'only_matching': True,
}, {
'url': 'http://www.slutload.com/watch/TD73btpBqSxc/Virginie-Baisee-En-Cam.html',
'only_matching': True,
}] }]
def _real_extract(self, url): def _real_extract(self, url):
video_id = self._match_id(url) video_id = self._match_id(url)
desktop_url = re.sub(r'^(https?://)mobile\.', r'\1', url) embed_page = self._download_webpage(
webpage = self._download_webpage(desktop_url, video_id) 'http://www.slutload.com/embed_player/%s' % video_id, video_id,
'Downloading embed page', fatal=False)
video_title = self._html_search_regex(r'<h1><strong>([^<]+)</strong>', if embed_page:
webpage, 'title').strip() def extract(what):
return self._html_search_regex(
video_url = self._html_search_regex( r'data-video-%s=(["\'])(?P<url>(?:(?!\1).)+)\1' % what,
r'(?s)<div id="vidPlayer"\s+data-url="([^"]+)"', embed_page, 'video %s' % what, default=None, group='url')
webpage, 'video URL')
thumbnail = self._html_search_regex(
r'(?s)<div id="vidPlayer"\s+.*?previewer-file="([^"]+)"',
webpage, 'thumbnail', fatal=False)
video_url = extract('url')
if video_url:
title = self._html_search_regex(
r'<title>([^<]+)', embed_page, 'title', default=video_id)
return { return {
'id': video_id, 'id': video_id,
'url': video_url, 'url': video_url,
'title': video_title, 'title': title,
'thumbnail': thumbnail, 'thumbnail': extract('preview'),
'age_limit': 18 'age_limit': 18
} }
webpage = self._download_webpage(
'http://www.slutload.com/video/_/%s/' % video_id, video_id)
title = self._html_search_regex(
r'<h1><strong>([^<]+)</strong>', webpage, 'title').strip()
info = self._parse_html5_media_entries(url, webpage, video_id)[0]
info.update({
'id': video_id,
'title': title,
'age_limit': 18,
})
return info