# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
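"""Service base classes for svtplay-dl.

Service is the per-site base class, the thumbnail mixins fetch artwork,
and Generic sniffs embedded players out of arbitrary pages when no
specific service claims the URL.
"""
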
import logging
import os
import re
from urllib.parse import urlparse

from svtplay_dl.utils.http import download_thumbnails
from svtplay_dl.utils.http import HTTP
from svtplay_dl.utils.parser import merge
from svtplay_dl.utils.parser import readconfig
from svtplay_dl.utils.parser import setup_defaults


class Service:
    supported_domains = []
    supported_domains_re = []

    def __init__(self, config, _url, http=None):
        self._url = _url
        self._urldata = None
        self._error = False
        self.subtitle = None
        self.cookies = {}
        self.auto_name = None
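
        # Metadata collected while parsing; these fields drive output
        # filename formatting and, via the thumbnail mixins, artwork
        # downloads (an informal summary, not an exhaustive contract).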
        self.output = {
            "title": None,
            "season": None,
            "episode": None,
            "episodename": None,
            "id": None,
            "service": self.__class__.__name__.lower(),
            "tvshow": None,
            "title_nice": None,
            "showdescription": None,
            "episodedescription": None,
            "showthumbnailurl": None,
            "episodethumbnailurl": None,
            "publishing_datetime": None,
            "language": None,
            "ext": None,
        }

        # Config
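        # readconfig() loads the service-specific section of the config file
        # on top of the built-in defaults; merge() then combines that with
        # whatever was given on the command line.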
        if config.get("configfile") and os.path.isfile(config.get("configfile")):
            self.config = merge(
                readconfig(setup_defaults(), config.get("configfile"), service=self.__class__.__name__.lower()).get_variable(),
                config.get_variable(),
            )
        else:
            self.config = config

        if not http:
            self.http = HTTP(self.config)
        else:
            self.http = http

        logging.debug("service: %s", self.__class__.__name__.lower())

    @property
    def url(self):
        return self._url
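
    # The page body is cached on first fetch, so repeated parsing of the
    # same URL costs a single HTTP request.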
    def get_urldata(self):
        if self._urldata is None:
            self._urldata = self.http.request("get", self.url).text
        return self._urldata

    @classmethod
    def handles(cls, url):
        urlp = urlparse(url)

        # Apply the supported_domains_re regexps to the netloc. This
        # is meant for 'dynamic' domains, e.g. ones containing country
        # information.
        for domain_re in [re.compile(x) for x in cls.supported_domains_re]:
            if domain_re.match(urlp.netloc):
                return True

        if urlp.netloc in cls.supported_domains:
            return True

        # For every listed domain, try the "www." subdomain as well.
        if urlp.netloc in ["www." + x for x in cls.supported_domains]:
            return True

        return False
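
    # Illustration (hypothetical service, not part of this module): with
    #   supported_domains = ["example.com"]
    # handles() accepts "http://example.com/video/1" and
    # "http://www.example.com/video/1" but rejects "http://video.example.com/1",
    # since only the exact netloc and its "www." variant are checked.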

    def get_subtitle(self, options):
        pass

    # the options parameter is unused, but is part of the
    # interface, so we don't want to remove it. Thus, the
    # pylint ignore.
    def find_all_episodes(self, options):  # pylint: disable-msg=unused-argument
        logging.warning("--all-episodes not implemented for this service")
        return [self.url]


def opengraph_get(html, prop):
    """
    Extract specified OpenGraph property from html.

    >>> opengraph_get('<html><head><meta property="og:image" content="http://example.com/img.jpg"><meta ...', "image")
    'http://example.com/img.jpg'
    >>> opengraph_get('<html><head><meta content="http://example.com/img2.jpg" property="og:image"><meta ...', "image")
    'http://example.com/img2.jpg'
    >>> opengraph_get('<html><head><meta name="og:image" property="og:image" content="http://example.com/img3.jpg"><meta ...', "image")
    'http://example.com/img3.jpg'
    """
    match = re.search('<meta [^>]*property="og:' + prop + '" content="([^"]*)"', html)
    if match is None:
        match = re.search('<meta [^>]*content="([^"]*)" property="og:' + prop + '"', html)
        if match is None:
            return None
    return match.group(1)


class OpenGraphThumbMixin:
    """
    Mix this into a service class to grab the thumbnail from OpenGraph properties.
    """

    def get_thumbnail(self, options):
        url = opengraph_get(self.get_urldata(), "image")
        if url is None:
            return
        download_thumbnails(self.output, options, [(False, url)])
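

# A service opts in to thumbnail downloading by listing a mixin among its
# bases, e.g. (hypothetical):
#   class Example(Service, OpenGraphThumbMixin):
#       supported_domains = ["example.com"]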


class MetadataThumbMixin:
    """
    Mix this into a service class to grab thumbnails from extracted metadata.
    """
    def get_thumbnail(self, options):
        urls = []
        if self.output["showthumbnailurl"] is not None:
            urls.append((True, self.output["showthumbnailurl"]))
        if self.output["episodethumbnailurl"] is not None:
            urls.append((False, self.output["episodethumbnailurl"]))
        if urls:
            download_thumbnails(self.output, options, urls)


class Generic(Service):
    """Videos embedded in sites."""

    def get(self, sites):
        data = self.http.request("get", self.url).text
        return self._match(data, sites)

    def _match(self, data, sites):
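        # Returns (url, handler): the target URL (possibly rewritten) plus an
        # instantiated service able to handle it, or (self.url, None) when no
        # embedded player is recognized.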
        match = re.search(r"src=(\"|\')(http://www.svt.se/wd[^\'\"]+)(\"|\')", data)
        stream = None
        if match:
            url = match.group(2)
            for i in sites:
                if i.handles(url):
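                    # Unescape HTML entities in the embedded URL before
                    # handing it on.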
                    url = url.replace("&amp;", "&").replace("&#038;", "&")
                    return url, i(self.config, url)

        matchlist = [
            r"src=\"(https://player.vimeo.com/video/[0-9]+)\" ",
            r'src="(http://tv.aftonbladet[^"]*)"',
            r'a href="(http://tv.aftonbladet[^"]*)" class="abVi',
            r"iframe src='(http://www.svtplay[^']*)'",
            'src="(http://mm-resource-service.herokuapp.com[^"]*)"',
            r'src="([^.]+\.solidtango.com[^"+]+)"',
            's.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)',
        ]
        for i in matchlist:
            match = re.search(i, data)
            if match:
                url = match.group(1)
                for n in sites:
                    if n.handles(match.group(1)):
                        return match.group(1), n(self.config, url)

        match = re.search(r"tv4play.se/iframe/video/(\d+)?", data)
        if match:
            url = f"http://www.tv4play.se/?video_id={match.group(1)}"
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search("(lemonwhale|lwcdn.com)", data)
        if match:
            url = "http://lemonwhale.com"
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search("(picsearch_ajax_auth|screen9-ajax-auth)", data)
        if match:
            url = "http://csp.picsearch.com"
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search('iframe src="(//csp.screen9.com[^"]+)"', data)
        if match:
            url = f"https:{match.group(1)}"
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search('source src="([^"]+)" type="application/x-mpegURL"', data)
        if match:
            for i in sites:
                if i.__name__ == "Raw":
                    return self.url, i(self.config, match.group(1))

        return self.url, stream


def service_handler(sites, options, url):
    handler = None

    for i in sites:
        if i.handles(url):
            handler = i(options, url)
            break

    return handler
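

# Typical call (sketch; the concrete service classes are illustrative):
#   handler = service_handler([SomeService, OtherService], options, url)
#   if handler is None:
#       ...  # no listed service claims the URL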