# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import re
import logging
import os
from urllib.parse import urlparse

from svtplay_dl.utils.parser import readconfig, setup_defaults, merge
from svtplay_dl.utils.http import download_thumbnails, HTTP


class Service(object):
    supported_domains = []
    supported_domains_re = []

    def __init__(self, config, _url, http=None):
        self._url = _url
        self._urldata = None
        self._error = False
        self.subtitle = None
        self.cookies = {}
        self.auto_name = None
        self.output = {
            "title": None,
            "season": None,
            "episode": None,
            "episodename": None,
            "id": None,
            "service": self.__class__.__name__.lower(),
            "tvshow": None,
            "title_nice": None,
            "showdescription": None,
            "episodedescription": None,
            "showthumbnailurl": None,
            "episodethumbnailurl": None,
        }
        if not http:
            self.http = HTTP(config)
        else:
            self.http = http

        # Config
        if os.path.isfile(config.get("configfile")):
            self.config = merge(
                readconfig(setup_defaults(), config.get("configfile"),
                           service=self.__class__.__name__.lower()).get_variable(),
                config.get_variable(),
            )
        else:
            self.config = config

        logging.debug("service: {}".format(self.__class__.__name__.lower()))

    @property
    def url(self):
        return self._url

    def get_urldata(self):
        if self._urldata is None:
            self._urldata = self.http.request("get", self.url).text
        return self._urldata

    @classmethod
    def handles(cls, url):
        urlp = urlparse(url)

        # Apply supported_domains_re regexp to the netloc. This
        # is meant for 'dynamic' domains, e.g. containing country
        # information etc.
        for domain_re in [re.compile(x) for x in cls.supported_domains_re]:
            if domain_re.match(urlp.netloc):
                return True

        if urlp.netloc in cls.supported_domains:
            return True

        # For every listed domain, try with www.subdomain as well.
        if urlp.netloc in ['www.' + x for x in cls.supported_domains]:
            return True

        return False

    def get_subtitle(self, options):
        pass

    # the options parameter is unused, but is part of the
    # interface, so we don't want to remove it. Thus, the
    # pylint ignore.
    def find_all_episodes(self, options):  # pylint: disable-msg=unused-argument
        logging.warning("--all-episodes not implemented for this service")
        return [self.url]


def opengraph_get(html, prop):
    """
    Extract specified OpenGraph property from html.

        >>> opengraph_get('<meta property="og:image" content="http://example.com/img.jpg" />', "image")
        'http://example.com/img.jpg'
        >>> opengraph_get('<meta content="http://example.com/img.jpg" property="og:image" />', "image")
        'http://example.com/img.jpg'
    """
    match = re.search('<meta [^>]*property="og:' + prop + '" content="([^"]*)"', html)
    if match is None:
        match = re.search('<meta [^>]*content="([^"]*)" property="og:' + prop + '"', html)
    if match is None:
        return None
    return match.group(1)
class OpenGraphThumbMixin(object):
    """
    Mix this into the service class to grab thumbnail from OpenGraph properties.
    """
    def get_thumbnail(self, options):
        url = opengraph_get(self.get_urldata(), "image")
        if url is None:
            return
        download_thumbnails(self.output, options, [(False, url)])


class MetadataThumbMixin(object):
    """
    Mix this into the service class to grab thumbnail from extracted metadata.
    """
    def get_thumbnail(self, options):
        urls = []
        if self.output["showthumbnailurl"] is not None:
            urls.append((True, self.output["showthumbnailurl"]))
        if self.output["episodethumbnailurl"] is not None:
            urls.append((False, self.output["episodethumbnailurl"]))
        if urls:
            download_thumbnails(self.output, options, urls)


class Generic(Service):
    ''' Videos embedded in other sites '''

    def get(self, sites):
        data = self.http.request("get", self.url).text
        match = re.search(r"src=(\"|\')(http://www.svt.se/wd[^\'\"]+)(\"|\')", data)
        stream = None
        if match:
            url = match.group(2)
            for i in sites:
                if i.handles(url):
                    url = url.replace("&amp;", "&").replace("&#038;", "&")
                    return url, i(self.config, url)

        match = re.search(r"src=\"(http://player.vimeo.com/video/[0-9]+)\" ", data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search(r"tv4play.se/iframe/video/(\d+)?", data)
        if match:
            url = "http://www.tv4play.se/?video_id=%s" % match.group(1)
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search(r"embed.bambuser.com/broadcast/(\d+)", data)
        if match:
            url = "http://bambuser.com/v/%s" % match.group(1)
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search(r'src="(http://tv.aftonbladet[^"]*)"', data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search(r'a href="(http://tv.aftonbladet[^"]*)" class="abVi', data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search(r"iframe src='(http://www.svtplay[^']*)'", data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return url, i(self.config, url)

        match = re.search('src="(http://mm-resource-service.herokuapp.com[^"]*)"', data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search(r'src="([^.]+\.solidtango.com[^"+]+)"', data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, url)

        match = re.search('(lemonwhale|lwcdn.com)', data)
        if match:
            url = "http://lemonwhale.com"
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)', data)
        if match:
            url = match.group(1)
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search('(picsearch_ajax_auth|screen9-ajax-auth)', data)
        if match:
            url = "http://csp.picsearch.com"
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search('iframe src="(//csp.screen9.com[^"]+)"', data)
        if match:
            url = "http:%s" % match.group(1)
            for i in sites:
                if i.handles(url):
                    return self.url, i(self.config, self.url)

        match = re.search('source src="([^"]+)" type="application/x-mpegURL"', data)
        if match:
            for i in sites:
                if i.__name__ == "Raw":
                    return self.url, i(self.config, match.group(1))

        return self.url, stream
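# Illustrative sketch (not part of the library): how a concrete service is
# expected to combine Service with one of the thumbnail mixins. The class
# name, domain and URL below are made up for the example.
#
#     class Example(Service, OpenGraphThumbMixin):
#         supported_domains = ["example.com"]
#
#     Example.handles("http://www.example.com/video/1")   # -> True, matched via
#     # the "www." + domain fallback in Service.handles(); get_thumbnail() would
#     # then pull the og:image URL from the page with opengraph_get() and pass
#     # it on to download_thumbnails().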
def service_handler(sites, options, url):
    handler = None

    for i in sites:
        if i.handles(url):
            handler = i(options, url)
            break

    return handler
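# Illustrative sketch (not part of the library): service_handler() is the
# dispatcher a frontend would call to pick a service for a URL. The sites
# list, config object and URL here are assumptions for the example only.
#
#     sites = [Generic]          # normally: every available Service subclass
#     handler = service_handler(sites, config, "http://www.example.com/video/1")
#     if handler is None:
#         ...                    # no service claims this URL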