# svtplay-dl/lib/svtplay_dl/service/__init__.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import re
from svtplay_dl.utils.urllib import urlparse
from svtplay_dl.utils import download_thumbnail, get_http_data

import logging

log = logging.getLogger('svtplay_dl')

class Service(object):
    """
    Common base for the per-site service classes.

    A service instance is bound to one URL. The class-level
    handles() check decides, from the host part of a URL,
    whether the class is responsible for it.
    """
    # Host names accepted verbatim (and with a 'www.' prefix).
    supported_domains = []
    # Regular expressions applied to the host with re.match().
    supported_domains_re = []

    def __init__(self, _url):
        self._urldata = None
        self._url = _url

    @property
    def url(self):
        """The URL this instance was created with (read-only)."""
        return self._url

    def get_urldata(self):
        """
        Return get_http_data(self.url), fetching it on first use.

        The result is kept on the instance, so later calls reuse it
        instead of calling get_http_data() again.
        """
        cached = self._urldata
        if cached is None:
            cached = self._urldata = get_http_data(self.url)
        return cached

    @classmethod
    def handles(cls, url):
        """
        Tell whether this class is responsible for url.

        Only the netloc part of url is looked at. It is accepted
        if any pattern in supported_domains_re matches it, or if it
        equals an entry of supported_domains, optionally prefixed
        with 'www.'.
        """
        netloc = urlparse(url).netloc

        # The regexps cover 'dynamic' domains, e.g. ones carrying
        # country information. All patterns are compiled before
        # any of them is tried.
        compiled = [re.compile(expr) for expr in cls.supported_domains_re]
        if any(rx.match(netloc) for rx in compiled):
            return True

        # Plain domains: as listed, or with a www. subdomain.
        known = cls.supported_domains
        return netloc in known or netloc in ['www.' + name for name in known]

    def get_subtitle(self, options):
        """Do nothing; the base class has no subtitle support."""
        return None

    # the options parameter is unused, but is part of the
    # interface, so we don't want to remove it. Thus, the
    # pylint ignore.
    def find_all_episodes(self, options): # pylint: disable-msg=unused-argument
        """
        Fallback for --all-episodes: log a warning and return a
        list holding only this instance's own URL.
        """
        log.warning("--all-episodes not implemented for this service")
        return [self.url]

def opengraph_get(html, prop):
    """
    Extract specified OpenGraph property from html.

    Looks for a <meta> tag whose property="og:<prop>" attribute is
    directly followed or directly preceded by its content="..."
    attribute, and returns the content value. Returns None when no
    such tag is found. prop is matched literally: it is escaped
    before being put into the regular expression.

        >>> opengraph_get('<html><head><meta property="og:image" content="http://example.com/img.jpg"><meta ...', "image")
        'http://example.com/img.jpg'
        >>> opengraph_get('<html><head><meta content="http://example.com/img2.jpg" property="og:image"><meta ...', "image")
        'http://example.com/img2.jpg'
        >>> opengraph_get('<html><head><meta name="og:image" property="og:image" content="http://example.com/img3.jpg"><meta ...', "image")
        'http://example.com/img3.jpg'
    """
    # Escape the property name so characters such as '.' or '+' are
    # matched as themselves rather than as regexp operators.
    og_attr = 'property="og:' + re.escape(prop) + '"'
    content_attr = 'content="([^"]*)"'

    # Try property-before-content first, then the reverse order.
    match = re.search('<meta [^>]*' + og_attr + ' ' + content_attr, html)
    if match is None:
        match = re.search('<meta [^>]*' + content_attr + ' ' + og_attr, html)
    if match is None:
        return None
    return match.group(1)


class OpenGraphThumbMixin(object):
    """
    Mixin providing get_thumbnail() for service classes, using the
    page's OpenGraph "image" property as the thumbnail location.
    """
    def get_thumbnail(self, options):
        """
        Look up og:image in self.get_urldata() and, when present,
        pass it to download_thumbnail() together with options.
        Nothing is done when the property is missing.
        """
        image_url = opengraph_get(self.get_urldata(), "image")
        if image_url is not None:
            download_thumbnail(options, image_url)


from svtplay_dl.service.aftonbladet import Aftonbladet
from svtplay_dl.service.bambuser import Bambuser
from svtplay_dl.service.dbtv import Dbtv
from svtplay_dl.service.dr import Dr
from svtplay_dl.service.expressen import Expressen
from svtplay_dl.service.hbo import Hbo
from svtplay_dl.service.justin import Justin
from svtplay_dl.service.kanal5 import Kanal5
from svtplay_dl.service.lemonwhale import Lemonwhale
from svtplay_dl.service.mtvnn import Mtvnn
from svtplay_dl.service.mtvservices import Mtvservices
from svtplay_dl.service.nrk import Nrk
from svtplay_dl.service.oppetarkiv import OppetArkiv
from svtplay_dl.service.picsearch import Picsearch
from svtplay_dl.service.qbrick import Qbrick
from svtplay_dl.service.radioplay import Radioplay
from svtplay_dl.service.ruv import Ruv
from svtplay_dl.service.sr import Sr
from svtplay_dl.service.svtplay import Svtplay
from svtplay_dl.service.tv4play import Tv4play
from svtplay_dl.service.urplay import Urplay
from svtplay_dl.service.vg import Vg
from svtplay_dl.service.viaplay import Viaplay
from svtplay_dl.service.vimeo import Vimeo

# Service classes tried by service_handler() and Generic.get().
# Both walk this list from the top and use the first class whose
# handles() accepts the URL, so the order is significant whenever
# more than one class would accept the same URL.
sites = [
    Aftonbladet,
    Bambuser,
    Dbtv,
    Dr,
    Expressen,
    Hbo,
    Justin,
    Lemonwhale,
    Kanal5,
    Mtvservices,
    Mtvnn,
    Nrk,
    Qbrick,
    Picsearch,
    Ruv,
    Radioplay,
    Sr,
    Svtplay,
    OppetArkiv,
    Tv4play,
    Urplay,
    Viaplay,
    Vimeo,
    Vg]


class Generic(object):
    """
    Finds videos that are embedded in an arbitrary page.

    The page is fetched and searched for a fixed set of known embed
    snippets; the URL derived from a snippet is then offered to the
    classes in sites.
    """

    @staticmethod
    def _site_for(url):
        """Return the first class in sites whose handles() accepts url, else None."""
        for site in sites:
            if site.handles(url):
                return site
        return None

    def get(self, url):
        """
        Search the page at url for a supported embedded video.

        Returns a (video_url, handler) tuple, where handler is an
        instance of the matching service class created with
        video_url. When nothing usable is found the second element
        is None; the first element is then the page URL, unless it
        was replaced by an extracted candidate that no service
        accepted, so it should not be relied upon in that case.
        """
        data = get_http_data(url)

        # svt.se embedded player
        match = re.search(r"src=\"(http://www.svt.se/wd.*)\" height", data)
        if match:
            url = match.group(1)
            site = self._site_for(url)
            if site is not None:
                # The URL comes out of HTML source, where '&' is
                # written as the entity '&amp;'.
                url = url.replace("&amp;", "&")
                return url, site(url)

        # Vimeo player
        match = re.search(r"src=\"(http://player.vimeo.com/video/[0-9]+)\" ", data)
        if match:
            embed_url = match.group(1)
            site = self._site_for(embed_url)
            if site is not None:
                # Build the handler from the player URL that was
                # matched (and is returned), not from the URL of the
                # page it was embedded in.
                return embed_url, site(embed_url)

        # tv4play iframe; the id is mandatory so that the URL below
        # is never built from a missing group.
        match = re.search(r"tv4play.se/iframe/video/(\d+)", data)
        if match:
            url = "http://www.tv4play.se/?video_id=%s" % match.group(1)
            site = self._site_for(url)
            if site is not None:
                return url, site(url)

        # bambuser broadcast
        match = re.search(r"embed.bambuser.com/broadcast/(\d+)", data)
        if match:
            url = "http://bambuser.com/v/%s" % match.group(1)
            site = self._site_for(url)
            if site is not None:
                return url, site(url)

        # aftonbladet iframe
        match = re.search(r'iframe src="(http://tv.aftonbladet[^"]*)"', data)
        if match:
            url = match.group(1)
            site = self._site_for(url)
            if site is not None:
                return url, site(url)

        # aftonbladet link
        match = re.search(r'a href="(http://tv.aftonbladet[^"]*)" class="abVi', data)
        if match:
            url = match.group(1)
            site = self._site_for(url)
            if site is not None:
                return url, site(url)

        # svtplay iframe
        match = re.search(r"iframe src='(http://www.svtplay[^']*)'", data)
        if match:
            url = match.group(1)
            site = self._site_for(url)
            if site is not None:
                return url, site(url)

        return url, None

def service_handler(url):
    """
    Return an instance of the first class in sites whose handles()
    accepts url, created with that url. Returns None when no class
    in the list accepts it.
    """
    for site in sites:
        if site.handles(url):
            return site(url)
    return None
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Fix so embedded videos works again 2013-03-23 16:11:36 +01:00			`import re`
service: missing urlparse for py3 2014-01-05 17:28:00 +01:00			`from svtplay_dl.utils.urllib import urlparse`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.utils import download_thumbnail, get_http_data`

Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`import logging`

			`log = logging.getLogger('svtplay_dl')`
Fix so embedded videos works again 2013-03-23 16:11:36 +01:00
			`class Service(object):`
Add default handle method in Service base class The default handle method will look for a supported_domains attribute (a list), containing the supported domains. The subclassed service class can of course override this if other means of determining support is needded. 2014-01-01 14:57:17 +01:00			`supported_domains = []`
Support supported_domains regexp for services Lets services with more complex domains (like domains with language/country codes) use a regular expressions that will match the supported domains for the handles() method. 2014-01-01 15:50:47 +01:00			`supported_domains_re = []`
Add default handle method in Service base class The default handle method will look for a supported_domains attribute (a list), containing the supported domains. The subclassed service class can of course override this if other means of determining support is needded. 2014-01-01 14:57:17 +01:00
Add get_urldata() method to service self.get_urldata() is eqivalent to get_http_data(self.url), but also caches the data, so no additional requests are made if it is called multiple times (e.g when grabbing title or downloading thumbnail). Generic().get(url) still causes it to be fetched an extra time. 2014-02-18 16:48:53 +01:00			`def __init__(self, _url):`
			`self._url = _url`
			`self._urldata = None`

			`@property`
			`def url(self):`
			`return self._url`

			`def get_urldata(self):`
			`if self._urldata is None:`
			`self._urldata = get_http_data(self.url)`
			`return self._urldata`
Move url to object attribute 2014-01-06 23:14:06 +01:00
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`@classmethod`
			`def handles(cls, url):`
Add default handle method in Service base class The default handle method will look for a supported_domains attribute (a list), containing the supported domains. The subclassed service class can of course override this if other means of determining support is needded. 2014-01-01 14:57:17 +01:00			`urlp = urlparse(url)`

Support supported_domains regexp for services Lets services with more complex domains (like domains with language/country codes) use a regular expressions that will match the supported domains for the handles() method. 2014-01-01 15:50:47 +01:00			`# Apply supported_domains_re regexp to the netloc. This`
			`# is meant for 'dynamic' domains, e.g. containing country`
			`# information etc.`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`for domain_re in [re.compile(x) for x in cls.supported_domains_re]:`
Support supported_domains regexp for services Lets services with more complex domains (like domains with language/country codes) use a regular expressions that will match the supported domains for the handles() method. 2014-01-01 15:50:47 +01:00			`if domain_re.match(urlp.netloc):`
			`return True`

Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`if urlp.netloc in cls.supported_domains:`
Add default handle method in Service base class The default handle method will look for a supported_domains attribute (a list), containing the supported domains. The subclassed service class can of course override this if other means of determining support is needded. 2014-01-01 14:57:17 +01:00			`return True`

			`# For every listed domain, try with www. subdomain as well.`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`if urlp.netloc in ['www.'+x for x in cls.supported_domains]:`
Add default handle method in Service base class The default handle method will look for a supported_domains attribute (a list), containing the supported domains. The subclassed service class can of course override this if other means of determining support is needded. 2014-01-01 14:57:17 +01:00			`return True`

			`return False`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00			`def get_subtitle(self, options):`
			`pass`

service: silence unused-argument warning from pylint The options parameter is unused, but is part of the interface, so we don't want to remove it. 2014-03-19 22:57:49 +01:00			`# the options parameter is unused, but is part of the`
			`# interface, so we don't want to remove it. Thus, the`
			`# pylint ignore.`
			`def find_all_episodes(self, options): # pylint: disable-msg=unused-argument`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`log.warning("--all-episodes not implemented for this service")`
			`return [self.url]`
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00
Split out opengraph getter to separate function ...and add doctests 2014-02-18 16:17:02 +01:00			`def opengraph_get(html, prop):`
			`"""`
			`Extract specified OpenGraph property from html.`

			`>>> opengraph_get('<html><head><meta property="og:image" content="http://example.com/img.jpg"><meta ...', "image")`
			`'http://example.com/img.jpg'`
			`>>> opengraph_get('<html><head><meta content="http://example.com/img2.jpg" property="og:image"><meta ...', "image")`
			`'http://example.com/img2.jpg'`
			`>>> opengraph_get('<html><head><meta name="og:image" property="og:image" content="http://example.com/img3.jpg"><meta ...', "image")`
			`'http://example.com/img3.jpg'`
			`"""`
			`match = re.search('<meta [^>]*property="og:' + prop + '" content="([^"]*)"', html)`
			`if match is None:`
			`match = re.search('<meta [^>]*content="([^"]*)" property="og:' + prop + '"', html)`
			`if match is None:`
			`return None`
			`return match.group(1)`


Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`class OpenGraphThumbMixin(object):`
			`"""`
			`Mix this into the service class to grab thumbnail from OpenGraph properties.`
			`"""`
			`def get_thumbnail(self, options):`
Add get_urldata() method to service self.get_urldata() is eqivalent to get_http_data(self.url), but also caches the data, so no additional requests are made if it is called multiple times (e.g when grabbing title or downloading thumbnail). Generic().get(url) still causes it to be fetched an extra time. 2014-02-18 16:48:53 +01:00			`url = opengraph_get(self.get_urldata(), "image")`
Split out opengraph getter to separate function ...and add doctests 2014-02-18 16:17:02 +01:00			`if url is None:`
			`return`
			`download_thumbnail(options, url)`
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00

Move classes under the imports in service/ 2013-03-23 15:56:25 +01:00			`from svtplay_dl.service.aftonbladet import Aftonbladet`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.service.bambuser import Bambuser`
			`from svtplay_dl.service.dbtv import Dbtv`
Move classes under the imports in service/ 2013-03-23 15:56:25 +01:00			`from svtplay_dl.service.dr import Dr`
			`from svtplay_dl.service.expressen import Expressen`
			`from svtplay_dl.service.hbo import Hbo`
			`from svtplay_dl.service.justin import Justin`
			`from svtplay_dl.service.kanal5 import Kanal5`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.service.lemonwhale import Lemonwhale`
			`from svtplay_dl.service.mtvnn import Mtvnn`
Support for Mtvservices (thedailyshow & colbertnation) 2013-04-21 21:51:45 +02:00			`from svtplay_dl.service.mtvservices import Mtvservices`
Move classes under the imports in service/ 2013-03-23 15:56:25 +01:00			`from svtplay_dl.service.nrk import Nrk`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.service.oppetarkiv import OppetArkiv`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`from svtplay_dl.service.picsearch import Picsearch`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.service.qbrick import Qbrick`
Move classes under the imports in service/ 2013-03-23 15:56:25 +01:00			`from svtplay_dl.service.radioplay import Radioplay`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.service.ruv import Ruv`
Move classes under the imports in service/ 2013-03-23 15:56:25 +01:00			`from svtplay_dl.service.sr import Sr`
			`from svtplay_dl.service.svtplay import Svtplay`
			`from svtplay_dl.service.tv4play import Tv4play`
			`from svtplay_dl.service.urplay import Urplay`
service: sort service import list 2014-08-17 10:57:08 +02:00			`from svtplay_dl.service.vg import Vg`
Move classes under the imports in service/ 2013-03-23 15:56:25 +01:00			`from svtplay_dl.service.viaplay import Viaplay`
			`from svtplay_dl.service.vimeo import Vimeo`

service: move sites variable out from handler 2013-03-23 15:58:15 +01:00			`sites = [`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`Aftonbladet,`
bambuser: new service 2014-02-05 20:37:50 +01:00			`Bambuser,`
dbtv: Support for dbtv.no 2014-08-11 23:20:17 +02:00			`Dbtv,`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`Dr,`
			`Expressen,`
			`Hbo,`
			`Justin,`
lemonwhale: svd.se have changed provider for videos. 2014-05-01 23:17:57 +02:00			`Lemonwhale,`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`Kanal5,`
			`Mtvservices,`
mtvnn: new service support for nickelodeon.(se,no,nl) this fixes #119 2014-08-11 20:41:09 +02:00			`Mtvnn,`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`Nrk,`
			`Qbrick,`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`Picsearch,`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`Ruv,`
			`Radioplay,`
			`Sr,`
			`Svtplay,`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`OppetArkiv,`
Make Service.handles a classmethod This way it can be called without instantiating the class. 2014-01-06 22:47:54 +01:00			`Tv4play,`
			`Urplay,`
			`Viaplay,`
New service vg.no 2014-08-17 10:55:05 +02:00			`Vimeo,`
			`Vg]`
service: move sites variable out from handler 2013-03-23 15:58:15 +01:00
Move handler selection to svtplay.service 2013-02-28 21:44:28 +01:00
Generic class a way to find embeded videos support for embeded svtplay videos. 2013-03-10 13:28:31 +01:00			`class Generic(object):`
			`''' Videos embed in sites '''`
Fix so embedded videos works again 2013-03-23 16:11:36 +01:00			`def get(self, url):`
Generic class a way to find embeded videos support for embeded svtplay videos. 2013-03-10 13:28:31 +01:00			`data = get_http_data(url)`
generic: catch embed svt videos. 2013-11-14 22:43:39 +01:00			`match = re.search(r"src=\"(http://www.svt.se/wd.*)\" height", data)`
Generic class a way to find embeded videos support for embeded svtplay videos. 2013-03-10 13:28:31 +01:00			`stream = None`
			`if match:`
			`url = match.group(1)`
			`for i in sites:`
Rename Service's handle method to handles handle can be ambiguous (i.e, not a verb). Signed-off-by: Olof Johansson <olof@ethup.se> 2014-01-01 15:03:15 +01:00			`if i.handles(url):`
generic: replace &amp; with & on svt.se 2014-03-25 20:22:21 +01:00			`url = url.replace("&amp;", "&")`
Move url to object attribute 2014-01-06 23:14:06 +01:00			`return url, i(url)`
Generic class a way to find embeded videos support for embeded svtplay videos. 2013-03-10 13:28:31 +01:00
Adding missing r prefixes to regex string 2013-05-05 12:57:42 +02:00			`match = re.search(r"src=\"(http://player.vimeo.com/video/[0-9]+)\" ", data)`
Generic class a way to find embeded videos support for embeded svtplay videos. 2013-03-10 13:28:31 +01:00			`if match:`
			`for i in sites:`
Rename Service's handle method to handles handle can be ambiguous (i.e, not a verb). Signed-off-by: Olof Johansson <olof@ethup.se> 2014-01-01 15:03:15 +01:00			`if i.handles(match.group(1)):`
Move url to object attribute 2014-01-06 23:14:06 +01:00			`return match.group(1), i(url)`
generic: detect embeded tv4play streams 2014-02-04 20:20:36 +01:00			`match = re.search(r"tv4play.se/iframe/video/(\d+)?", data)`
Generic: embeded tv4play videos 2013-03-24 14:55:14 +01:00			`if match:`
			`url = "http://www.tv4play.se/?video_id=%s" % match.group(1)`
			`for i in sites:`
Rename Service's handle method to handles handle can be ambiguous (i.e, not a verb). Signed-off-by: Olof Johansson <olof@ethup.se> 2014-01-01 15:03:15 +01:00			`if i.handles(url):`
Move url to object attribute 2014-01-06 23:14:06 +01:00			`return url, i(url)`
generic: support for embed bambuser streams 2014-02-05 20:42:34 +01:00			`match = re.search(r"embed.bambuser.com/broadcast/(\d+)", data)`
			`if match:`
			`url = "http://bambuser.com/v/%s" % match.group(1)`
			`for i in sites:`
			`if i.handles(url):`
			`return url, i(url)`
aftonbladet: support for the new site 2014-02-05 23:15:19 +01:00			`match = re.search(r'iframe src="(http://tv.aftonbladet[^"]*)"', data)`
generic: detect embeded svtplay streams 2014-02-08 22:47:27 +01:00			`if match:`
			`url = match.group(1)`
			`for i in sites:`
			`if i.handles(url):`
			`return url, i(url)`
generic: detect embedded aftonbladet videos 2014-08-12 19:08:08 +02:00			`match = re.search(r'a href="(http://tv.aftonbladet[^"]*)" class="abVi', data)`
			`if match:`
			`url = match.group(1)`
			`for i in sites:`
			`if i.handles(url):`
			`return url, i(url)`

generic: detect embeded svtplay streams 2014-02-08 22:47:27 +01:00			`match = re.search(r"iframe src='(http://www.svtplay[^']*)'", data)`
aftonbladet: support for the new site 2014-02-05 23:15:19 +01:00			`if match:`
			`url = match.group(1)`
			`for i in sites:`
			`if i.handles(url):`
			`return url, i(url)`
generic: support for embed bambuser streams 2014-02-05 20:42:34 +01:00
Generic class a way to find embeded videos support for embeded svtplay videos. 2013-03-10 13:28:31 +01:00			`return url, stream`

Move handler selection to svtplay.service 2013-02-28 21:44:28 +01:00			`def service_handler(url):`
			`handler = None`

			`for i in sites:`
Rename Service's handle method to handles handle can be ambiguous (i.e, not a verb). Signed-off-by: Olof Johansson <olof@ethup.se> 2014-01-01 15:03:15 +01:00			`if i.handles(url):`
Move url to object attribute 2014-01-06 23:14:06 +01:00			`handler = i(url)`
Move handler selection to svtplay.service 2013-02-28 21:44:28 +01:00			`break`

Add default handle method in Service base class The default handle method will look for a supported_domains attribute (a list), containing the supported domains. The subclassed service class can of course override this if other means of determining support is needded. 2014-01-01 14:57:17 +01:00			`return handler`