svtplay-dl/lib/svtplay_dl/service/justin.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-

# pylint has issues with urlparse: "some types could not be inferred"
# pylint: disable=E1103

from __future__ import absolute_import
import sys
import re
import json
import copy
import xml.etree.ElementTree as ET

from svtplay_dl.utils.urllib import urlparse, quote
from svtplay_dl.service import Service
from svtplay_dl.utils import get_http_data
from svtplay_dl.log import log
from svtplay_dl.fetcher.hls import HLS, hlsparse
from svtplay_dl.fetcher.http import HTTP

class JustinException(Exception):
    """Common base for the exceptions defined in this module."""

class JustinUrlException(JustinException):
    """
    Signals that a URL does not look like the given media_type.

    Both arguments end up in the exception message, so

      JustinUrlException('video', 'http://twitch.tv/example')

    stringifies to
    "'http://twitch.tv/example' is not recognized as a video URL".
    """
    def __init__(self, media_type, url):
        message = "'%s' is not recognized as a %s URL" % (url, media_type)
        super(JustinUrlException, self).__init__(message)


class Justin(Service):
    """
    Service for Twitch and Justin.tv URLs.

    Three URL path shapes are recognized, tried in this order by get():
    chapters (/<channel>/c/<id>), archives (/<channel>/b/<id>) and live
    channels (/<channel>).
    """
    # Justin and Twitch use language subdomains, e.g. en.www.twitch.tv. They
    # are usually two characters, but may have a country suffix as well (e.g.
    # zh-tw, zh-cn and pt-br).
    supported_domains_re = [
        r'^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.)?twitch\.tv$',
        r'^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.)?justin\.tv$']

    # TODO: verify that this will support Justin as well
    api_base_url = 'https://api.twitch.tv'
    hls_base_url = 'http://usher.justin.tv/api/channel/hls'

    def get(self, options):
        """
        Yield the fetchers for self.url.

        Each URL handler is tried in turn; the first one that accepts the
        URL supplies the fetchers. A handler rejects a URL by raising
        JustinUrlException, which is logged at debug level before the
        next handler is tried. If every handler rejects the URL an error
        is logged and the process exits with status 2.

        This is a generator, so nothing happens until it is iterated.
        """
        urlp = urlparse(self.url)

        for jtv_video_type in [self._get_chapter, self._get_archive,
                               self._get_channel]:
            try:
                # _get_channel is a generator function: its URL check only
                # runs once iteration starts, so the loop has to live
                # inside the try block.
                for fetcher in jtv_video_type(urlp, options):
                    yield fetcher
                return
            except JustinUrlException as e:
                log.debug(str(e))

        log.error("This twitch/justin video type is unsupported")
        sys.exit(2)


    def _get_static_video(self, vid, options, vidtype):
        """
        Yield an HTTP fetcher for the video file of a recorded broadcast.

        `vid` is the numeric id taken from the URL and `vidtype` selects
        the API lookup (the callers in this class pass 'archive' or
        'chapter'). The file URL is read from the archive/video_file_url
        element of the XML response.
        """
        url = "http://api.justin.tv/api/broadcast/by_%s/%s.xml?onsite=true" % (
            vidtype, vid)
        data = get_http_data(url)

        xml = ET.XML(data)
        url = xml.find("archive").find("video_file_url").text

        yield HTTP(copy.copy(options), url)

    def _get_archive(self, urlp, options):
        """
        Return the fetchers for an archive URL (/<channel>/b/<id>).

        Raises JustinUrlException if the path is not an archive path.
        """
        match = re.match(r'/\w+/b/(\d+)', urlp.path)
        if not match:
            raise JustinUrlException('video', urlp.geturl())

        # Hand the generator back to the caller; calling it without
        # returning it would silently discard the fetchers.
        return self._get_static_video(match.group(1), options, 'archive')


    def _get_chapter(self, urlp, options):
        """
        Return the fetchers for a chapter URL (/<channel>/c/<id>).

        Raises JustinUrlException if the path is not a chapter path.
        """
        match = re.match(r'/\w+/c/(\d+)', urlp.path)
        if not match:
            raise JustinUrlException('video', urlp.geturl())

        # Hand the generator back to the caller; calling it without
        # returning it would silently discard the fetchers.
        return self._get_static_video(match.group(1), options, 'chapter')


    def _get_access_token(self, channel):
        """
        Get a Twitch access token. It's a three element dict:

         * mobile_restricted
         * sig
         * token

        `sig` is a hexadecimal string, and `token` is a JSON blob, with
        information about access expiration. `mobile_restricted` is not
        important, but is a boolean.

        Both `sig` and `token` should be added to the HLS URI, and the
        token should, of course, be URI encoded.
        """
        return self._ajax_get('/api/channels/%s/access_token' % channel)


    def _ajax_get(self, method):
        """
        Call the API method `method` and return the decoded JSON response.

        `method` is a URL path. Relative paths are placed under /kraken/,
        absolute paths (leading '/') are used as they are.
        """
        # Logic found in Twitch's global.js. Prepend /kraken/ to url
        # path unless the API method already is absolute.
        if not method.startswith('/'):
            method = '/kraken/%s' % method

        # The path is absolute at this point, so it is appended directly;
        # the URL must be built after the prefix has been applied.
        url = "%s%s" % (self.api_base_url, method)

        # There are references to a api_token in global.js; it's used
        # with the "Twitch-Api-Token" HTTP header. But it doesn't seem
        # to be necessary.
        payload = get_http_data(url, header={
            'Accept': 'application/vnd.twitchtv.v2+json'
        })
        return json.loads(payload)


    def _get_hls_url(self, channel):
        """
        Return the HLS playlist URL for `channel`.

        The access token and its signature are passed in the query
        string; the token is URI encoded.
        """
        access = self._get_access_token(channel)

        query = "token=%s&sig=%s" % (quote(access['token']), access['sig'])
        return "%s/%s.m3u8?%s" % (self.hls_base_url, channel, query)


    def _get_channel(self, urlp, options):
        """
        Yield an HLS fetcher for every stream of a live channel URL.

        The channel name is the first path component. `options` is
        modified in place: `live` is set, and `output` defaults to the
        channel name when it is not already set.

        Raises JustinUrlException (on first iteration, since this is a
        generator) if no channel name can be found in the path.
        """
        match = re.match(r'/(\w+)', urlp.path)

        if not match:
            raise JustinUrlException('channel', urlp.geturl())

        channel = match.group(1)
        hls_url = self._get_hls_url(channel)

        options.live = True
        if not options.output:
            options.output = channel

        streams = hlsparse(hls_url)
        for n in list(streams.keys()):
            yield HLS(copy.copy(options), streams[n], n)
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil --`
Disable pylint warning E1103 when using urlparse pylint has issues with urlparse, e.g: Instance of 'ParseResult' has no 'query' member (but some types could not be inferred) 2013-04-27 13:17:00 +02:00
			`# pylint has issues with urlparse: "some types could not be inferred"`
			`# pylint: disable=E1103`

Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00			`import sys`
			`import re`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`import json`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`import copy`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00			`import xml.etree.ElementTree as ET`

justin: Add HLS support 2014-02-08 17:09:14 +01:00			`from svtplay_dl.utils.urllib import urlparse, quote`
Make all services inherit svtplay_dl.service.Service 2013-04-21 12:44:31 +02:00			`from svtplay_dl.service import Service`
Remove unused imports 2014-03-19 22:58:40 +01:00			`from svtplay_dl.utils import get_http_data`
Rename module from lib/svtplay to lib/svtplay_dl less confusion with the service. 2013-03-17 19:55:19 +01:00			`from svtplay_dl.log import log`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`from svtplay_dl.fetcher.hls import HLS, hlsparse`
justin: convert into new video fetcher 2014-04-21 18:27:54 +02:00			`from svtplay_dl.fetcher.http import HTTP`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`class JustinException(Exception):`
			`pass`

			`class JustinUrlException(JustinException):`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`"""`
			`Used to indicate an invalid URL for a given media_type. E.g.:`

			`JustinUrlException('video', 'http://twitch.tv/example')`
			`"""`
			`def __init__(self, media_type, url):`
			`super(JustinUrlException, self).__init__(`
			`"'%s' is not recognized as a %s URL" % (url, media_type)`
			`)`


Make all services inherit svtplay_dl.service.Service 2013-04-21 12:44:31 +02:00			`class Justin(Service):`
Support supported_domains regexp for services Lets services with more complex domains (like domains with language/country codes) use a regular expressions that will match the supported domains for the handles() method. 2014-01-01 15:50:47 +01:00			`# Justin and Twitch uses language subdomains, e.g. en.www.twitch.tv. They`
			`# are usually two characters, but may have a country suffix as well (e.g.`
			`# zh-tw, zh-cn and pt-br.`
			`supported_domains_re = [`
			`r'^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.)?twitch\.tv$',`
			`r'^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.)?justin\.tv$']`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`# TODO: verify that this will support Justin as well`
			`api_base_url = 'https://api.twitch.tv'`
			`hls_base_url = 'http://usher.justin.tv/api/channel/hls'`

Move url to object attribute 2014-01-06 23:14:06 +01:00			`def get(self, options):`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`urlp = urlparse(self.url)`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`success = False`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`for jtv_video_type in [self._get_chapter, self._get_archive,`
			`self._get_channel]:`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`try:`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`jtv_video_type(urlp, options)`
			`success = True`
			`break`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`except JustinUrlException as e:`
justin: don't assume Exception has message attribute In Python3, the Exception class does not have a message attribute. Stringifying the Exception object results in the same thing though, and it works in both Python 2 and 3. 2014-03-19 22:48:40 +01:00			`log.debug(str(e))`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`if not success:`
			`log.debug(str(e))`
justin: adjust error msg in case of type being unknown 2014-03-09 15:04:08 +01:00			`log.error("This twitch/justin video type is unsupported")`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`sys.exit(2)`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00

justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`def _get_static_video(self, vid, options, vidtype):`
			`url = "http://api.justin.tv/api/broadcast/by_%s/%s.xml?onsite=true" % (`
			`vidtype, vid)`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`data = get_http_data(url)`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`xml = ET.XML(data)`
			`url = xml.find("archive").find("video_file_url").text`

service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`yield HTTP(copy.copy(options), url)`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00			`def _get_archive(self, urlp, options):`
			`match = re.match(r'/\w+/b/(\d+)', urlp.path)`
			`if not match:`
			`raise JustinUrlException('video', urlp.geturl())`

			`self._get_static_video(match.group(1), options, 'archive')`


			`def _get_chapter(self, urlp, options):`
			`match = re.match(r'/\w+/c/(\d+)', urlp.path)`
			`if not match:`
			`raise JustinUrlException('video', urlp.geturl())`

			`self._get_static_video(match.group(1), options, 'chapter')`


justin: Add HLS support 2014-02-08 17:09:14 +01:00			`def _get_access_token(self, channel):`
			`"""`
			`Get a Twitch access token. It's a three element dict:`

			`* mobile_restricted`
			`* sig`
			`* token`

			`sig` is a hexadecimal string, and `token` is a JSON blob, with
			information about access expiration. `mobile_restricted` is not
			`important, but is a boolean.`

			Both `sig` and `token` should be added to the HLS URI, and the
			`token should, of course, be URI encoded.`
			`"""`
			`return self._ajax_get('/api/channels/%s/access_token' % channel)`


			`def _ajax_get(self, method):`
			`url = "%s/%s" % (self.api_base_url, method)`

			`# Logic found in Twitch's global.js. Prepend /kraken/ to url`
			`# path unless the API method already is absolute.`
			`if method[0] != '/':`
			`method = '/kraken/%s' % method`

			`# There are references to a api_token in global.js; it's used`
			`# with the "Twitch-Api-Token" HTTP header. But it doesn't seem`
			`# to be necessary.`
			`payload = get_http_data(url, header={`
			`'Accept': 'application/vnd.twitchtv.v2+json'`
			`})`
			`return json.loads(payload)`


			`def _get_hls_url(self, channel):`
			`access = self._get_access_token(channel)`

			`query = "token=%s&sig=%s" % (quote(access['token']), access['sig'])`
			`return "%s/%s.m3u8?%s" % (self.hls_base_url, channel, query)`


justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`def _get_channel(self, urlp, options):`
			`match = re.match(r'/(\w+)', urlp.path)`
justin: Add HLS support 2014-02-08 17:09:14 +01:00
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`if not match:`
			`raise JustinUrlException('channel', urlp.geturl())`

justin: Add HLS support 2014-02-08 17:09:14 +01:00			`channel = match.group(1)`
			`hls_url = self._get_hls_url(channel)`
			`urlp = urlparse(hls_url)`

justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`options.live = True`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`if not options.output:`
			`options.output = channel`

services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`streams = hlsparse(hls_url)`
			`for n in list(streams.keys()):`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`yield HLS(copy.copy(options), streams[n], n)`