svtplay-dl/lib/svtplay_dl/service/twitch.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-

# pylint has issues with urlparse: "some types could not be inferred"
# pylint: disable=E1103

from __future__ import absolute_import
import re
import json
import os
import copy

from svtplay_dl.utils.urllib import urlparse, quote_plus
from svtplay_dl.service import Service
from svtplay_dl.utils import filenamify
from svtplay_dl.log import log
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.fetcher.http import HTTP
from svtplay_dl.error import ServiceError


class TwitchException(Exception):
    """Common base class for the Twitch-specific exceptions in this module."""


class TwitchUrlException(TwitchException):
    """
    Signals that a URL could not be interpreted as the expected kind of
    media. The media type comes first, then the offending URL, e.g.:

      TwitchUrlException('video', 'http://twitch.tv/example')
    """
    def __init__(self, media_type, url):
        # The message quotes the URL first and names the media type last.
        message = "'%s' is not recognized as a %s URL" % (url, media_type)
        super(TwitchUrlException, self).__init__(message)


class Twitch(Service):
    # Twitch uses language subdomains, e.g. en.www.twitch.tv. They
    # are usually two characters, but may have a country suffix as well
    # (e.g. zh-tw, zh-cn and pt-br).
    supported_domains_re = [
        r'^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.|clips\.)?twitch\.tv$',
    ]

    api_base_url = 'https://api.twitch.tv'
    hls_base_url = 'http://usher.justin.tv/api/channel/hls'

    def get(self):
        """
        Yield the streams (or a ServiceError) for the URL in `self.url`.

        The URL is dispatched on its shape:

         * a ``/<name>/v/<id>`` path is handed to `_get_archive`; the
           ``b`` and ``c`` variants are reported as unsupported.
         * any other path on a clips.twitch.tv host goes to `_get_clips`.
         * everything else is treated as a channel and goes to
           `_get_channel`.

        A TwitchUrlException raised while the selected generator is
        being consumed is reported as a ServiceError as well.
        """
        parsed = urlparse(self.url)

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        vod = re.match(r'/(\w+)/([bcv])/(\d+)', parsed.path)
        if vod:
            kind, videoid = vod.group(2), vod.group(3)
            if kind in ("b", "c"):
                yield ServiceError("This twitch video type is unsupported")
                return
            source = self._get_archive(self.options, videoid)
        elif re.search("clips.twitch.tv", parsed.netloc):
            source = self._get_clips(self.options)
        else:
            source = self._get_channel(self.options, parsed)

        # The generators are lazy, so URL errors only surface while
        # they are being iterated here.
        try:
            for item in source:
                yield item
        except TwitchUrlException:
            yield ServiceError("This twitch video type is unsupported")

    def _get_static_video(self, options, videoid):
        """
        Yield the HLS streams for the archived video `videoid`.

        When `options.output_auto` is set, the video's metadata is
        fetched first and `options.output` is replaced by a name of the
        form ``twitch-<channel>-<title>``, placed inside the directory
        part of the previous output value if that is an existing
        directory. A 404 on the metadata request yields a ServiceError
        and ends the generator.

        Raises TwitchUrlException if the access token response has no
        `token` entry.
        """
        token_info = self._get_access_token(videoid)

        if options.output_auto:
            response = self.http.request("get", "https://api.twitch.tv/kraken/videos/v%s" % videoid)
            if response.status_code == 404:
                yield ServiceError("Can't find the video")
                return
            meta = json.loads(response.text)
            channel_name = meta["channel"]["name"]
            title = filenamify(meta["title"])
            filename = "twitch-%s-%s" % (channel_name, title)
            target_dir = os.path.dirname(options.output)
            if os.path.isdir(target_dir):
                filename = os.path.join(target_dir, filename)
            options.output = filename

        if "token" not in token_info:
            raise TwitchUrlException('video', self.url)

        # The token is URI encoded; the signature is passed through as is.
        playlist_url = "http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s" % (
            videoid, quote_plus(str(token_info["token"])), token_info["sig"])

        playlist = self.http.request("get", playlist_url)
        streams = hlsparse(options, playlist, playlist_url)
        if not streams:
            return
        for key in list(streams.keys()):
            yield streams[key]
    def _get_archive(self, options, vid):
        try:
            for n in self._get_static_video(options, vid):
                yield n
        except TwitchUrlException as e:
            log.error(str(e))

    def _get_access_token(self, channel, vtype="vods"):
        """
        Get a Twitch access token. It's a three element dict:

         * mobile_restricted
         * sig
         * token

        `sig` is a hexadecimal string, and `token` is a JSON blob, with
        information about access expiration. `mobile_restricted` is not
        important, but is a boolean.

        Both `sig` and `token` should be added to the HLS URI, and the
        token should, of course, be URI encoded.
        """
        return self._ajax_get('/api/%s/%s/access_token' % (vtype, channel))

    def _ajax_get(self, method):
        url = "%s/%s" % (self.api_base_url, method)

        # Logic found in Twitch's global.js. Prepend /kraken/ to url
        # path unless the API method already is absolute.
        if method[0] != '/':
            method = '/kraken/%s' % method

        payload = self.http.request("get", url)
        return json.loads(payload.text)

    def _get_hls_url(self, channel):
        access = self._get_access_token(channel, "channels")

        query = "token=%s&sig=%s&allow_source=true&allow_spectre=true" % (quote_plus(access['token']), access['sig'])
        return "%s/%s.m3u8?%s" % (self.hls_base_url, channel, query)

    def _get_channel(self, options, urlp):
        """
        Yield the HLS streams for the live channel named in `urlp`.

        `urlp` is the parsed page URL; the channel name is taken from
        the first path component. `options.live` is set, and
        `options.output` becomes ``twitch-<channel>`` when
        `options.output_auto` is on, or the bare channel name when no
        output has been chosen.

        A 404 from the playlist request yields a ServiceError saying
        the stream is not online.

        Raises TwitchUrlException if no channel name can be found in
        the path.
        """
        match = re.match(r'/(\w+)', urlp.path)
        if not match:
            raise TwitchUrlException('channel', urlp.geturl())

        channel = match.group(1)
        if options.output_auto:
            options.output = "twitch-%s" % channel

        hls_url = self._get_hls_url(channel)

        options.live = True
        if not options.output:
            options.output = channel
        data = self.http.request("get", hls_url)
        if data.status_code == 404:
            yield ServiceError("Stream is not online.")
            return
        streams = hlsparse(options, data, hls_url)
        # Guard against an empty result, as _get_static_video does.
        if streams:
            for n in list(streams.keys()):
                yield streams[n]

    def _get_clips(self, options):
        match = re.search("quality_options: (\[[^\]]+\])", self.get_urldata())
        if not match:
            yield ServiceError("Can't find the video clip")
            return
        if options.output_auto:
            name = re.search('slug: "([^"]+)"', self.get_urldata()).group(1)
            brodcaster = re.search('broadcaster_login: "([^"]+)"', self.get_urldata()).group(1)
            name = "twitch-%s-%s" % (brodcaster, name)
            directory = os.path.dirname(options.output)
            if os.path.isdir(directory):
                name = os.path.join(directory, name)
            options.output = name

        dataj = json.loads(match.group(1))
        for i in dataj:
            yield HTTP(copy.copy(options), i["source"], i["quality"])
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil --`
Disable pylint warning E1103 when using urlparse pylint has issues with urlparse, e.g: Instance of 'ParseResult' has no 'query' member (but some types could not be inferred) 2013-04-27 13:17:00 +02:00
			`# pylint has issues with urlparse: "some types could not be inferred"`
			`# pylint: disable=E1103`

Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00			`import re`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`import json`
twitch: handle directories when we output to an directory 2015-09-30 13:52:01 +02:00			`import os`
twitch: Add support for clips fixes #426 2016-08-20 16:32:12 +02:00			`import copy`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`from svtplay_dl.utils.urllib import urlparse, quote_plus`
Make all services inherit svtplay_dl.service.Service 2013-04-21 12:44:31 +02:00			`from svtplay_dl.service import Service`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`from svtplay_dl.utils import filenamify`
Rename module from lib/svtplay to lib/svtplay_dl less confusion with the service. 2013-03-17 19:55:19 +01:00			`from svtplay_dl.log import log`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`from svtplay_dl.fetcher.hls import hlsparse`
twitch: Add support for clips fixes #426 2016-08-20 16:32:12 +02:00			`from svtplay_dl.fetcher.http import HTTP`
twitch: better error handling 2015-09-06 14:37:40 +02:00			`from svtplay_dl.error import ServiceError`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`class TwitchException(Exception):`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`pass`

justin: support for the new vod system 2015-05-23 19:18:04 +02:00
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`class TwitchUrlException(TwitchException):`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`"""`
			`Used to indicate an invalid URL for a given media_type. E.g.:`

Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`TwitchUrlException('video', 'http://twitch.tv/example')`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`"""`
			`def __init__(self, media_type, url):`
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`super(TwitchUrlException, self).__init__(`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`"'%s' is not recognized as a %s URL" % (url, media_type)`
			`)`


Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`class Twitch(Service):`
			`# Twitch uses language subdomains, e.g. en.www.twitch.tv. They`
Support supported_domains regexp for services Lets services with more complex domains (like domains with language/country codes) use a regular expressions that will match the supported domains for the handles() method. 2014-01-01 15:50:47 +01:00			`# are usually two characters, but may have a country suffix as well (e.g.`
			`# zh-tw, zh-cn and pt-br.`
			`supported_domains_re = [`
twitch: Add support for clips fixes #426 2016-08-20 16:32:12 +02:00			`r'^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.\|clips\.)?twitch\.tv$',`
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`]`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`api_base_url = 'https://api.twitch.tv'`
			`hls_base_url = 'http://usher.justin.tv/api/channel/hls'`

Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`def get(self):`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`urlp = urlparse(self.url)`

remove options in argument for exclude 2016-05-14 22:54:30 +02:00			`if self.exclude():`
Better excluding message fixing #198 2015-09-06 23:04:48 +02:00			`yield ServiceError("Excluding video")`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00			`return`

justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`match = re.match(r'/(\w+)/([bcv])/(\d+)', urlp.path)`
			`if not match:`
twitch: Add support for clips fixes #426 2016-08-20 16:32:12 +02:00			`if re.search("clips.twitch.tv", urlp.netloc):`
			`data = self._get_clips(self.options)`
			`else:`
			`data = self._get_channel(self.options, urlp)`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`else:`
			`if match.group(2) in ["b", "c"]:`
twitch: handle the error messages 2015-10-20 00:04:29 +02:00			`yield ServiceError("This twitch video type is unsupported")`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`return`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`data = self._get_archive(self.options, match.group(3))`
justin: refactor get function 2015-05-24 12:37:16 +02:00			`try:`
			`for i in data:`
			`yield i`
Fix various pylint warnings None of these were any real problems, but easier to spot real issues if pylint is a bit quieter. Apart from the pylint overrides being sprinkled over the code base, this commit also fixes occurences of the following issues: - logging-not-lazy - logging-format-interpolation - unused-import - unused-variable 2016-04-03 19:06:45 +02:00			`except TwitchUrlException:`
twitch: handle the error messages 2015-10-20 00:04:29 +02:00			`yield ServiceError("This twitch video type is unsupported")`
justin: refactor get function 2015-05-24 12:37:16 +02:00			`return`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`def _get_static_video(self, options, videoid):`
			`access = self._get_access_token(videoid)`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00
justin: better filenames 2015-05-24 13:59:11 +02:00			`if options.output_auto:`
twitch: better error handling 2015-09-06 14:37:40 +02:00			`data = self.http.request("get", "https://api.twitch.tv/kraken/videos/v%s" % videoid)`
			`if data.status_code == 404:`
			`yield ServiceError("Can't find the video")`
			`return`
			`info = json.loads(data.text)`
twitch: handle directories when we output to an directory 2015-09-30 13:52:01 +02:00			`name = "twitch-%s-%s" % (info["channel"]["name"], filenamify(info["title"]))`
			`directory = os.path.dirname(options.output)`
			`if os.path.isdir(directory):`
			`name = os.path.join(directory, name)`
			`options.output = name`
justin: better filenames 2015-05-24 13:59:11 +02:00
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`if "token" not in access:`
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`raise TwitchUrlException('video', self.url)`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`nauth = quote_plus(str(access["token"]))`
			`authsig = access["sig"]`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`url = "http://usher.twitch.tv/vod/%s?nauth=%s&nauthsig=%s" % (`
			`videoid, nauth, authsig)`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`streams = hlsparse(options, self.http.request("get", url), url)`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`if streams:`
			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`def _get_archive(self, options, vid):`
			`try:`
			`for n in self._get_static_video(options, vid):`
			`yield n`
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`except TwitchUrlException as e:`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`log.error(str(e))`
justin: support archived content from twitch and justin Archive contents have URLs that look like justin.tv/<channel>/b/<id> and twitch.tv/<channel>/b/<id>. Otherwise, the implementation is equivalent with that of _get_video, which is now renamed to _get_chapter. Closes #67. 2014-03-09 15:01:04 +01:00
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`def _get_access_token(self, channel, vtype="vods"):`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`"""`
			`Get a Twitch access token. It's a three element dict:`

			`* mobile_restricted`
			`* sig`
			`* token`

			`sig` is a hexadecimal string, and `token` is a JSON blob, with
			information about access expiration. `mobile_restricted` is not
			`important, but is a boolean.`

			Both `sig` and `token` should be added to the HLS URI, and the
			`token should, of course, be URI encoded.`
			`"""`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`return self._ajax_get('/api/%s/%s/access_token' % (vtype, channel))`
justin: Add HLS support 2014-02-08 17:09:14 +01:00
			`def _ajax_get(self, method):`
			`url = "%s/%s" % (self.api_base_url, method)`

			`# Logic found in Twitch's global.js. Prepend /kraken/ to url`
			`# path unless the API method already is absolute.`
			`if method[0] != '/':`
			`method = '/kraken/%s' % method`

twitch: Fix so we can get live url again fixes: #385 2016-05-02 22:26:30 +02:00			`payload = self.http.request("get", url)`
twitch: this should be text 2015-09-07 19:00:40 +02:00			`return json.loads(payload.text)`
justin: Add HLS support 2014-02-08 17:09:14 +01:00
			`def _get_hls_url(self, channel):`
justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`access = self._get_access_token(channel, "channels")`
justin: Add HLS support 2014-02-08 17:09:14 +01:00
twitch: Fix so we can get live url again fixes: #385 2016-05-02 22:26:30 +02:00			`query = "token=%s&sig=%s&allow_source=true&allow_spectre=true" % (quote_plus(access['token']), access['sig'])`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`return "%s/%s.m3u8?%s" % (self.hls_base_url, channel, query)`

justin: support for the new vod system 2015-05-23 19:18:04 +02:00			`def _get_channel(self, options, urlp):`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`match = re.match(r'/(\w+)', urlp.path)`
justin: Add HLS support 2014-02-08 17:09:14 +01:00
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`if not match:`
Rename Justin to Twitch Justin has been disbanded for a while. 2015-08-24 18:40:59 +02:00			`raise TwitchUrlException('channel', urlp.geturl())`
justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`channel = match.group(1)`
justin: better filenames 2015-05-24 13:59:11 +02:00			`if options.output_auto:`
			`options.output = "twitch-%s" % channel`

justin: Add HLS support 2014-02-08 17:09:14 +01:00			`hls_url = self._get_hls_url(channel)`
			`urlp = urlparse(hls_url)`

justin: refactoring Break out logic for video and channel fetching to functions and introduce a JustinUrlException class, that represents faulty URLs. 2014-02-05 19:52:29 +01:00			`options.live = True`
justin: Add HLS support 2014-02-08 17:09:14 +01:00			`if not options.output:`
			`options.output = channel`
twitch: better error handling 2015-09-06 14:37:40 +02:00			`data = self.http.request("get", hls_url)`
			`if data.status_code == 404:`
			`yield ServiceError("Stream is not online.")`
			`return`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`streams = hlsparse(options, data, hls_url)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
twitch: Add support for clips fixes #426 2016-08-20 16:32:12 +02:00
			`def _get_clips(self, options):`
			`match = re.search("quality_options: (\[[^\]]+\])", self.get_urldata())`
			`if not match:`
			`yield ServiceError("Can't find the video clip")`
			`return`
			`if options.output_auto:`
			`name = re.search('slug: "([^"]+)"', self.get_urldata()).group(1)`
			`brodcaster = re.search('broadcaster_login: "([^"]+)"', self.get_urldata()).group(1)`
			`name = "twitch-%s-%s" % (brodcaster, name)`
			`directory = os.path.dirname(options.output)`
			`if os.path.isdir(directory):`
			`name = os.path.join(directory, name)`
			`options.output = name`

			`dataj = json.loads(match.group(1))`
			`for i in dataj:`
			`yield HTTP(copy.copy(options), i["source"], i["quality"])`