1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-28 06:04:17 +01:00
svtplay-dl/lib/svtplay_dl/service/twitch.py

160 lines
5.5 KiB
Python
Raw Normal View History

2013-03-02 21:26:28 +01:00
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
# pylint has issues with urlparse: "some types could not be inferred"
# pylint: disable=E1103
import copy
2019-08-25 00:40:39 +02:00
import json
2018-11-18 12:47:19 +01:00
import logging
2019-08-25 00:40:39 +02:00
import re
from urllib.parse import quote_plus
from urllib.parse import urlparse
2019-08-25 00:40:39 +02:00
from svtplay_dl.error import ServiceError
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.fetcher.http import HTTP
2019-08-25 00:40:39 +02:00
from svtplay_dl.service import Service
2015-05-23 19:18:04 +02:00
class TwitchException(Exception):
    """Base class for the Twitch-specific errors raised in this module."""
2015-05-23 19:18:04 +02:00
class TwitchUrlException(TwitchException):
    """
    Raised when a URL cannot be interpreted as the expected media type.

    The exception message names both the offending URL and the media
    type it was expected to be. Example:

        TwitchUrlException('video', 'http://twitch.tv/example')
    """

    def __init__(self, media_type, url):
        # Note the argument order: the URL comes first in the message even
        # though it is the second constructor parameter.
        message = "'{}' is not recognized as a {} URL".format(url, media_type)
        super().__init__(message)
class Twitch(Service):
    """Service for twitch.tv URLs: archived videos, live channels and clips."""

    # Twitch uses language subdomains, e.g. en.www.twitch.tv. They
    # are usually two characters, but may have a country suffix as well
    # (e.g. zh-tw, zh-cn and pt-br).
    supported_domains_re = [r"^(?:(?:[a-z]{2}-)?[a-z]{2}\.)?(www\.|clips\.)?twitch\.tv$"]

    api_base_url = "https://api.twitch.tv"
    hls_base_url = "http://usher.justin.tv/api/channel/hls"

    def get(self):
        """
        Yield the streams (or ServiceError objects) for ``self.url``.

        The URL path decides which helper is used:

        * ``/<channel>/v/<id>``      -> archived video (``_get_archive``)
        * ``/<channel>/[bc]/<id>``   -> reported as unsupported
        * anything else on a ``clips.twitch.tv`` host -> ``_get_clips``
        * anything else              -> live channel (``_get_channel``)

        A TwitchUrlException raised while the chosen helper is being
        iterated is converted into a ServiceError.
        """
        urlp = urlparse(self.url)

        match = re.match(r"/(\w+)/([bcv])/(\d+)", urlp.path)
        if not match:
            # Plain substring test; the host has already been matched
            # against supported_domains_re style names, no regex needed.
            if "clips.twitch.tv" in urlp.netloc:
                data = self._get_clips()
            else:
                data = self._get_channel(urlp)
        else:
            if match.group(2) in ["b", "c"]:
                yield ServiceError("This twitch video type is unsupported")
                return
            data = self._get_archive(match.group(3))

        # The helpers are generators, so their exceptions only surface
        # here, while they are being consumed.
        try:
            yield from data
        except TwitchUrlException:
            yield ServiceError("This twitch video type is unsupported")
            return

    def _get_static_video(self, videoid):
        """
        Yield the HLS streams of the archived video ``videoid``.

        Fills in ``self.output["title"]`` and ``self.output["episodename"]``
        from the video metadata. Yields a ServiceError when the metadata
        request returns 404.

        Raises TwitchUrlException when the access token response does not
        contain a ``token`` entry.
        """
        access = self._get_access_token(videoid)

        data = self.http.request("get", "https://api.twitch.tv/kraken/videos/v{}".format(videoid))
        if data.status_code == 404:
            yield ServiceError("Can't find the video")
            return
        info = json.loads(data.text)
        self.output["title"] = "twitch-{}".format(info["channel"]["name"])
        self.output["episodename"] = info["title"]

        if "token" not in access:
            raise TwitchUrlException("video", self.url)
        nauth = quote_plus(str(access["token"]))
        authsig = access["sig"]

        url = "http://usher.twitch.tv/vod/{}?nauth={}&nauthsig={}".format(videoid, nauth, authsig)
        streams = hlsparse(copy.copy(self.config), self.http.request("get", url), url, output=self.output)
        # hlsparse may hand back nothing usable; only iterate a non-empty result.
        if streams:
            yield from streams.values()

    def _get_archive(self, vid):
        """
        Yield the streams of archived video ``vid``.

        A TwitchUrlException from ``_get_static_video`` is logged as an
        error and not propagated, so iteration simply ends.
        """
        try:
            yield from self._get_static_video(vid)
        except TwitchUrlException as e:
            logging.error(str(e))

    def _get_access_token(self, channel, vtype="vods"):
        """
        Get a Twitch access token. It's a three element dict:

         * mobile_restricted
         * sig
         * token

        `sig` is a hexadecimal string, and `token` is a JSON blob, with
        information about access expiration. `mobile_restricted` is not
        important, but is a boolean.

        Both `sig` and `token` should be added to the HLS URI, and the
        token should, of course, be URI encoded.

        ``channel`` is the identifier placed in the API path (a channel
        name or a video id) and ``vtype`` the API collection it belongs
        to ("vods" by default, "channels" for live streams).
        """
        return self._ajax_get("/api/{}/{}/access_token".format(vtype, channel))

    def _ajax_get(self, method):
        """
        GET ``method`` from the Twitch API and return the decoded JSON.

        ``method`` is an API path. A path that does not start with "/" is
        treated as relative and placed under ``/kraken/``; an absolute
        path is used as given.
        """
        # Logic found in Twitch's global.js. Prepend /kraken/ to url
        # path unless the API method already is absolute. This has to
        # happen before the URL is built, otherwise the prefix is lost.
        if not method.startswith("/"):
            method = "/kraken/{}".format(method)

        # `method` now always starts with "/", so no extra separator is
        # needed (adding one would produce "//" after the host).
        url = "{}{}".format(self.api_base_url, method)

        payload = self.http.request("get", url)
        return json.loads(payload.text)

    def _get_hls_url(self, channel):
        """Return the HLS playlist URL, with access token, for live ``channel``."""
        access = self._get_access_token(channel, "channels")

        query = "token={}&sig={}&allow_source=true&allow_spectre=true".format(quote_plus(access["token"]), access["sig"])
        return "{}/{}.m3u8?{}".format(self.hls_base_url, channel, query)

    def _get_channel(self, urlp):
        """
        Yield the HLS streams of the live channel named in ``urlp``.

        ``urlp`` is the parsed request URL; the first path component is
        used as the channel name and stored as ``self.output["title"]``.
        The ``live`` config option is switched on. Yields a ServiceError
        when the playlist request returns 404.

        Raises TwitchUrlException when no channel name can be found in
        the path.
        """
        match = re.match(r"/(\w+)", urlp.path)
        if not match:
            raise TwitchUrlException("channel", urlp.geturl())

        channel = match.group(1)
        self.output["title"] = channel

        hls_url = self._get_hls_url(channel)

        self.config.set("live", True)
        data = self.http.request("get", hls_url)
        if data.status_code == 404:
            yield ServiceError("Stream is not online.")
            return

        # Pass a copy of the configuration (taken after "live" was set),
        # the same way _get_static_video does; the output dict goes in
        # through the `output` keyword only.
        streams = hlsparse(copy.copy(self.config), data, hls_url, output=self.output)
        if streams:
            yield from streams.values()

    def _get_clips(self):
        """
        Yield one HTTP stream per quality option of a clip page.

        The clip's quality list, slug and broadcaster login are scraped
        from the page source. The broadcaster becomes
        ``self.output["title"]`` and the slug
        ``self.output["episodename"]``. A ServiceError is yielded when any
        of the three pieces cannot be found.
        """
        # Fetch the page text once and reuse it for every lookup.
        page = self.get_urldata()

        match = re.search(r"quality_options: (\[[^\]]+\])", page)
        if not match:
            yield ServiceError("Can't find the video clip")
            return

        slug = re.search(r'slug: "([^"]+)"', page)
        broadcaster = re.search(r'broadcaster_login: "([^"]+)"', page)
        if not slug or not broadcaster:
            # Without these the output names cannot be built; report it
            # instead of failing on a missing regex match.
            yield ServiceError("Can't find the video clip")
            return

        self.output["title"] = "twitch-{}".format(broadcaster.group(1))
        self.output["episodename"] = slug.group(1)

        dataj = json.loads(match.group(1))
        for i in dataj:
            yield HTTP(copy.copy(self.config), i["source"], i["quality"], output=self.output)