1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-24 12:15:40 +01:00
svtplay-dl/lib/svtplay_dl/fetcher/hls.py

508 lines
19 KiB
Python
Raw Normal View History

2013-03-02 21:26:28 +01:00
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
2019-08-25 00:40:39 +02:00
import binascii
import copy
2013-02-12 19:39:52 +01:00
import os
2019-08-25 00:40:39 +02:00
import random
2013-02-12 19:39:52 +01:00
import re
2018-01-07 20:52:19 +01:00
import time
2019-08-25 00:40:39 +02:00
from datetime import datetime
from datetime import timedelta
2019-03-24 21:04:41 +01:00
from cryptography.hazmat.backends import default_backend
2019-08-25 00:40:39 +02:00
from cryptography.hazmat.primitives.ciphers import algorithms
from cryptography.hazmat.primitives.ciphers import Cipher
from cryptography.hazmat.primitives.ciphers import modes
from svtplay_dl.error import ServiceError
from svtplay_dl.error import UIException
2014-04-21 16:50:24 +02:00
from svtplay_dl.fetcher import VideoRetriever
from svtplay_dl.subtitle import subtitle
2019-08-25 00:40:39 +02:00
from svtplay_dl.utils.http import get_full_url
from svtplay_dl.utils.output import ETA
from svtplay_dl.utils.output import output
from svtplay_dl.utils.output import progress_stream
from svtplay_dl.utils.output import progressbar
2014-04-21 16:50:24 +02:00
class HLSException(UIException):
    """Error tied to one HLS resource.

    The URL the problem relates to is stored on the instance as ``url``;
    the human readable text is passed on to the ``UIException`` base class.
    """

    def __init__(self, url, message):
        # Remember which resource failed before delegating the message.
        self.url = url
        super().__init__(message)
class LiveHLSException(HLSException):
    """``HLSException`` with a fixed message saying live streams are unsupported."""

    def __init__(self, url):
        message = "This is a live HLS stream, and they are not supported."
        super().__init__(url, message)
2013-02-12 19:39:52 +01:00
2018-05-13 13:06:45 +02:00
def hlsparse(config, res, url, **kwargs):
    """Turn the HTTP response for an HLS playlist into a dict of streams.

    A master playlist yields one ``HLS`` fetcher per ``EXT-X-STREAM-INF``
    entry, stored under ``int(bandwidth / 1000)`` (``AVERAGE-BANDWIDTH`` is
    preferred over ``BANDWIDTH``), plus one ``subtitle`` object per subtitle
    rendition when ``httpobject`` is supplied. A media playlist yields a
    single ``HLS`` fetcher under key ``0``. Failures are reported as a
    ``ServiceError`` stored under key ``0``.

    :param config: configuration object; a shallow copy is handed to every
        stream created here. For a media playlist ``config.set("segments",
        False)`` is called on the object itself first.
    :param res: response for the playlist; ``status_code``, ``text`` and
        ``cookies`` are read. A falsy value yields an empty dict.
    :param url: URL the playlist came from, used to resolve relative URIs.
    :param kwargs: ``keycookie``, ``authorization``, ``httpobject``,
        ``output``, ``channels`` and ``codec`` (default ``"h264"``) are
        consumed here; anything left is forwarded to ``HLS`` for master
        playlist variants.
    :returns: dict mapping an integer key to a stream or ``ServiceError``.
    """
    streams = {}

    # NOTE(review): if ``res`` is a requests ``Response`` its truthiness
    # already reflects ``ok``, which would make the status check below
    # unreachable for real responses -- confirm against the callers.
    if not res:
        return streams

    # Every status from 400 upwards is an error, including 400 itself.
    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read HLS playlist. {}".format(res.status_code))
        return streams
    m3u8 = M3U8(res.text)

    keycookie = kwargs.pop("keycookie", None)
    authorization = kwargs.pop("authorization", None)
    httpobject = kwargs.pop("httpobject", None)
    # Named ``output_info`` so the imported ``output`` function is not shadowed.
    output_info = kwargs.pop("output", None)
    channels = kwargs.pop("channels", None)
    codec = kwargs.pop("codec", "h264")

    media = {}  # GROUP-ID -> list of [uri, channels] for non-subtitle renditions
    subtitles = {}  # GROUP-ID -> list of [uri, language]
    segments = None  # None until the first auto-selected non-subtitle rendition is seen

    if m3u8.master_playlist:
        for entry in m3u8.master_playlist:
            tag = entry["TAG"]

            if tag == "EXT-X-MEDIA":
                media_type = entry.get("TYPE")
                has_uri = "URI" in entry
                autoselect = entry.get("AUTOSELECT", "").upper() == "YES"
                if autoselect and media_type and media_type != "SUBTITLES":
                    if has_uri:
                        if segments is None:
                            segments = True
                        chans = "51" if entry.get("CHANNELS") == "6" else None
                        media.setdefault(entry["GROUP-ID"], []).append([entry["URI"], chans])
                    else:
                        segments = False
                if media_type == "SUBTITLES" and has_uri:
                    # LANGUAGE is optional in RFC 8216, so its absence must not
                    # abort the whole parse. NOTE(review): this passes
                    # ``subfix=None`` on to ``subtitle`` -- confirm it copes.
                    item = [entry["URI"], entry.get("LANGUAGE")]
                    group = subtitles.setdefault(entry["GROUP-ID"], [])
                    if item not in group:
                        group.append(item)
                continue

            if tag != "EXT-X-STREAM-INF":
                continue  # Needs to be changed to utilise other tags.

            if "AVERAGE-BANDWIDTH" in entry:
                bit_rate = float(entry["AVERAGE-BANDWIDTH"]) / 1000
            else:
                bit_rate = float(entry["BANDWIDTH"]) / 1000

            # CODECS is a comma separated list; look at every entry instead of
            # assuming that the video codec is listed first.
            vcodec = None
            for codec_name in entry.get("CODECS", "").split(","):
                prefix = codec_name.strip()[:3]
                if prefix == "hvc":
                    vcodec = "hevc"
                elif prefix == "avc":
                    vcodec = "h264"

            audio_url = None
            audio_chans = None
            if "AUDIO" in entry and entry["AUDIO"] in media:
                first_audio = media[entry["AUDIO"]][0]
                audio_url = get_full_url(first_audio[0], url)
                audio_chans = first_audio[1]
            chans = audio_chans if audio_url else channels

            streams[int(bit_rate)] = HLS(
                copy.copy(config),
                get_full_url(entry["URI"], url),
                bit_rate,
                cookies=res.cookies,
                keycookie=keycookie,
                authorization=authorization,
                audio=audio_url,
                output=output_info,
                segments=bool(segments),
                channels=chans,
                # Resolved per variant: the caller supplied default is never
                # overwritten, so one variant's codec cannot leak into the next.
                codec=vcodec if vcodec else codec,
                **kwargs,
            )

        if subtitles and httpobject:
            # "wrstsegment" has been seen in tv4play
            subtype = "wrstsegment" if "cmore" in url else "wrst"
            for renditions in subtitles.values():
                for sub_uri, language in renditions:
                    sub_m3u8 = M3U8(httpobject.request("get", get_full_url(sub_uri, url), cookies=res.cookies).text)
                    if not sub_m3u8.media_segment:
                        # Nothing to download for an empty subtitle playlist.
                        continue
                    # Start from a random key as before, but step past keys that
                    # are taken so no stream or subtitle gets overwritten.
                    key = random.randint(1, 40)
                    while key in streams:
                        key += 1
                    streams[key] = subtitle(
                        copy.copy(config),
                        subtype,
                        get_full_url(sub_m3u8.media_segment[0]["URI"], url),
                        subfix=language,
                        output=copy.copy(output_info),
                        m3u8=sub_m3u8,
                    )

    elif m3u8.media_segment:
        config.set("segments", False)
        streams[0] = HLS(
            copy.copy(config),
            url,
            0,
            cookies=res.cookies,
            keycookie=keycookie,
            authorization=authorization,
            output=output_info,
            segments=False,
        )

    else:
        streams[0] = ServiceError("Can't find HLS playlist in m3u8 file.")

    return streams
2014-04-21 21:42:49 +02:00
2015-09-15 20:10:32 +02:00
2014-04-21 21:55:39 +02:00
class HLS(VideoRetriever):
    """Fetcher that downloads an HLS stream segment by segment into one file.

    The methods read ``config``, ``kwargs``, ``http``, ``url``, ``audio``,
    ``segments``, ``output``, ``keycookie`` and ``authorization`` from the
    instance; presumably these are set up by ``VideoRetriever`` -- confirm.
    """

    # Leading bytes of a segment that is already in the clear even though the
    # playlist says it is encrypted: an ID3 tag (seen on svtplay 5.1ch streams)
    # or an MPEG-TS packet header whose third byte is 0x11 or 0x10.
    _CLEAR_PREFIXES = (b"ID3", b"\x47\x40\x11", b"\x47\x40\x10")

    @property
    def name(self):
        """Short identifier of this fetcher type."""
        return "hls"

    def download(self):
        """Download the stream and, when present, its separate audio rendition."""
        self.output_extention = "ts"
        if self.segments:
            if self.audio and not self.config.get("only_video"):
                self._download(self.audio, file_name=(copy.copy(self.output), "audio.ts"))
            if not self.config.get("only_audio"):
                self._download(self.url, file_name=(self.output, "ts"))
        else:
            # Ignore audio
            self.audio = None
            self._download(self.url, file_name=(self.output, "ts"))

    def _fetch_key(self, key_info, url, cookies):
        """Fetch the AES key described by one parsed ``EXT-X-KEY`` tag.

        :returns: ``(key bytes, iv)`` where ``iv`` is ``None`` when the tag
            carries no ``IV`` attribute.
        :raises HLSException: if the key URI uses the ``skd:`` scheme.
        """
        keyurl = get_full_url(key_info["URI"], url)
        if keyurl and keyurl[:4] == "skd:":
            raise HLSException(keyurl, "Can't decrypt beacuse of DRM")

        headers = {}
        if self.authorization:
            headers["authorization"] = self.authorization
        keycookies = self.keycookie if self.keycookie else cookies

        key = self.http.request("get", keyurl, cookies=keycookies, headers=headers).content
        # The IV attribute is written as 0x<hex>; drop the prefix and left-pad to 16 bytes.
        iv = binascii.unhexlify(key_info["IV"][2:].zfill(32)) if "IV" in key_info else None
        return key, iv

    @staticmethod
    def _decrypt(data, key, iv, sequence):
        """AES-CBC decrypt one segment; no padding is removed.

        RFC 8216 restarts CBC on every segment boundary, so a fresh cipher is
        built per segment. Without an explicit IV the segment's media sequence
        number, as a 16 byte big-endian integer, is the IV.
        """
        if iv is None:
            iv = sequence.to_bytes(16, "big")
        cipher = Cipher(algorithms.AES(key), modes.CBC(iv), backend=default_backend())
        return cipher.decryptor().update(data)

    def _download(self, url, file_name):
        """Download every segment of the media playlist at *url* into one file.

        In live mode (``config.get("live")``) the playlist is re-fetched once
        its last known segment has been written, and new segments are appended
        to the list being iterated.

        :param url: URL of the media playlist.
        :param file_name: ``(output description, extension)`` pair handed to ``output()``.
        :raises HLSException: if a key URI uses the ``skd:`` scheme (DRM).
        :raises ValueError: if an encrypted segment shows up before any key.
        """
        cookies = self.kwargs.get("cookies", None)
        start_time = time.time()
        m3u8 = M3U8(self.http.request("get", url, cookies=cookies).text)

        file_d = output(file_name[0], self.config, file_name[1])
        if file_d is None:
            return

        # Read without removing: download() may call this method twice (audio,
        # then video) and both calls must see the same setting.
        hls_time_stamp = self.kwargs.get("hls_time_stamp", False)
        # Sequence number of the first segment; only used as an IV fallback.
        # NOTE(review): assumes segments appended during a live refresh are
        # contiguous with the ones already known -- confirm.
        first_sequence = m3u8.media_playlist.get("EXT-X-MEDIA-SEQUENCE", 0)

        key = None  # key of the EXT-X-KEY currently in effect
        key_iv = None  # its explicit IV, None when the tag had none
        clear = False  # True after an EXT-X-KEY with METHOD=NONE
        size_media = len(m3u8.media_segment)
        eta = ETA(size_media)
        total_duration = 0
        max_duration = 0

        try:
            # The list grows while it is iterated in live mode (see below).
            for index, segment in enumerate(m3u8.media_segment):
                extinf = segment.get("EXTINF", {})
                if "duration" in extinf:
                    duration = extinf["duration"]
                    max_duration = max(max_duration, duration)
                    total_duration += duration
                item = get_full_url(segment["URI"], url)

                if not self.config.get("silent"):
                    if self.config.get("live"):
                        progressbar(size_media, index + 1, "".join(["DU: ", str(timedelta(seconds=int(total_duration)))]))
                    else:
                        eta.increment()
                        progressbar(size_media, index + 1, "".join(["ETA: ", str(eta)]))

                data = self.http.request("get", item, cookies=cookies)
                if data.status_code == 404:
                    break
                data = data.content

                if m3u8.encrypted:
                    # Update key
                    if "EXT-X-KEY" in segment:
                        key_info = segment["EXT-X-KEY"]
                        clear = key_info.get("METHOD", "").upper() == "NONE"
                        if clear:
                            key = key_iv = None
                        else:
                            key, key_iv = self._fetch_key(key_info, url, cookies)

                    # In some cases the playlist says it is encrypted but the
                    # files are not; those are written out untouched.
                    if not clear and data[:3] not in self._CLEAR_PREFIXES:
                        if key is None:
                            raise ValueError("No decryptor found for encrypted hls steam.")
                        data = self._decrypt(data, key, key_iv, first_sequence + index)

                file_d.write(data)

                if self.config.get("capture_time") > 0 and total_duration >= self.config.get("capture_time") * 60:
                    break

                if (size_media == (index + 1)) and self.config.get("live"):
                    sleep_int = (start_time + max_duration * 2) - time.time()
                    if sleep_int > 0:
                        time.sleep(sleep_int)

                    size_media_old = size_media
                    while size_media_old == size_media:
                        start_time = time.time()

                        if hls_time_stamp:
                            end_time_stamp = (datetime.utcnow() - timedelta(minutes=1, seconds=max_duration * 2)).replace(microsecond=0)
                            start_time_stamp = end_time_stamp - timedelta(minutes=1)

                            base_url = url.split(".m3u8")[0]
                            url = "{}.m3u8?in={}&out={}?".format(base_url, start_time_stamp.isoformat(), end_time_stamp.isoformat())

                        new_m3u8 = M3U8(self.http.request("get", url, cookies=cookies).text)
                        # Append only segments whose URI is not known yet.
                        known_uris = {known["URI"] for known in m3u8.media_segment}
                        for n_m3u in new_m3u8.media_segment:
                            if n_m3u["URI"] not in known_uris:
                                known_uris.add(n_m3u["URI"])
                                m3u8.media_segment.append(n_m3u)

                        size_media = len(m3u8.media_segment)

                        if size_media_old == size_media:
                            time.sleep(max_duration)
        finally:
            # Close the file even when a request or the decryption fails.
            file_d.close()

        if not self.config.get("silent"):
            progress_stream.write("\n")
        self.finished = True
2013-02-12 19:39:52 +01:00
2018-01-30 20:11:37 +01:00
2019-08-25 00:27:31 +02:00
class M3U8:
    """Parsed representation of an HLS playlist.

    After construction:

    * ``version`` - value of ``EXT-X-VERSION`` as ``int``, or ``None``.
    * ``media_segment`` - one dict per segment URI, holding the parsed media
      segment tags that preceded it plus ``"URI"``.
    * ``media_playlist`` - dict of playlist level tags and their values.
    * ``master_playlist`` - one dict per master playlist tag, with ``"TAG"``
      naming the tag (and ``"URI"`` for ``EXT-X-STREAM-INF``).
    * ``encrypted`` - ``True`` once a key tag with a method other than
      ``NONE`` was seen.
    * ``independent_segments`` - ``True`` if ``EXT-X-INDEPENDENT-SEGMENTS``
      was seen.
    """

    # Created for hls version <=7
    # https://tools.ietf.org/html/rfc8216

    MEDIA_SEGMENT_TAGS = ("EXTINF", "EXT-X-BYTERANGE", "EXT-X-DISCONTINUITY", "EXT-X-KEY", "EXT-X-MAP", "EXT-X-PROGRAM-DATE-TIME", "EXT-X-DATERANGE")
    MEDIA_PLAYLIST_TAGS = (
        "EXT-X-TARGETDURATION",
        "EXT-X-MEDIA-SEQUENCE",
        "EXT-X-DISCONTINUITY-SEQUENCE",
        "EXT-X-ENDLIST",
        "EXT-X-PLAYLIST-TYPE",
        "EXT-X-I-FRAMES-ONLY",
    )
    MASTER_PLAYLIST_TAGS = ("EXT-X-MEDIA", "EXT-X-STREAM-INF", "EXT-X-I-FRAME-STREAM-INF", "EXT-X-SESSION-DATA", "EXT-X-SESSION-KEY")
    MEDIA_OR_MASTER_PLAYLIST_TAGS = ("EXT-X-INDEPENDENT-SEGMENTS", "EXT-X-START")

    TAG_TYPES = {"MEDIA_SEGMENT": 0, "MEDIA_PLAYLIST": 1, "MASTER_PLAYLIST": 2}

    def __init__(self, data):
        """Parse *data*, the text of a playlist; see ``parse_m3u`` for errors."""
        self.version = None
        self.media_segment = []
        self.media_playlist = {}
        self.master_playlist = []

        self.encrypted = False
        self.independent_segments = False

        self.parse_m3u(data)

    def __str__(self):
        return "Version: {}\nMedia Segment: {}\nMedia Playlist: {}\nMaster Playlist: {}\nEncrypted: {}\tIndependent_segments: {}".format(
            self.version,
            self.media_segment,
            self.media_playlist,
            self.master_playlist,
            self.encrypted,
            self.independent_segments,
        )

    @staticmethod
    def _uses_encryption(key_info):
        """Tell whether a parsed key tag selects a method other than ``NONE``."""
        return key_info.get("METHOD", "").upper() != "NONE"

    @staticmethod
    def _following_uri(lines, start):
        """Return the first URI line at or after ``lines[start]``.

        Blank lines and lines starting with ``#`` are skipped, so a trailing
        ``EXT-X-STREAM-INF`` no longer fails with ``IndexError`` and a blank
        or comment line is never taken for the URI.

        :raises ValueError: if no URI line follows.
        """
        for candidate in lines[start:]:
            if candidate and not candidate.startswith("#"):
                return candidate
        raise ValueError("Can't find the URI line for 'EXT-X-STREAM-INF'")

    def parse_m3u(self, data):
        """Fill the instance attributes from the playlist text *data*.

        Parsing stops at ``EXT-X-ENDLIST``.

        :raises ValueError: if *data* does not start with ``#EXTM3U``, an
            ``EXT-X-STREAM-INF`` lacks ``BANDWIDTH`` or a URI line, or the
            playlist holds both media segments and master playlist tags.
        """
        if not data.startswith("#EXTM3U"):
            raise ValueError("Does not appear to be an 'EXTM3U' file.")

        lines = data.replace("\r\n", "\n").split("\n")[1:]
        segment_tag_type = M3U8.TAG_TYPES["MEDIA_SEGMENT"]

        last_tag_type = None
        tag_type = None

        # Tags collected for the segment whose URI line has not been seen yet.
        media_segment_info = {}

        for index, line in enumerate(lines):
            if not line:
                continue

            if line.startswith("#EXT"):
                info = {}
                tag, attr = _get_tag_attribute(line)

                if tag == "EXT-X-VERSION":
                    self.version = int(attr)

                # 4.3.2. Media Segment Tags
                elif tag in M3U8.MEDIA_SEGMENT_TAGS:
                    tag_type = segment_tag_type

                    # 4.3.2.1. EXTINF
                    if tag == "EXTINF":
                        if "," in attr:
                            dur, title = attr.split(",", 1)
                        else:
                            dur, title = attr, None
                        info["duration"] = float(dur)
                        info["title"] = title

                    # 4.3.2.2. EXT-X-BYTERANGE
                    elif tag == "EXT-X-BYTERANGE":
                        if "@" in attr:
                            length, offset = attr.split("@", 1)
                            info["n"], info["o"] = int(length), int(offset)
                        else:
                            info["n"] = int(attr)
                            info["o"] = 0

                    # 4.3.2.3. EXT-X-DISCONTINUITY
                    elif tag == "EXT-X-DISCONTINUITY":
                        pass

                    # 4.3.2.4. EXT-X-KEY
                    elif tag == "EXT-X-KEY":
                        info = _get_tuple_attribute(attr)
                        # METHOD=NONE announces clear segments, not encryption.
                        if self._uses_encryption(info):
                            self.encrypted = True

                    # 4.3.2.5. EXT-X-MAP and 4.3.2.7. EXT-X-DATERANGE
                    elif tag in ("EXT-X-MAP", "EXT-X-DATERANGE"):
                        info = _get_tuple_attribute(attr)

                    # 4.3.2.6. EXT-X-PROGRAM-DATE-TIME
                    elif tag == "EXT-X-PROGRAM-DATE-TIME":
                        info = attr

                    media_segment_info[tag] = info

                # 4.3.3. Media Playlist Tags
                elif tag in M3U8.MEDIA_PLAYLIST_TAGS:
                    tag_type = M3U8.TAG_TYPES["MEDIA_PLAYLIST"]

                    # 4.3.3.4. EXT-X-ENDLIST
                    if tag == "EXT-X-ENDLIST":
                        break

                    # 4.3.3.1. EXT-X-TARGETDURATION, 4.3.3.2. EXT-X-MEDIA-SEQUENCE
                    # and 4.3.3.3. EXT-X-DISCONTINUITY-SEQUENCE
                    if tag in ("EXT-X-TARGETDURATION", "EXT-X-MEDIA-SEQUENCE", "EXT-X-DISCONTINUITY-SEQUENCE"):
                        info = int(attr)

                    # 4.3.3.5. EXT-X-PLAYLIST-TYPE
                    elif tag == "EXT-X-PLAYLIST-TYPE":
                        info = attr

                    # 4.3.3.6. EXT-X-I-FRAMES-ONLY carries no value.

                    self.media_playlist[tag] = info

                # 4.3.4. Master Playlist Tags
                elif tag in M3U8.MASTER_PLAYLIST_TAGS:
                    tag_type = M3U8.TAG_TYPES["MASTER_PLAYLIST"]

                    # Every master playlist tag carries an attribute list.
                    info = _get_tuple_attribute(attr)

                    # 4.3.4.2. EXT-X-STREAM-INF
                    if tag == "EXT-X-STREAM-INF":
                        if "BANDWIDTH" not in info:
                            raise ValueError("Can't find 'BANDWIDTH' in 'EXT-X-STREAM-INF'")
                        info["URI"] = self._following_uri(lines, index + 1)

                    # 4.3.4.5. EXT-X-SESSION-KEY
                    elif tag == "EXT-X-SESSION-KEY":
                        if self._uses_encryption(info):
                            self.encrypted = True

                    info["TAG"] = tag
                    self.master_playlist.append(info)

                # 4.3.5. Media or Master Playlist Tags
                elif tag in M3U8.MEDIA_OR_MASTER_PLAYLIST_TAGS:
                    tag_type = M3U8.TAG_TYPES["MEDIA_PLAYLIST"]

                    # 4.3.5.1. EXT-X-INDEPENDENT-SEGMENTS
                    if tag == "EXT-X-INDEPENDENT-SEGMENTS":
                        self.independent_segments = True

                    # 4.3.5.2. EXT-X-START
                    elif tag == "EXT-X-START":
                        info = _get_tuple_attribute(attr)

                    self.media_playlist[tag] = info

                # Any other tag is unused and leaves the state untouched.

            # This is a comment
            elif line.startswith("#"):
                pass

            # This must be a url/uri
            else:
                tag_type = None

                # Compare by value; identity of ints is an implementation detail.
                if last_tag_type == segment_tag_type:
                    media_segment_info["URI"] = line
                    self.media_segment.append(media_segment_info)
                    media_segment_info = {}

            last_tag_type = tag_type

        if self.media_segment and self.master_playlist:
            raise ValueError("This 'M3U8' file contains data for both 'Media Segment' and 'Master Playlist'. This is not allowed.")
def _get_tag_attribute(line):
line = line[1:]
try:
search_line = re.search(r"^([A-Z\-]*):(.*)", line)
return search_line.group(1), search_line.group(2)
2018-01-30 20:11:37 +01:00
except Exception:
return line, None
def _get_tuple_attribute(attribute):
attr_tuple = {}
2019-08-25 00:27:31 +02:00
for art_l in re.split(""",(?=(?:[^'"]|'[^']*'|"[^"]*")*$)""", attribute):
if art_l:
2017-10-27 00:08:53 +02:00
name, value = art_l.split("=", 1)
name = name.strip()
2017-10-27 00:08:53 +02:00
# Checks for attribute name
if not re.match(r"^[A-Z0-9\-]*$", name):
2017-10-27 00:08:53 +02:00
raise ValueError("Not a valid attribute name.")
2017-10-27 00:08:53 +02:00
# Remove extra quotes of string
if value.startswith('"') and value.endswith('"'):
value = value[1:-1]
attr_tuple[name] = value
return attr_tuple