1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-24 12:15:40 +01:00
svtplay-dl/lib/svtplay_dl/fetcher/dash.py
2019-09-06 22:09:07 +02:00

289 lines
10 KiB
Python

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import copy
import xml.etree.ElementTree as ET
import os
import re
from datetime import datetime
from urllib.parse import urljoin
import time
import math
from svtplay_dl.utils.output import output, progress_stream, ETA, progressbar
from svtplay_dl.error import UIException, ServiceError
from svtplay_dl.fetcher import VideoRetriever
class DASHException(UIException):
    """Exception that records the URL it relates to next to its message."""

    def __init__(self, url, message):
        # Store the URL on the instance first, then hand the message to the
        # base class constructor.
        self.url = url
        super(DASHException, self).__init__(message)
class LiveDASHException(DASHException):
    """DASHException with a fixed message saying live DASH is unsupported."""

    def __init__(self, url):
        message = "This is a live DASH stream, and they are not supported."
        super(LiveDASHException, self).__init__(url, message)
class DASHattibutes:
    """Small key/value store used to pass manifest attributes around.

    Values are kept in the ``default`` dict. Reading a key that was never
    stored yields ``0`` instead of raising.
    """

    def __init__(self):
        self.default = dict()

    def set(self, key, value):
        """Store *value* under *key*, replacing any earlier value."""
        self.default[key] = value

    def get(self, key):
        """Return the value stored under *key*, or ``0`` when it is absent."""
        return self.default.get(key, 0)
def templateelemt(attributes, element, filename, idnumber):
    """Expand a DASH ``SegmentTemplate`` element into a list of segment URLs.

    Args:
        attributes: Attribute store exposing ``set(key, value)`` and
            ``get(key)`` (``get`` returns ``0`` for unknown keys). The
            ``timescale`` and, when present, ``duration`` of the template are
            written to it; ``bandwidth`` and the manifest-level timing
            values are read from it.
        element: The ``SegmentTemplate`` element. It must carry the
            ``initialization`` and ``media`` attributes.
        filename: Base URL that every expanded name is joined onto.
        idnumber: Value substituted for ``$RepresentationID$``.

    Returns:
        A list of URLs: the initialization segment first, followed by one
        URL per media segment.

    Raises:
        KeyError: If ``initialization``/``media`` (or ``d`` on a timeline
            entry) is missing, or if ``$Time$`` is used by a template that
            has no ``SegmentTimeline``.
    """
    # Local import so this block does not depend on the file's import list;
    # only the live (dynamic) branch needs it.
    import calendar

    init = element.attrib["initialization"]
    media = element.attrib["media"]
    start = int(element.attrib.get("startNumber", 1))

    if "timescale" in element.attrib:
        attributes.set("timescale", float(element.attrib["timescale"]))
    else:
        attributes.set("timescale", 1)

    if "duration" in element.attrib:
        attributes.set("duration", float(element.attrib["duration"]))

    timescale = attributes.get("timescale")
    segments = []
    timeline = element.findall("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline/{urn:mpeg:dash:schema:mpd:2011}S")
    if timeline:
        t = 0
        number = start
        for s in timeline:
            duration = int(s.attrib["d"])
            repeat = int(s.attrib.get("r", 0))
            # An explicit "t" restarts the clock for this entry (a gap in the
            # timeline); without it the time continues from the previous
            # segment's end.
            if "t" in s.attrib:
                t = int(s.attrib["t"])
            for _ in range(repeat + 1):
                segments.append({"number": number, "duration": math.ceil(duration / timescale), "time": t})
                t += duration
                number += 1
    else:
        duration = attributes.get("duration")
        if attributes.get("type") == "static" or not attributes.get("availabilityStartTime"):
            # On-demand content described only by a fixed segment duration:
            # the segment count follows from the total presentation length.
            total = attributes.get("mediaPresentationDuration")
            count = math.ceil(total * timescale / duration) if duration and total else 0
            numbers = range(start, start + count)
        else:  # Saw this on dynamic live content
            now = time.time()
            # availabilityStartTime is parsed as a naive UTC datetime, so it
            # has to be converted with timegm(); mktime() would interpret it
            # in the local timezone.
            period_start = calendar.timegm(attributes.get("availabilityStartTime").utctimetuple())
            period_end = now + attributes.get("minimumUpdatePeriod")
            segment_count = math.ceil((period_end - period_start) * timescale / duration)
            available_start = math.floor((now - period_start - attributes.get("timeShiftBufferDepth")) * timescale / duration)
            available_end = math.floor((now - period_start) * timescale / duration)
            numbers = range(max(0, available_start), min(segment_count, available_end))
        for number in numbers:
            segments.append({"number": number, "duration": int(duration / timescale)})

    # str() keeps the substitution working even if no bandwidth was stored
    # (the attribute store then answers with the integer 0).
    bandwidth = str(attributes.get("bandwidth"))
    number_format = re.compile(r"\$Number(%\d+d)\$")

    def fill_static(text):
        # Substitutions that are identical for every segment.
        return text.replace("$RepresentationID$", idnumber).replace("$Bandwidth$", bandwidth)

    name = fill_static(media)
    files = [urljoin(filename, fill_static(init))]
    for segment in segments:
        # Start from the template every time so a template without per-segment
        # placeholders still yields a URL, and apply $Time$ and $Number$
        # independently so a template using both gets both filled in.
        new = name
        if "$Time$" in new:
            new = new.replace("$Time$", str(segment["time"]))
        if "$Number" in new:
            number = segment["number"]
            # Format only the placeholder itself; the rest of the name may
            # legitimately contain "%" (URL escapes) or "$".
            new = number_format.sub(lambda found: found.group(1) % number, new)
            new = new.replace("$Number$", str(number))
        files.append(urljoin(filename, new))

    return files
def adaptionset(attributes, element, url, baseurl=None):
    """Collect the downloadable files of every Representation in an AdaptationSet.

    Only the first entry of *element* is inspected.

    Args:
        attributes: Attribute store exposing ``set(key, value)``; the
            ``bandwidth`` of the Representation being processed is written
            to it before its template is expanded.
        element: Sequence of AdaptationSet elements (may be empty).
        url: URL of the manifest; its directory is the default base URL.
        baseurl: Optional base URL that is joined onto the manifest
            directory.

    Returns:
        A dict mapping bitrate (``bandwidth / 1000``) to
        ``{"segments": bool, "files": [urls]}``. Representations that yield
        no files are left out; an empty *element* yields an empty dict.
    """
    streams = {}
    # Nothing matched in the manifest: report "no streams" instead of
    # failing on element[0].
    if not element:
        return streams

    namespace = "{urn:mpeg:dash:schema:mpd:2011}"
    dirname = os.path.dirname(url) + "/"
    if baseurl:
        dirname = urljoin(dirname, baseurl)

    adaptation = element[0]
    # A template directly under the AdaptationSet applies to all of its
    # Representations and takes precedence over per-Representation templates.
    shared_template = adaptation.find(namespace + "SegmentTemplate")

    for representation in adaptation.findall(".//" + namespace + "Representation"):
        files = []
        segments = False
        filename = dirname
        bandwidth = representation.attrib["bandwidth"]
        attributes.set("bandwidth", bandwidth)
        bitrate = int(bandwidth) / 1000
        idnumber = representation.attrib["id"]

        own_base = representation.find(namespace + "BaseURL")
        if own_base is not None:
            filename = urljoin(filename, own_base.text)

        if representation.find(namespace + "SegmentBase") is not None:
            segments = True
            files.append(filename)

        template = shared_template
        if template is None:
            template = representation.find(namespace + "SegmentTemplate")
        if template is not None:
            segments = True
            files = templateelemt(attributes, template, filename, idnumber)

        if files:
            streams[bitrate] = {"segments": segments, "files": files}

    return streams
def dashparse(config, res, url, output=None):
    """Validate an HTTP response holding a DASH manifest and parse it.

    Args:
        config: Configuration handed on to the parser.
        res: Response object exposing ``status_code``, ``text`` and
            ``cookies``, or ``None`` when no response is available.
        url: URL the manifest was fetched from.
        output: Output description handed on to the parser.

    Returns:
        A dict of streams. It is empty when there is no response, holds a
        single ``ServiceError`` under key ``0`` when the response is an
        error or has an empty body, and otherwise is whatever
        ``_dashparse`` returns.
    """
    streams = {}
    # Decide "no response" by the absence of a status code rather than by
    # truthiness: response objects whose truth value mirrors the HTTP status
    # (NOTE(review): presumably requests.Response here - confirm) are falsy
    # for 4xx/5xx, which would skip the error report below.
    status = getattr(res, "status_code", None)
    if status is None:
        return streams

    if status >= 400:
        streams[0] = ServiceError("Can't read DASH playlist. {}".format(status))
        return streams
    if len(res.text) < 1:
        streams[0] = ServiceError("Can't read DASH playlist. {}, size: {}".format(res.status_code, len(res.text)))
        return streams

    return _dashparse(config, res.text, url, output, res.cookies)
def _dashparse(config, text, url, output, cookies):
    """Parse DASH manifest text into a dict of ``DASH`` retrievers.

    Args:
        config: Configuration; a shallow copy is given to every stream.
        text: The manifest XML as a string.
        url: URL the manifest was fetched from.
        output: Output description passed to every stream.
        cookies: Cookies passed to every stream.

    Returns:
        A dict mapping combined bitrate (video bitrate plus the bitrate of
        the first audio entry) to a ``DASH`` instance. When no audio or no
        video files are found the dict holds a single ``ServiceError`` under
        key ``0``.
    """
    streams = {}
    baseurl = None
    attributes = DASHattibutes()

    xml = ET.XML(text)

    base_element = xml.find("./{urn:mpeg:dash:schema:mpd:2011}BaseURL")
    if base_element is not None:
        baseurl = base_element.text

    if "availabilityStartTime" in xml.attrib:
        attributes.set("availabilityStartTime", parse_dates(xml.attrib["availabilityStartTime"]))
        # publishTime does not have to accompany availabilityStartTime.
        if "publishTime" in xml.attrib:
            attributes.set("publishTime", parse_dates(xml.attrib["publishTime"]))

    for key in ("mediaPresentationDuration", "timeShiftBufferDepth", "minimumUpdatePeriod"):
        if key in xml.attrib:
            attributes.set(key, parse_duration(xml.attrib[key]))

    # "type" is optional on the MPD element; the DASH schema default is
    # "static".
    attributes.set("type", xml.attrib.get("type", "static"))

    temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType="audio/mp4"]')
    if not temp:
        temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType="audio"]')
    # Skip the lookup when nothing matched so that a missing set ends up in
    # the error report below instead of an IndexError.
    audiofiles = adaptionset(attributes, temp, url, baseurl) if temp else {}

    temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType="video/mp4"]')
    if not temp:
        temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType="video"]')
    videofiles = adaptionset(attributes, temp, url, baseurl) if temp else {}

    if not audiofiles or not videofiles:
        streams[0] = ServiceError("Found no Audiofiles or Videofiles to download.")
        return streams

    # Every video variant is paired with the first audio entry.
    audio_bitrate = next(iter(audiofiles))
    audio = audiofiles[audio_bitrate]["files"]
    for video_bitrate, video in videofiles.items():
        bitrate = video_bitrate + audio_bitrate
        streams[bitrate] = DASH(
            copy.copy(config),
            url,
            bitrate,
            cookies=cookies,
            audio=audio,
            files=video["files"],
            output=output,
            segments=video["segments"],
        )

    return streams
def parse_duration(duration):
    """Convert a duration string such as ``PT1H2M3.5S`` into seconds.

    A year counts as 365 days and a month as 30 days. Returns ``0`` when
    the pattern is not found in *duration*.
    """
    found = re.search(r"P(?:(\d*)Y)?(?:(\d*)M)?(?:(\d*)D)?(?:T(?:(\d*)H)?(?:(\d*)M)?(?:([\d.]*)S)?)?", duration)
    if not found:
        return 0

    years, months, days, hours, minutes, seconds = found.groups()
    per_minute = 60
    per_hour = 60 * per_minute
    per_day = 24 * per_hour
    weighted = (
        (years, 365 * per_day),
        (months, 30 * per_day),
        (days, per_day),
        (hours, per_hour),
        (minutes, per_minute),
    )
    # Components that are missing or empty contribute nothing.
    whole = sum(int(text) * factor for text, factor in weighted if text)
    # Only the seconds component may carry a fraction.
    return whole + (float(seconds) if seconds else 0)
def parse_dates(date_str):
    """Parse a manifest timestamp into a naive ``datetime``.

    Accepted layouts are ``YYYY-MM-DDTHH:MM:SS`` with optional fractional
    seconds and an optional trailing ``Z``.

    Raises:
        ValueError: If *date_str* matches none of the accepted layouts
            (including when it is not a string).
    """
    date_patterns = (
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "%Y-%m-%dT%H:%M:%SZ",
        "%Y-%m-%dT%H:%M:%S.%f",
    )
    for pattern in date_patterns:
        try:
            return datetime.strptime(date_str, pattern)
        except (ValueError, TypeError):
            # Wrong layout (or not a string at all): try the next pattern.
            continue
    raise ValueError("Can't parse date format: {}".format(date_str))
class DASH(VideoRetriever):
    """Retriever that downloads a DASH stream as audio and video files.

    The methods read ``self.config``, ``self.url``, ``self.output``,
    ``self.http``, ``self.kwargs``, ``self.audio``, ``self.files`` and
    ``self.segments``; none of them is assigned here (presumably they are
    provided by ``VideoRetriever`` - confirm against the base class).
    """

    @property
    def name(self):
        """Identifier of this retriever type."""
        return "dash"

    def download(self):
        """Download the audio (when present) and then the video.

        Raises:
            LiveDASHException: If the ``live`` option is set and ``force``
                is not.
        """
        self.output_extention = "mp4"
        if self.config.get("live") and not self.config.get("force"):
            raise LiveDASHException(self.url)

        if self.segments:
            if self.audio:
                self._download2(self.audio, audio=True)
            self._download2(self.files)
        else:
            if self.audio:
                self._download_url(self.audio, audio=True)
            self._download_url(self.url)

    def _download2(self, files, audio=False):
        """Fetch every URL in *files* in order and append it to one file.

        The target file gets the ``m4a`` extension for audio and ``mp4``
        otherwise. Downloading stops at the first 404 response. Nothing
        happens when no output file could be opened.
        """
        cookies = self.kwargs["cookies"]

        if audio:
            # Work on a copy of the output description for the audio file.
            file_d = output(copy.copy(self.output), self.config, extension="m4a")
        else:
            file_d = output(self.output, self.config, extension="mp4")

        if file_d is None:
            return

        total = len(files)
        eta = ETA(total)
        n = 1
        try:
            for i in files:
                if not self.config.get("silent"):
                    eta.increment()
                    progressbar(total, n, "".join(["ETA: ", str(eta)]))
                    n += 1
                data = self.http.request("get", i, cookies=cookies)

                if data.status_code == 404:
                    break
                file_d.write(data.content)
        finally:
            # Close the file even when a request or write fails, so the
            # handle is not leaked.
            file_d.close()

        if not self.config.get("silent"):
            progress_stream.write("\n")
        self.finished = True