1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-28 06:04:17 +01:00
svtplay-dl/lib/svtplay_dl/fetcher/dash.py

250 lines
8.5 KiB
Python
Raw Normal View History

2016-03-26 23:08:22 +01:00
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import copy
import xml.etree.ElementTree as ET
import os
import re
2018-02-25 12:19:31 +01:00
from datetime import datetime
2018-01-30 22:07:21 +01:00
from urllib.parse import urljoin
2016-03-26 23:08:22 +01:00
2018-05-13 13:06:45 +02:00
from svtplay_dl.utils.output import output, progress_stream, ETA, progressbar
2016-03-26 23:08:22 +01:00
from svtplay_dl.error import UIException, ServiceError
from svtplay_dl.fetcher import VideoRetriever
class DASHException(UIException):
    """Base error for DASH problems; keeps the URL that was being handled."""

    def __init__(self, url, message):
        # Store the offending URL first, then hand the message to the parent.
        self.url = url
        super().__init__(message)
class LiveDASHException(DASHException):
    """Error raised for live DASH streams, which this fetcher does not handle."""

    def __init__(self, url):
        message = "This is a live DASH stream, and they are not supported."
        super().__init__(url, message)
2018-02-25 12:19:31 +01:00
def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
    """Expand a DASH ``SegmentTemplate`` element into a list of URLs.

    Args:
        element: ElementTree element for the ``SegmentTemplate``. Its
            ``initialization`` and ``media`` attributes are required;
            ``startNumber``, ``timescale`` and ``duration`` default to 1.
        filename: Base URL that every generated name is joined onto.
        idnumber: Representation id substituted for ``$RepresentationID$``.
        offset_sec: Optional offset in seconds. It is converted to a number
            of segments and added to the start number, but only when
            ``duration_sec`` is None.
        duration_sec: Optional total duration in seconds, used to compute
            how many numbered segments to generate.

    Returns:
        A list whose first entry is the initialization URL, followed by the
        media segment URLs built from the ``$Time$`` and/or ``$Number``
        placeholders in the ``media`` attribute.
    """
    attrib = element.attrib
    init = attrib["initialization"]
    media = attrib["media"]
    start = int(attrib.get("startNumber", 1))
    timescale = float(attrib.get("timescale", 1))
    duration = float(attrib.get("duration", 1))
    total = 1

    # duration / timescale is the length of one segment in seconds.
    if offset_sec is not None and duration_sec is None:
        start += int(offset_sec / (duration / timescale))
    if duration_sec is not None:
        total = int(duration_sec / (duration / timescale))

    # Default to empty lists so a $Time$ template without a SegmentTimeline
    # no longer crashes while iterating None.
    selements = []
    rvalue = []
    timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
    if timeline is not None:
        rvalue = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
        selements = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
        # Segments are addressed by start time, so the duration of the last
        # entry is never needed. Guarded: an empty timeline has nothing to
        # drop (an unguarded pop() raised IndexError).
        if selements:
            selements.pop()
        if rvalue:
            total = int(rvalue[0].attrib["r"]) + len(selements) + 1

    name = media.replace("$RepresentationID$", idnumber)
    files = [urljoin(filename, init.replace("$RepresentationID$", idnumber))]

    if "$Time$" in media:
        # Start times are the running sum of the preceding durations,
        # beginning at 0.
        deltas = [0] + [int(s.attrib["d"]) for s in selements]
        if rvalue and "$Time$" in name and len(selements) < 3:
            # Timeline is (almost) a single repeated entry: derive each time
            # from the segment index and the repeated entry's duration.
            step = int(rvalue[0].attrib["d"])
            for n in range(start, start + total):
                new = name.replace("$Time$", str(n * step))
                files.append(urljoin(filename, new))
        else:
            elapsed = 0
            for delta in deltas:
                elapsed += delta
                new = name.replace("$Time$", str(elapsed))
                files.append(urljoin(filename, new))

    if "$Number" in name:
        if re.search(r"\$Number(\%\d+)d\$", name):
            # "$Number%05d$" -> "%05d": reuse the printf-style width directly.
            vname = name.replace("$Number", "").replace("$", "")
            for n in range(start, start + total):
                files.append(urljoin(filename, vname % n))
        else:
            for n in range(start, start + total):
                files.append(urljoin(filename, name.replace("$Number$", str(n))))
    return files
2018-02-25 12:19:31 +01:00
def adaptionset(element, url, baseurl=None, offset_sec=None, duration_sec=None):
    """Collect the downloadable files of each Representation in an AdaptationSet.

    Args:
        element: Sequence of ``AdaptationSet`` elements; only the first one
            is inspected.
        url: URL of the manifest; its directory is the default base URL.
        baseurl: Optional base URL from the manifest, joined onto the
            manifest directory.
        offset_sec: Passed through to ``templateelemt``.
        duration_sec: Passed through to ``templateelemt``.

    Returns:
        A dict keyed by bitrate (``bandwidth`` / 1000) whose values are
        ``{"segments": bool, "files": [urls]}``. Representations that yield
        no files are omitted. An empty ``element`` sequence yields ``{}``.
    """
    streams = {}
    # An empty match list used to raise IndexError on element[0], which hid
    # the caller's own "nothing found" handling.
    if len(element) == 0:
        return streams

    dirname = os.path.dirname(url) + "/"
    if baseurl:
        dirname = urljoin(dirname, baseurl)

    adaptation = element[0]
    # A SegmentTemplate directly on the AdaptationSet applies to every
    # Representation inside it.
    template = adaptation.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate")
    representations = adaptation.findall(".//{urn:mpeg:dash:schema:mpd:2011}Representation")

    for representation in representations:
        files = []
        segments = False
        filename = dirname
        bitrate = int(representation.attrib["bandwidth"]) / 1000
        idnumber = representation.attrib["id"]

        rep_baseurl = representation.find("{urn:mpeg:dash:schema:mpd:2011}BaseURL")
        if rep_baseurl is not None:
            filename = urljoin(filename, rep_baseurl.text)

        if representation.find("{urn:mpeg:dash:schema:mpd:2011}SegmentBase") is not None:
            segments = True
            files.append(filename)

        # Prefer the shared template; otherwise use the Representation's own.
        # Compare against None explicitly: childless elements are falsy.
        rep_template = template
        if rep_template is None:
            rep_template = representation.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate")
        if rep_template is not None:
            segments = True
            files = templateelemt(rep_template, filename, idnumber, offset_sec, duration_sec)

        if files:
            streams[bitrate] = {"segments": segments, "files": files}

    return streams
2018-05-08 22:46:11 +02:00
def dashparse(config, res, url, output=None):
    """Validate an HTTP response for a DASH manifest and parse its body.

    Returns an empty dict when ``res`` is falsy, a dict ``{0: ServiceError}``
    when the status code is 400 or above or the body is empty, and otherwise
    whatever ``_dashparse`` returns for the response text.

    NOTE(review): if ``res`` is a requests ``Response``, its truth value
    follows the status code, so an error response may already be caught by
    the first check - confirm against the HTTP client in use.
    """
    if not res:
        return {}

    if res.status_code >= 400:
        return {0: ServiceError("Can't read DASH playlist. {0}".format(res.status_code))}

    body = res.text
    size = len(body)
    if size < 1:
        return {0: ServiceError("Can't read DASH playlist. {0}, size: {1}".format(res.status_code, size))}

    return _dashparse(config, body, url, output, res.cookies)
def _dashparse(config, text, url, output, cookies):
    """Parse MPD manifest text into a dict of ``DASH`` retrievers.

    One ``DASH`` object is created per video bitrate, each paired with the
    first audio entry. The dict key is the video bitrate plus that audio
    bitrate. If either audio or video files are missing, the result is
    ``{0: ServiceError}`` instead.
    """
    xml = ET.XML(text)
    attrs = xml.attrib

    baseurl = None
    base_elem = xml.find("./{urn:mpeg:dash:schema:mpd:2011}BaseURL")
    if base_elem is not None:
        baseurl = base_elem.text

    # Offset = time between the stream becoming available and the manifest
    # being published.
    offset_sec = None
    if "availabilityStartTime" in attrs:
        start_raw = attrs["availabilityStartTime"]
        publish_raw = attrs["publishTime"]
        started = parse_dates(start_raw)
        published = parse_dates(publish_raw)
        offset_sec = (published - started).total_seconds()

    duration_sec = None
    if "mediaPresentationDuration" in attrs:
        # strptime fills a missing date with 1900-01-01, so subtracting that
        # date leaves just the parsed duration.
        parsed = parse_dates(attrs["mediaPresentationDuration"])
        duration_sec = (parsed - datetime(1900, 1, 1)).total_seconds()

    audio_sets = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType="audio/mp4"]')
    audiofiles = adaptionset(audio_sets, url, baseurl, offset_sec, duration_sec)
    video_sets = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType="video/mp4"]')
    videofiles = adaptionset(video_sets, url, baseurl, offset_sec, duration_sec)

    if not (audiofiles and videofiles):
        return {0: ServiceError("Found no Audiofiles or Videofiles to download.")}

    # Every video variant is paired with the same (first) audio entry.
    audio_key = next(iter(audiofiles))
    audio_list = audiofiles[audio_key]["files"]

    streams = {}
    for video_key, video in videofiles.items():
        bitrate = video_key + audio_key
        streams[bitrate] = DASH(
            copy.copy(config),
            url,
            bitrate,
            cookies=cookies,
            audio=audio_list,
            files=video["files"],
            output=output,
            segments=video["segments"],
        )
    return streams
2016-03-26 23:08:22 +01:00
def parse_dates(date_str):
    """Parse an MPD timestamp or ``PT...`` duration string into a datetime.

    Patterns are tried in order and the first match wins. Duration strings
    carry no date, so ``strptime`` places them on 1900-01-01; callers
    subtract that date to obtain a timedelta.

    Args:
        date_str: Timestamp such as ``2018-02-25T12:19:31Z`` or a duration
            such as ``PT1H2M3.5S``.

    Returns:
        The parsed ``datetime``.

    Raises:
        ValueError: If no known pattern matches ``date_str``.
    """
    date_patterns = (
        "%Y-%m-%dT%H:%M:%S.%fZ",
        "%Y-%m-%dT%H:%M:%S",
        "PT%HH%MM%S.%fS",
        "PT%HH%MM%SS",
        "PT%MM%S.%fS",
        "PT%MM%SS",
        "PT%HH%SS",
        "PT%HH%S.%fS",
        # Appended after the existing patterns so earlier matches keep
        # precedence: UTC timestamps without fractional seconds and
        # seconds-only durations.
        "%Y-%m-%dT%H:%M:%SZ",
        "PT%S.%fS",
        "PT%SS",
    )
    for pattern in date_patterns:
        try:
            return datetime.strptime(date_str, pattern)
        except (ValueError, TypeError):
            # ValueError: pattern mismatch. TypeError: non-string input,
            # which is still reported as ValueError below.
            continue
    raise ValueError("Can't parse date format: {0}".format(date_str))
2016-03-26 23:08:22 +01:00
class DASH(VideoRetriever):
    """Retriever that downloads the video and audio files of a DASH stream.

    NOTE(review): ``config``, ``url``, ``segments``, ``audio``, ``files``,
    ``kwargs``, ``output`` and ``http`` are read here but not assigned in
    this class; presumably ``VideoRetriever`` provides them - confirm.
    """

    @property
    def name(self):
        """Short identifier of this fetcher."""
        return "dash"

    def download(self):
        """Download the stream, audio first (when present), then video.

        Raises:
            LiveDASHException: If the ``live`` option is set and ``force``
                is not.
        """
        self.output_extention = "mp4"
        if self.config.get("live") and not self.config.get("force"):
            raise LiveDASHException(self.url)

        if self.segments:
            if self.audio:
                self._download2(self.audio, audio=True)
            self._download2(self.files)
        else:
            if self.audio:
                self._download_url(self.audio, audio=True)
            self._download_url(self.url)

    def _download2(self, files, audio=False):
        """Fetch every URL in ``files`` and append the data to one output file.

        Args:
            files: URLs to fetch, in order.
            audio: When true the output gets the ``m4a`` extension,
                otherwise ``mp4``.

        Returns without downloading when no output file could be opened.
        Stops at the first segment that answers 404.
        """
        cookies = self.kwargs["cookies"]

        if audio:
            # A copy of the output settings is used for the audio file;
            # presumably so the shared settings stay untouched - confirm.
            file_d = output(copy.copy(self.output), self.config, extension="m4a")
        else:
            file_d = output(self.output, self.config, extension="mp4")
        if file_d is None:
            return

        total = len(files)
        eta = ETA(total)
        try:
            for index, segment_url in enumerate(files, 1):
                if not self.config.get("silent"):
                    eta.increment()
                    progressbar(total, index, "".join(["ETA: ", str(eta)]))
                response = self.http.request("get", segment_url, cookies=cookies)
                if response.status_code == 404:
                    break
                file_d.write(response.content)
        finally:
            # Close the output even if a request or write fails, so the
            # file handle is not leaked.
            file_d.close()

        if not self.config.get("silent"):
            progress_stream.write("\n")
        self.finished = True