1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-27 05:34:15 +01:00

Rewrote big parts of the _dashparser

This commit is contained in:
Johan Andersson 2019-07-12 22:37:05 +02:00
parent d33186e54e
commit a84b89bafd
3 changed files with 136 additions and 67 deletions

View File

@ -7,6 +7,8 @@ import os
import re
from datetime import datetime
from urllib.parse import urljoin
import time
import math
from svtplay_dl.utils.output import output, progress_stream, ETA, progressbar
from svtplay_dl.error import UIException, ServiceError
@ -25,11 +27,21 @@ class LiveDASHException(DASHException):
url, "This is a live DASH stream, and they are not supported.")
def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
class DASHattibutes(object):
    """Small key/value holder for attributes collected while parsing an MPD.

    Values are kept in the ``default`` dict. Reading a key that was never
    set returns ``0`` unless the caller supplies another fallback.
    """

    def __init__(self):
        # Backing store; the attribute name ``default`` is kept as-is because
        # it is publicly reachable on instances.
        self.default = {}

    def set(self, key, value):
        """Store *value* under *key*, replacing any earlier value."""
        self.default[key] = value

    def get(self, key, default=0):
        """Return the value stored for *key*.

        When *key* has never been set, *default* is returned instead; it
        is ``0`` when omitted, so existing one-argument callers see the same
        result as before.
        """
        return self.default.get(key, default)
def templateelemt(attributes, element, filename, idnumber):
files = []
timescale = 1
duration = 1
total = 1
init = element.attrib["initialization"]
media = element.attrib["media"]
@ -39,60 +51,64 @@ def templateelemt(element, filename, idnumber, offset_sec, duration_sec):
start = 1
if "timescale" in element.attrib:
timescale = float(element.attrib["timescale"])
attributes.set("timescale", float(element.attrib["timescale"]))
else:
attributes.set("timescale", 1)
if "duration" in element.attrib:
duration = float(element.attrib["duration"])
attributes.set("duration", float(element.attrib["duration"]))
if offset_sec is not None and duration_sec is None:
start += int(offset_sec / (duration / timescale))
segments = []
timeline = element.findall("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline/{urn:mpeg:dash:schema:mpd:2011}S")
if timeline:
t = -1
for s in timeline:
duration = int(s.attrib["d"])
repeat = int(s.attrib["r"]) if "r" in s.attrib else 0
segmenttime = int(s.attrib["t"]) if "t" in s.attrib else 0
if duration_sec is not None:
total = int(duration_sec / (duration / timescale))
if t < 0:
t = segmenttime
count = repeat + 1
selements = None
rvalue = None
timeline = element.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTimeline")
if timeline is not None:
end = start + len(segments) + count
number = start + len(segments)
while number < end:
segments.append({"number": number, "duration": math.ceil(duration / attributes.get("timescale")), "time": t, })
t += duration
number += 1
else: # Saw this on dynamic live content
start = 0
now = time.time()
periodStartWC = time.mktime(attributes.get("availabilityStartTime").timetuple()) + start
periodEndWC = now + attributes.get("minimumUpdatePeriod")
periodDuration = periodEndWC - periodStartWC
segmentCount = math.ceil(periodDuration * attributes.get("timescale") / attributes.get("duration"))
availableStart = math.floor((now - periodStartWC - attributes.get("timeShiftBufferDepth")) * attributes.get("timescale") / attributes.get("duration"))
availableEnd = math.floor((now - periodStartWC) * attributes.get("timescale") / attributes.get("duration"))
start = max(0, availableStart)
end = min(segmentCount, availableEnd)
for number in range(start, end):
segments.append({"number": number, "duration": int(attributes.get("duration") / attributes.get("timescale"))})
rvalue = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S[@r]")
selements = timeline.findall(".//{urn:mpeg:dash:schema:mpd:2011}S")
selements.pop()
name = media.replace("$RepresentationID$", idnumber).replace("$Bandwidth$", attributes.get("bandwidth"))
files.append(urljoin(filename, init.replace("$RepresentationID$", idnumber).replace("$Bandwidth$", attributes.get("bandwidth"))))
for segment in segments:
if "$Time$" in media:
new = name.replace("$Time$", str(segment["time"]))
if "$Number" in name:
if re.search(r"\$Number(\%\d+)d\$", name):
vname = name.replace("$Number", "").replace("$", "")
new = vname % segment["number"]
else:
new = name.replace("$Number$", str(segment["number"]))
if rvalue:
total = int(rvalue[0].attrib["r"]) + len(selements) + 1
files.append(urljoin(filename, new))
name = media.replace("$RepresentationID$", idnumber)
files.append(urljoin(filename, init.replace("$RepresentationID$", idnumber)))
if "$Time$" in media:
time = [0]
for n in selements:
time.append(int(n.attrib["d"]))
match = re.search(r"\$Time\$", name)
if rvalue and match and len(selements) < 3:
for n in range(start, start + total):
new = name.replace("$Time$", str(n * int(rvalue[0].attrib["d"])))
files.append(urljoin(filename, new))
else:
number = 0
for n in time:
number += n
new = name.replace("$Time$", str(number))
files.append(urljoin(filename, new))
if "$Number" in name:
if re.search(r"\$Number(\%\d+)d\$", name):
vname = name.replace("$Number", "").replace("$", "")
for n in range(start, start + total):
files.append(urljoin(filename, vname % n))
else:
for n in range(start, start + total):
newname = name.replace("$Number$", str(n))
files.append(urljoin(filename, newname))
return files
def adaptionset(element, url, baseurl=None, offset_sec=None, duration_sec=None):
def adaptionset(attributes, element, url, baseurl=None):
streams = {}
dirname = os.path.dirname(url) + "/"
@ -106,6 +122,7 @@ def adaptionset(element, url, baseurl=None, offset_sec=None, duration_sec=None):
files = []
segments = False
filename = dirname
attributes.set("bandwidth", i.attrib["bandwidth"])
bitrate = int(i.attrib["bandwidth"]) / 1000
idnumber = i.attrib["id"]
@ -117,10 +134,10 @@ def adaptionset(element, url, baseurl=None, offset_sec=None, duration_sec=None):
files.append(filename)
if template is not None:
segments = True
files = templateelemt(template, filename, idnumber, offset_sec, duration_sec)
files = templateelemt(attributes, template, filename, idnumber)
elif i.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate") is not None:
segments = True
files = templateelemt(i.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate"), filename, idnumber, offset_sec, duration_sec)
files = templateelemt(attributes, i.find("{urn:mpeg:dash:schema:mpd:2011}SegmentTemplate"), filename, idnumber)
if files:
streams[bitrate] = {"segments": segments, "files": files}
@ -146,8 +163,7 @@ def dashparse(config, res, url, output=None):
def _dashparse(config, text, url, output, cookies):
streams = {}
baseurl = None
offset_sec = None
duration_sec = None
attributes = DASHattibutes()
xml = ET.XML(text)
@ -155,26 +171,25 @@ def _dashparse(config, text, url, output, cookies):
baseurl = xml.find("./{urn:mpeg:dash:schema:mpd:2011}BaseURL").text
if "availabilityStartTime" in xml.attrib:
availabilityStartTime = xml.attrib["availabilityStartTime"]
publishTime = xml.attrib["publishTime"]
attributes.set("availabilityStartTime", parse_dates(xml.attrib["availabilityStartTime"]))
attributes.set("publishTime", parse_dates(xml.attrib["publishTime"]))
datetime_start = parse_dates(availabilityStartTime)
datetime_publish = parse_dates(publishTime)
diff_publish = datetime_publish - datetime_start
offset_sec = diff_publish.total_seconds()
if "mediaPresentationDuration" in xml.attrib:
mediaPresentationDuration = xml.attrib["mediaPresentationDuration"]
duration_sec = (parse_dates(mediaPresentationDuration) - datetime(1900, 1, 1)).total_seconds()
if "mediaPresentationDuration" in xml.attrib:
attributes.set("mediaPresentationDuration", parse_duration(xml.attrib["mediaPresentationDuration"]))
if "timeShiftBufferDepth" in xml.attrib:
attributes.set("timeShiftBufferDepth", parse_duration(xml.attrib["timeShiftBufferDepth"]))
if "minimumUpdatePeriod" in xml.attrib:
attributes.set("minimumUpdatePeriod", parse_duration(xml.attrib["minimumUpdatePeriod"]))
attributes.set("type", xml.attrib["type"])
temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType="audio/mp4"]')
if len(temp) == 0:
temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType="audio"]')
audiofiles = adaptionset(temp, url, baseurl, offset_sec, duration_sec)
audiofiles = adaptionset(attributes, temp, url, baseurl)
temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@mimeType="video/mp4"]')
if len(temp) == 0:
temp = xml.findall('.//{urn:mpeg:dash:schema:mpd:2011}AdaptationSet[@contentType="video"]')
videofiles = adaptionset(temp, url, baseurl, offset_sec, duration_sec)
videofiles = adaptionset(attributes, temp, url, baseurl)
if not audiofiles or not videofiles:
streams[0] = ServiceError("Found no Audiofiles or Videofiles to download.")
@ -189,9 +204,21 @@ def _dashparse(config, text, url, output, cookies):
return streams
def parse_duration(duration):
    """Convert a ``PnYnMnDTnHnMnS`` duration string to a number of seconds.

    Years count as 365 days and months as 30 days. Only the seconds field
    may carry a fraction. Components that are absent contribute nothing.
    If no ``P`` designator is found at all, ``0`` is returned.

    A seconds field that is not a single well-formed decimal number (for
    example ``1.2.3`` or a lone ``.``) is ignored instead of raising
    ``ValueError`` from ``float()``.
    """
    match = re.search(
        r"P(?:(\d*)Y)?(?:(\d*)M)?(?:(\d*)D)?"
        # Seconds: digits with an optional fraction, or a bare fraction.
        r"(?:T(?:(\d*)H)?(?:(\d*)M)?(?:(\d+(?:\.\d*)?|\.\d+)S)?)?",
        duration,
    )
    if not match:
        return 0

    years, months, days, hours, minutes, seconds = match.groups()
    minute_len = 60
    hour_len = 60 * minute_len
    day_len = 24 * hour_len

    total = 0
    # Integer components, each paired with its length in seconds. A group is
    # None (component absent) or possibly "" (designator without digits);
    # both are skipped.
    for text, unit in (
        (years, 365 * day_len),
        (months, 30 * day_len),
        (days, day_len),
        (hours, hour_len),
        (minutes, minute_len),
    ):
        if text:
            total += int(text) * unit
    if seconds:
        total += float(seconds)
    return total
def parse_dates(date_str):
date_patterns = ["%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S", "PT%HH%MM%S.%fS",
"PT%HH%MM%SS", "PT%MM%S.%fS", "PT%MM%SS", "PT%HH%SS", "PT%HH%S.%fS"]
date_patterns = ["%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%S", "%Y-%m-%dT%H:%M:%SZ"]
dt = None
for pattern in date_patterns:
try:

View File

@ -0,0 +1,29 @@
<MPD xmlns="urn:mpeg:dash:schema:mpd:2011" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="urn:mpeg:dash:schema:mpd:2011 http://standards.iso.org/ittf/PubliclyAvailableStandards/MPEG-DASH_schema_files/DASH-MPD.xsd" profiles="urn:hbbtv:dash:profile:isoff-live:2012,urn:mpeg:dash:profile:isoff-live:2011" type="dynamic" availabilityStartTime="2018-05-15T12:21:39.834Z" publishTime="2019-07-11T20:40:51.958Z" minimumUpdatePeriod="PT9.6S" minBufferTime="PT6.4S" timeShiftBufferDepth="PT32S" suggestedPresentationDelay="PT6.4S">
<Period id="0" start="PT0S">
<AdaptationSet id="0" mimeType="video/mp4" segmentAlignment="true" frameRate="25" group="1">
<SegmentTemplate initialization="36603052-54ff-4835-a041-a1c25feee9a1/dash-v$RepresentationID$/v$RepresentationID$-init.mp4" media="36603052-54ff-4835-a041-a1c25feee9a1/dash-v$RepresentationID$/v$RepresentationID$-$Number$.mp4" startNumber="1" timescale="90000" duration="288000"/>
<Representation id="0" bandwidth="144000" width="512" height="288" codecs="avc1.42c015"/>
<Representation id="1" bandwidth="348000" width="512" height="288" codecs="avc1.42c016"/>
<Representation id="2" bandwidth="456000" width="512" height="288" codecs="avc1.42c016"/>
<Representation id="3" bandwidth="636000" width="512" height="288" codecs="avc1.42c016"/>
<Representation id="4" bandwidth="988000" width="768" height="432" codecs="avc1.4d401e"/>
<Representation id="5" bandwidth="1680000" width="1280" height="720" codecs="avc1.4d401f"/>
<Representation id="6" bandwidth="2796000" width="1280" height="720" codecs="avc1.4d401f"/>
</AdaptationSet>
<AdaptationSet id="1" mimeType="audio/mp4" segmentAlignment="true" group="2" lang="sv">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="main"/>
<SegmentTemplate initialization="36603052-54ff-4835-a041-a1c25feee9a1/dash-a0/a0-init.mp4" media="36603052-54ff-4835-a041-a1c25feee9a1/dash-a0/a0-$Number$.mp4" startNumber="1" timescale="90000" duration="288000"/>
<Representation id="7" bandwidth="96000" codecs="mp4a.40.2" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
</Representation>
</AdaptationSet>
<AdaptationSet id="2" mimeType="audio/mp4" segmentAlignment="true" group="2" lang="sv-tal">
<Role schemeIdUri="urn:mpeg:dash:role:2011" value="dub"/>
<SegmentTemplate initialization="36603052-54ff-4835-a041-a1c25feee9a1/dash-a1/a1-init.mp4" media="36603052-54ff-4835-a041-a1c25feee9a1/dash-a1/a1-$Number$.mp4" startNumber="1" timescale="90000" duration="288000"/>
<Representation id="8" bandwidth="96000" codecs="mp4a.40.2" audioSamplingRate="48000">
<AudioChannelConfiguration schemeIdUri="urn:mpeg:dash:23003:3:audio_channel_configuration:2011" value="2"/>
</Representation>
</AdaptationSet>
</Period>
<UTCTiming schemeIdUri="urn:mpeg:dash:utc:http-iso:2014" value="https://time.akamai.com/?iso"/>
</MPD>

View File

@ -1,7 +1,7 @@
from __future__ import absolute_import
import unittest
import os
from svtplay_dl.fetcher.dash import _dashparse
from svtplay_dl.fetcher.dash import _dashparse, parse_duration
from svtplay_dl.utils.parser import setup_defaults
@ -16,7 +16,7 @@ class dashtest(unittest.TestCase):
def test_parse_cmore(self):
data = parse("cmore.mpd")
self.assertEquals(len(data[3261.367].files), 410)
self.assertEqual(len(data[3261.367].audio), 309)
self.assertEqual(len(data[3261.367].audio), 615)
self.assertTrue(data[3261.367].segments)
def test_parse_fff(self):
@ -36,3 +36,16 @@ class dashtest(unittest.TestCase):
self.assertEquals(len(data[2795.9959999999996].files), 6)
self.assertEqual(len(data[2795.9959999999996].audio), 6)
self.assertTrue(data[2795.9959999999996].segments)
def test_parse_live2(self):
    """Check the stream parsed from the ``svtplay-live2.mpd`` fixture.

    The entry keyed ``2892.0`` is expected to hold 11 video files and
    11 audio files, and to be flagged as segmented.
    """
    data = parse("svtplay-live2.mpd")
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(len(data[2892.0].files), 11)
    self.assertEqual(len(data[2892.0].audio), 11)
    self.assertTrue(data[2892.0].segments)
def test_parse_duration(self):
    """Check ``parse_duration`` on seconds-only, mixed and invalid input."""
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(parse_duration("PT3459.520S"), 3459.52)
    self.assertEqual(parse_duration("PT2.00S"), 2.0)
    self.assertEqual(parse_duration("PT1H0M30.000S"), 3630.0)
    # 1 year (365 d) + 1 month (30 d) + 1 day + 1 hour + 30 seconds.
    self.assertEqual(parse_duration("P1Y1M1DT1H0M30.000S"), 34218030.0)
    # No recognisable components after the "P" designator.
    self.assertEqual(parse_duration("PWroNG"), 0)