svtplay-dl/lib/svtplay_dl/fetcher/hls.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import sys
import os
import re
import copy

from svtplay_dl.output import progressbar, progress_stream, ETA, output
from svtplay_dl.log import log
from svtplay_dl.error import UIException, ServiceError
from svtplay_dl.fetcher import VideoRetriever
from svtplay_dl.utils import HTTP


class HLSException(UIException):
    """Error raised for a problem with an HLS stream.

    The URL passed to the constructor is kept on the instance as ``url``;
    the message is handed to the UIException constructor.
    """

    def __init__(self, url, message):
        # Remember which stream the error is about.
        self.url = url
        super(HLSException, self).__init__(message)


class LiveHLSException(HLSException):
    """HLSException with a fixed message saying live streams are unsupported."""

    def __init__(self, url):
        message = "This is a live HLS stream, and they are not supported."
        super(LiveHLSException, self).__init__(url, message)


def _get_full_url(url, srcurl):
    if url[:4] == 'http':
        return url
    if url[0] == '/':
        baseurl = re.search(r'^(http[s]{0,1}://[^/]+)/', srcurl)
        return "{0}{1}".format(baseurl.group(1), url)

    # remove everything after last / in the path of the URL
    baseurl = re.sub(r'^([^\?]+)/[^/]*(\?.*)?$', r'\1', srcurl)
    returl = "{0}/{1}".format(baseurl, url)

    return returl


def hlsparse(options, res, url):
    """Build one HLS fetcher per variant listed in a master playlist.

    options is copied into every HLS object that is created, res is the
    response holding the playlist (its status_code, text and cookies are
    read) and url is the address the playlist was fetched from, used to
    resolve the variant URIs.

    Returns None when res is falsy.  Otherwise returns a dict: on success
    it maps int(BANDWIDTH / 1000) to an HLS object for every variant whose
    playlist could be fetched; on failure it holds a ServiceError under
    key 0.
    """
    streams = {}

    # NOTE(review): this is a truthiness test, not an "is None" test.  If
    # res is a requests Response (presumably -- confirm against the callers)
    # an error response is falsy and returns None here, before the status
    # check below is reached.
    if not res:
        return None

    # Every status from 400 up is an error, matching the "< 400" success
    # test applied to the variant playlists further down.
    if res.status_code >= 400:
        streams[0] = ServiceError("Can't read HLS playlist. {0}".format(res.status_code))
        return streams

    files = parsem3u(res.text)[1]
    http = HTTP(options)
    for i in files:
        try:
            bitrate = float(i[1]["BANDWIDTH"]) / 1000
        except (KeyError, ValueError):
            # BANDWIDTH missing or not a number: not a usable master playlist
            streams[0] = ServiceError("Can't read HLS playlist")
            return streams
        urls = _get_full_url(i[0], url)
        # Only offer variants whose own playlist can actually be fetched.
        res2 = http.get(urls, cookies=res.cookies)
        if res2.status_code < 400:
            streams[int(bitrate)] = HLS(copy.copy(options), urls, bitrate, cookies=res.cookies)
    return streams


class HLS(VideoRetriever):
    """Fetcher that writes all segments of an HLS media playlist to one file."""

    def name(self):
        """Return the identifier of this download method."""
        return "hls"

    @staticmethod
    def _segment_iv(keydata, sequence):
        """Return the 16-byte AES-CBC IV for one media segment.

        keydata is the attribute list of the EXT-X-KEY tag.  When it has an
        IV attribute that value is used; otherwise the IV is the media
        sequence number of the segment as a 128-bit big-endian integer.
        """
        import binascii

        explicit = re.search(r'(?:^|,)\s*IV=0[xX]([0-9a-fA-F]{1,32})', keydata)
        if explicit is not None:
            digits = explicit.group(1).zfill(32)
        else:
            digits = "{0:032x}".format(sequence)
        return binascii.unhexlify(digits)

    def download(self):
        """Download every file of the playlist at self.url into the output.

        Raises LiveHLSException for live streams unless options.force is
        set, and HLSException when the playlist is encrypted but names no
        key URI.  Exits the process with status 2 when decryption is needed
        and Crypto.Cipher cannot be imported.  Sets self.finished once the
        output file has been written, except when the output is "-".
        """
        if self.options.live and not self.options.force:
            raise LiveHLSException(self.url)

        cookies = self.kwargs["cookies"]
        m3u8 = self.http.request("get", self.url, cookies=cookies).text
        globaldata, files = parsem3u(m3u8)

        keydata = globaldata.get("KEY")
        # An EXT-X-KEY tag with METHOD=NONE declares the segments unencrypted.
        encrypted = keydata is not None and re.search(r'METHOD=NONE\b', keydata) is None

        if encrypted:
            try:
                from Crypto.Cipher import AES
            except ImportError:
                log.error("You need to install pycrypto to download encrypted HLS streams")
                sys.exit(2)

            match = re.search(r'URI="([^"]+)"', keydata)
            if match is None:
                raise HLSException(self.url, "Can't find the key URI in the HLS playlist")
            # The key URI may be relative to the playlist, like the segments.
            keyurl = _get_full_url(match.group(1), self.url)
            key = self.http.request("get", keyurl).content

            # Sequence number of the first segment; 0 when the tag is absent.
            try:
                first_sequence = int(globaldata.get("MEDIA-SEQUENCE", 0))
            except ValueError:
                first_sequence = 0

        file_d = output(self.options, "ts")
        if hasattr(file_d, "read") is False:
            return

        show_progress = self.options.output != "-" and not self.options.silent
        total = len(files)
        eta = ETA(total)
        try:
            for index, entry in enumerate(files):
                item = _get_full_url(entry[0], self.url)

                if show_progress:
                    eta.increment()
                    # the progress bar counts from 1
                    progressbar(total, index + 1, ''.join(['ETA: ', str(eta)]))

                data = self.http.request("get", item, cookies=cookies)
                if data.status_code == 404:
                    break
                data = data.content
                if encrypted:
                    # Every segment is an independent CBC stream with its own
                    # IV, so a fresh cipher object is needed per segment.
                    # NOTE(review): files can also hold EXT-X-MAP entries
                    # (parsem3u appends them), which would shift the derived
                    # sequence number -- confirm whether such playlists are
                    # ever encrypted this way.  PKCS7 padding is left in the
                    # output, as it was before.
                    iv = self._segment_iv(keydata, first_sequence + index)
                    data = AES.new(key, AES.MODE_CBC, iv).decrypt(data)
                file_d.write(data)
        finally:
            # Close the output file even when a request or write fails.
            if self.options.output != "-":
                file_d.close()

        if self.options.output != "-":
            if not self.options.silent:
                progress_stream.write('\n')
            self.finished = True


def parsem3u(data):
    """Parse the text of an extended M3U playlist.

    Returns a tuple (globdata, files):

    * globdata maps the name of every other ``#EXT-X-<name>`` tag to the
      text after its colon, or to the string "None" when the tag has no
      colon.
    * files is a list of (uri, info) tuples in playlist order.  info is a
      dict holding the BANDWIDTH and RESOLUTION attributes of a preceding
      EXT-X-STREAM-INF tag and/or the "duration" and "title" of a preceding
      EXTINF tag.  The URI of an EXT-X-MAP tag is listed as a file too.

    Parsing stops at the first EXT-X-ENDLIST or EXT-X-BYTERANGE tag.

    Raises ValueError when data does not start with "#EXTM3U".
    """
    if not data.startswith("#EXTM3U"):
        raise ValueError("Does not apprear to be a ext m3u file")

    files = []
    streaminfo = {}
    globdata = {}

    for raw in data.replace("\r", "\n").split("\n")[1:]:
        # Surrounding whitespace is never part of a tag or a URI.
        line = raw.strip()
        if not line:
            continue
        if line.startswith("#EXT-X-STREAM-INF:"):
            # not a proper parser: quoted values containing commas are split
            # as well, but the two attributes of interest are never quoted
            for attribute in line[18:].split(","):
                pair = attribute.strip().split("=", 1)
                if len(pair) == 2 and pair[0] in ("BANDWIDTH", "RESOLUTION"):
                    streaminfo[pair[0]] = pair[1]
        elif line.startswith("#EXT-X-MAP:"):
            attributes = line[11:]
            if attributes.startswith("URI"):
                # Skip the 5 characters of 'URI="' and cut at the closing
                # quote.  The info dict is copied so that tags belonging to
                # the next segment do not leak into this entry.
                files.append((attributes[5:].split("\"")[0], dict(streaminfo)))
        elif line.startswith("#EXT-X-ENDLIST") or line.startswith("#EXT-X-BYTERANGE:"):
            break
        elif line.startswith("#EXT-X-"):
            pair = line[7:].strip().split(":", 1)
            if len(pair) == 1:
                # tag without a value
                pair.append("None")
            globdata[pair[0]] = pair[1]
        elif line.startswith("#EXTINF:"):
            value = line[8:].strip()
            try:
                dur, title = value.split(",", 1)
            except ValueError:
                # no comma, so there is only a duration
                dur = value
                title = None
            streaminfo['duration'] = dur
            streaminfo['title'] = title
        elif line[0] == '#':
            # comment or unknown tag
            pass
        else:
            files.append((line, streaminfo))
            streaminfo = {}

    return globdata, files
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`import sys`
			`import os`
			`import re`
hls: have the same interface as hds 2015-10-04 14:33:54 +02:00			`import copy`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00
refactor output to its own function Almost the same code for all the fetchers. 2014-08-20 20:27:45 +02:00			`from svtplay_dl.output import progressbar, progress_stream, ETA, output`
Rename module from lib/svtplay to lib/svtplay_dl less confusion with the service. 2013-03-17 19:55:19 +01:00			`from svtplay_dl.log import log`
HLS: handle 403 error when the playlist is geoblocked 2015-10-04 17:41:11 +02:00			`from svtplay_dl.error import UIException, ServiceError`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`from svtplay_dl.fetcher import VideoRetriever`
hls: try to read the file playlist 2017-02-18 23:51:26 +01:00			`from svtplay_dl.utils import HTTP`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00
hls: Error on live streams This can be overriden using the --force flag, but the output may be a bit disappointing --- only the current state of the HLS playlist is downloaded, and no reload is attempted. 2014-02-09 15:40:02 +01:00
			`class HLSException(UIException):`
			`def __init__(self, url, message):`
			`self.url = url`
			`super(HLSException, self).__init__(message)`


			`class LiveHLSException(HLSException):`
			`def __init__(self, url):`
			`super(LiveHLSException, self).__init__(`
			`url, "This is a live HLS stream, and they are not supported.")`

Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00
hls: automatically detect baseurl 2014-02-08 16:08:39 +01:00			`def _get_full_url(url, srcurl):`
			`if url[:4] == 'http':`
			`return url`
hls: if the file start with / add hostname to it. 2016-09-04 20:43:37 +02:00			`if url[0] == '/':`
			`baseurl = re.search(r'^(http[s]{0,1}://[^/]+)/', srcurl)`
fetcher: change str formating from '%s' to '.format' 2017-10-09 22:35:33 +02:00			`return "{0}{1}".format(baseurl.group(1), url)`
hls: automatically detect baseurl 2014-02-08 16:08:39 +01:00
			`# remove everything after last / in the path of the URL`
			`baseurl = re.sub(r'^([^\?]+)/[^/]*(\?.*)?$', r'\1', srcurl)`
fetcher: change str formating from '%s' to '.format' 2017-10-09 22:35:33 +02:00			`returl = "{0}/{1}".format(baseurl, url)`
hls: automatically detect baseurl 2014-02-08 16:08:39 +01:00
			`return returl`

pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
hls: have the same interface as hds 2015-10-04 14:33:54 +02:00			`def hlsparse(options, res, url):`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`streams = {}`
HLS: export resolution. 2014-04-21 21:42:49 +02:00
fetcher: handle errors from parsing playlists 2016-10-16 19:35:38 +02:00			`if not res:`
			`return None`

fetcher: dont crash if we cant read the playlists fixes #446 2016-09-09 22:56:05 +02:00			`if res.status_code > 400:`
			`streams[0] = ServiceError("Can't read HLS playlist. {0}".format(res.status_code))`
HLS: handle 403 error when the playlist is geoblocked 2015-10-04 17:41:11 +02:00			`return streams`
			`files = (parsem3u(res.text))[1]`
hls: try to read the file playlist 2017-02-18 23:51:26 +01:00			`http = HTTP(options)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`for i in files:`
hls: don’t crash when we can’t find bandwidth key fixes #473 2016-10-16 19:50:30 +02:00			`try:`
			`bitrate = float(i[1]["BANDWIDTH"])/1000`
			`except KeyError:`
			`streams[0] = ServiceError("Can't read HLS playlist")`
			`return streams`
hls: try to read the file playlist 2017-02-18 23:51:26 +01:00			`urls = _get_full_url(i[0], url)`
hlsparse: need to send cookies when we grab hlsfiles 2017-09-16 18:26:07 +02:00			`res2 = http.get(urls, cookies=res.cookies)`
hls: try to read the file playlist 2017-02-18 23:51:26 +01:00			`if res2.status_code < 400:`
			`streams[int(bitrate)] = HLS(copy.copy(options), urls, bitrate, cookies=res.cookies)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`return streams`
HLS: export resolution. 2014-04-21 21:42:49 +02:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`class HLS(VideoRetriever):`
option to choose which download method is preferred. 2014-05-01 17:13:46 +02:00			`def name(self):`
			`return "hls"`

fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`def download(self):`
HLS: export resolution. 2014-04-21 21:42:49 +02:00			`if self.options.live and not self.options.force:`
			`raise LiveHLSException(self.url)`

hls: handle cookies 2015-10-04 14:36:06 +02:00			`cookies = self.kwargs["cookies"]`
			`m3u8 = self.http.request("get", self.url, cookies=cookies).text`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`globaldata, files = parsem3u(m3u8)`
			`encrypted = False`
			`key = None`
hls: refactor some code 2014-12-20 21:07:55 +01:00			`if "KEY" in globaldata:`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`keydata = globaldata["KEY"]`
			`encrypted = True`

Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`if encrypted:`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`try:`
			`from Crypto.Cipher import AES`
			`except ImportError:`
			`log.error("You need to install pycrypto to download encrypted HLS streams")`
			`sys.exit(2)`

			`match = re.search(r'URI="(https?://.*?)"', keydata)`
replace self.http.get with our own function with debug info 2015-08-31 19:45:15 +02:00			`key = self.http.request("get", match.group(1)).content`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`rand = os.urandom(16)`
			`decryptor = AES.new(key, AES.MODE_CBC, rand)`
refactor output to its own function Almost the same code for all the fetchers. 2014-08-20 20:27:45 +02:00
output: no need to get the filename when we already have it in options 2014-12-30 21:18:01 +01:00			`file_d = output(self.options, "ts")`
output: make pylint happier. 2014-08-21 22:10:16 +02:00			`if hasattr(file_d, "read") is False:`
refactor output to its own function Almost the same code for all the fetchers. 2014-08-20 20:27:45 +02:00			`return`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00
hls: start on 1 instead of 0 this fixes #127 2014-09-02 19:54:06 +02:00			`n = 1`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`eta = ETA(len(files))`
			`for i in files:`
hls: we select quality before we go here. 2014-04-27 10:42:13 +02:00			`item = _get_full_url(i[0], self.url)`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00
hls, hds, http: its should be “and” and not “or” 2015-10-25 19:24:14 +01:00			`if self.options.output != "-" and not self.options.silent:`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`eta.increment()`
			`progressbar(len(files), n, ''.join(['ETA: ', str(eta)]))`
			`n += 1`

hls: handle cookies 2015-10-04 14:36:06 +02:00			`data = self.http.request("get", item, cookies=cookies)`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`if data.status_code == 404:`
			`break`
			`data = data.content`
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`if encrypted:`
			`data = decryptor.decrypt(data)`
			`file_d.write(data)`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00
fetcher: convert to VideoRetriever 2014-04-21 16:50:24 +02:00			`if self.options.output != "-":`
			`file_d.close()`
fetcher: dont print newline when its silent 2016-05-01 13:12:19 +02:00			`if not self.options.silent:`
			`progress_stream.write('\n')`
fetcher: set a variable if the stream is finished. 2016-03-22 22:28:41 +01:00			`self.finished = True`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`def parsem3u(data):`
			`if not data.startswith("#EXTM3U"):`
			`raise ValueError("Does not apprear to be a ext m3u file")`

			`files = []`
			`streaminfo = {}`
			`globdata = {}`

			`data = data.replace("\r", "\n")`
			`for l in data.split("\n")[1:]:`
			`if not l:`
			`continue`
			`if l.startswith("#EXT-X-STREAM-INF:"):`
Space after # 2014-07-28 16:01:27 +02:00			`# not a proper parser`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`info = [x.strip().split("=", 1) for x in l[18:].split(",")]`
hls: dont search for bandwidth at a fixed position. On svtplay they added subtitle info in the playlist. Beacuse of that the script crashed when we tried to access it 2013-10-14 20:18:09 +02:00			`for i in range(0, len(info)):`
			`if info[i][0] == "BANDWIDTH":`
			`streaminfo.update({info[i][0]: info[i][1]})`
HLS: export resolution. 2014-04-21 21:42:49 +02:00			`if info[i][0] == "RESOLUTION":`
			`streaminfo.update({info[i][0]: info[i][1]})`
Avoid 'EXT-X-BYTERANGE', fix for 'hls.py' Fix support for vidme in 'dash.py' 2017-10-04 23:10:57 +02:00			`elif l.startswith("#EXT-X-MAP:"):`
			`line = l[11:]`
			`if line.startswith("URI"):`
			`files.append((line[5:].split("\"")[0], streaminfo))`
			`elif l.startswith("#EXT-X-ENDLIST") or l.startswith("#EXT-X-BYTERANGE:"):`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`break`
			`elif l.startswith("#EXT-X-"):`
hls: parsem3u dont crash when EXT-X- is one 2015-10-29 22:15:14 +01:00			`line = [l[7:].strip().split(":", 1)]`
			`if len(line[0]) == 1:`
			`line[0].append("None")`
			`globdata.update(dict(line))`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`elif l.startswith("#EXTINF:"):`
parsem3u: don't crash if we cant unpack extinf fixes: #557 2017-02-12 08:53:59 +01:00			`try:`
			`dur, title = l[8:].strip().split(",", 1)`
			`except:`
			`dur = l[8:].strip()`
			`title = None`
Break out HLS fetcher to module 2013-02-12 19:39:52 +01:00			`streaminfo['duration'] = dur`
			`streaminfo['title'] = title`
			`elif l[0] == '#':`
			`pass`
			`else:`
			`files.append((l, streaminfo))`
			`streaminfo = {}`

			`return globdata, files`