1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-30 23:24:16 +01:00
svtplay-dl/svtplay_dl.py

1258 lines
42 KiB
Python
Raw Normal View History

#!/usr/bin/env python
import sys
if sys.version_info > (3, 0):
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse, parse_qs, unquote_plus, quote_plus
2013-01-14 23:34:31 +01:00
from io import BytesIO as StringIO
else:
from urllib2 import Request, urlopen, HTTPError, URLError
from urlparse import urlparse, parse_qs
from urllib import unquote_plus, quote_plus
2013-01-14 23:34:31 +01:00
from StringIO import StringIO
import re
import os
import subprocess
from optparse import OptionParser
import xml.etree.ElementTree as ET
import shlex
import json
import time
import logging
import base64
import struct
import binascii
from datetime import timedelta
__version__ = "0.8.2013.01.26"
class Options:
    """
    Settings holder for callers that invoke get_media() from Python code.

    Every attribute mirrors one of the command line parameters that main()
    parses when the script is run directly.

    Points to consider when calling get_media() from another script:

     * Messages are sent to 'log'. main() calls setup_log(), which directs
       logging to stdout or stderr depending on the silent level. A caller
       of get_media() can use setup_log() as well, or configure the log
       manually.

     * Progress information is written to 'progress_stream', which defaults
       to sys.stderr but can be replaced with any stream.

     * Many errors end in a call to sys.exit(), so catch 'SystemExit' to
       keep the surrounding application alive when that happens.
    """
    def __init__(self):
        # Boolean switches, all off by default
        self.resume = False
        self.live = False
        self.silent = False
        self.hls = False
        # Values that stay unset until the caller or a service fills them in
        self.output = None
        self.quality = None
        self.other = None
# Module-level logger named 'svtplay_dl'.
log = logging.getLogger('svtplay_dl')
# Stream that progress output is written to; defaults to stderr.
progress_stream = sys.stderr
def readbyte(data, pos):
    """
    Return the unsigned 8-bit integer stored at offset 'pos' of 'data'.

    A one-byte slice is unpacked instead of data[pos]: indexing a bytes
    object gives an int on Python 3, which struct.unpack() rejects.
    Raises struct.error when 'pos' is past the end of 'data'.
    """
    return struct.unpack("B", data[pos:pos + 1])[0]
def read16(data, pos):
    """ Decode the big-endian unsigned 16-bit integer at data[pos:pos + 2] """
    (value,) = struct.unpack(">H", data[pos:pos + 2])
    return value
def read24(data, pos):
    """
    Return the big-endian unsigned 24-bit integer at data[pos:pos + 3].

    The three bytes are widened to four with a leading zero byte so they
    can be unpacked as a 32-bit value. The padding is a bytes literal so
    the concatenation also works on Python 3.
    """
    end = pos + 3
    return struct.unpack(">L", b"\x00" + data[pos:end])[0]
def read32(data, pos):
    """ Decode the big-endian signed 32-bit integer at data[pos:pos + 4] """
    (value,) = struct.unpack(">i", data[pos:pos + 4])
    return value
def read64(data, pos):
    """ Decode the big-endian unsigned 64-bit integer at data[pos:pos + 8] """
    (value,) = struct.unpack(">Q", data[pos:pos + 8])
    return value
def readstring(data, pos):
    """
    Read the NUL terminated string that starts at offset 'pos'.

    Returns a tuple (next_pos, string) where 'string' excludes the
    terminator and 'next_pos' is the offset just after it.

    The terminator is located with find() on a bytes needle; comparing
    data[i] with a str never matches on Python 3, where indexing bytes
    yields an int. Raises IndexError when no terminator follows 'pos'.
    """
    end = data.find(b"\x00", pos)
    if end == -1:
        raise IndexError("unterminated string at offset %d" % pos)
    return end + 1, data[pos:end]
def readboxtype(data, pos):
    """
    Read the 8 byte box header located at offset 'pos'.

    Returns a tuple (payload_pos, size, boxtype): the offset just after the
    header, the size field, and the four type bytes. A size field above 1
    is reduced by the 8 header bytes; other values are returned unchanged.
    """
    size = read32(data, pos)
    kind = data[pos + 4:pos + 8]
    payload = size - 8 if size > 1 else size
    return pos + 8, payload, kind
def readbox(data, pos):
    """
    Parse the payload of an 'abst' box starting at offset 'pos'.

    Returns the table built by readasrtbox() for the first 'asrt' box
    found, or None when the box holds no 'asrt' box.

    Box types are compared against bytes literals; on Python 3 the type
    sliced out of 'data' is bytes and never equals a str.
    """
    # Fixed-size fields, skipped because nothing below uses their values:
    # version (1), flags (3), bootstrap version (4),
    # profile/live/update bit field (1), timescale (4),
    # current media time (8), SMPTE time code offset (8).
    pos += 1 + 3 + 4 + 1 + 4 + 8 + 8

    pos, dummy = readstring(data, pos)       # movie identifier

    serverentrycount = readbyte(data, pos)
    pos += 1
    for dummy in range(serverentrycount):
        pos, dummy_entry = readstring(data, pos)

    qualityentrycount = readbyte(data, pos)
    pos += 1
    for dummy in range(qualityentrycount):
        pos, dummy_entry = readstring(data, pos)

    pos, dummy = readstring(data, pos)       # drm data
    pos, dummy = readstring(data, pos)       # metadata

    # Walk every announced segment run table so 'pos' ends up on the
    # fragment run table count even when more than one table is present.
    antal = None
    segmentruncount = readbyte(data, pos)
    pos += 1
    for dummy in range(segmentruncount):
        pos, boxsize, boxtype = readboxtype(data, pos)
        if boxtype == b"asrt" and antal is None:
            antal = readasrtbox(data, pos)
        pos += boxsize

    fragruncount = readbyte(data, pos)
    pos += 1
    for dummy in range(fragruncount):
        pos, boxsize, boxtype = readboxtype(data, pos)
        if boxtype == b"afrt":
            readafrtbox(data, pos)
        pos += boxsize
    return antal
def readafrtbox(data, pos):
    """
    Walk through the payload of an 'afrt' box starting at offset 'pos'.

    Every field is decoded and then discarded; nothing is returned.
    """
    readbyte(data, pos)                 # version
    read24(data, pos + 1)               # flags
    read32(data, pos + 4)               # timescale
    modifiers = readbyte(data, pos + 8)
    pos += 9
    for dummy in range(modifiers):
        pos, dummy_modifier = readstring(data, pos)
    runs = read32(data, pos)
    pos += 4
    for dummy in range(runs):
        read32(data, pos)               # first fragment
        read64(data, pos + 4)           # timestamp
        read32(data, pos + 12)          # duration
        pos += 16
def readasrtbox(data, pos):
    """
    Parse the payload of an 'asrt' box starting at offset 'pos'.

    Returns a dict keyed 1..N (one key per entry, in file order) whose
    values are {"first": ..., "total": ...} built from the two 32-bit
    integers of each entry.
    """
    readbyte(data, pos)                 # version
    read24(data, pos + 1)               # flags
    modifiers = readbyte(data, pos + 4)
    pos += 5
    for dummy in range(modifiers):
        pos, dummy_modifier = readstring(data, pos)
    runs = read32(data, pos)
    pos += 4
    table = {}
    for index in range(1, runs + 1):
        first = read32(data, pos)
        total = read32(data, pos + 4)
        table[index] = {"first": first, "total": total}
        pos += 8
    return table
def parsem3u(data):
    """
    Parse the text of an extended M3U playlist.

    Returns a tuple (globdata, files):

     * globdata maps the name of every '#EXT-X-' tag (prefix removed) to
       the text after its first ':', or to "" for tags without a value.
     * files is a list of (uri, streaminfo) tuples. streaminfo holds the
       attributes of a preceding '#EXT-X-STREAM-INF' line and/or the
       'duration' and 'title' of a preceding '#EXTINF' line.

    Parsing stops at '#EXT-X-ENDLIST'. Attribute values are stored as
    written, including any surrounding double quotes.
    Raises ValueError when 'data' does not start with '#EXTM3U'.
    """
    if not data.startswith("#EXTM3U"):
        raise ValueError("Does not apprear to be a ext m3u file")

    files = []
    streaminfo = {}
    globdata = {}

    for line in data.replace("\r", "\n").split("\n")[1:]:
        if not line:
            continue
        if line.startswith("#EXT-X-STREAM-INF:"):
            # Keep every NAME=VALUE pair; a quoted value may contain commas
            # so a plain split(",") would cut it in half.
            attributes = re.findall(r'([^=,\s]+)=("[^"]*"|[^,]*)', line[18:])
            streaminfo.update((name, value.strip()) for name, value in attributes)
        elif line.startswith("#EXT-X-ENDLIST"):
            break
        elif line.startswith("#EXT-X-"):
            # partition() copes with tags that carry no ':value' part
            tag, dummy, value = line[7:].strip().partition(":")
            globdata[tag] = value
        elif line.startswith("#EXTINF:"):
            # The title after the comma is optional
            dur, dummy, title = line[8:].strip().partition(",")
            streaminfo['duration'] = dur
            streaminfo['title'] = title
        elif line[0] == '#':
            pass
        else:
            files.append((line, streaminfo))
            streaminfo = {}

    return globdata, files
2013-01-12 23:07:41 +01:00
def decode_f4f(fragID, fragData):
    """
    Return the offset in 'fragData' at which the payload to keep starts.

    The offset is just past the first 'mdat' marker. For every fragment
    after the first, two further tags are skipped: the low 24 bits of the
    32-bit value at the current offset give the tag length, and 11 + 4
    bytes are added on top of it.

    The marker is searched as bytes so that the lookup also works on
    Python 3, where find() on bytes rejects a str needle.
    """
    start = fragData.find(b"mdat") + 4
    if fragID > 1:
        for dummy in range(2):
            tagLen, = struct.unpack_from(">L", fragData, start)
            tagLen &= 0x00ffffff
            start += tagLen + 11 + 4
    return start
def get_http_data(url, method="GET", header="", data=""):
    """
    Get the page to parse it for streams.

    url    -- address to fetch
    method -- accepted for compatibility; not used. The request carries a
              body, and is therefore sent as POST, whenever 'data' is
              non-empty.
    header -- value for the Content-Type header, skipped when empty
    data   -- request body, skipped when empty

    Returns the response body. On Python 3 the body is decoded as UTF-8
    when possible and returned as bytes otherwise.
    Logs an error and calls sys.exit(5) when the request fails.
    """
    # Imported locally: the module's import block does not provide it.
    import socket

    if data and not isinstance(data, bytes):
        # urlopen() only accepts a bytes body on Python 3
        data = data.encode("utf-8")
    try:
        # Request() itself raises ValueError for malformed URLs on
        # Python 3, so it has to live inside the try block as well.
        request = Request(url, data) if data else Request(url)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')
        if header:
            request.add_header('Content-Type', header)
        response = urlopen(request)
    except HTTPError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s" % e.code)
        sys.exit(5)
    except URLError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s" % e.reason)
        sys.exit(5)
    except ValueError:
        log.error("Try adding http:// before the url")
        sys.exit(5)

    try:
        data = response.read()
    except socket.error:
        log.error("Lost the connection to the server")
        sys.exit(5)
    finally:
        response.close()

    if sys.version_info > (3, 0):
        try:
            data = data.decode("utf-8")
        except UnicodeDecodeError:
            # Binary payload; hand the raw bytes to the caller
            pass
    return data
def progress(byte, total, extra = ""):
    """ Write a one-line status for 'byte' of 'total' bytes to progress_stream """
    if total != 0:
        ratio = float(byte) / total
        percent = round(ratio*100, 2)
        # Pad the downloaded amount to the width of 'total'
        fmt = "Downloaded %" + str(len(str(total))) + "dkB of %dkB bytes (% 3.2f%%)"
        progresstr = fmt % (byte >> 10, total >> 10, percent)

        columns = int(os.getenv("COLUMNS", "80"))
        if len(progresstr) < columns - 13:
            # Room left on the line: prepend a bar
            width = columns - len(progresstr) - 3
            filled = "#" * int(width * ratio)
            blank = " " * int(width * (1 - ratio))
            progresstr = "[" + filled + blank + "] " + progresstr
        progress_stream.write(progresstr + ' ' + extra + '\r')

        if byte >= total:
            progress_stream.write('\n')
    else:
        # Unknown total size: only the amount fetched so far can be shown
        progress_stream.write("Downloaded %dkB bytes" % (byte >> 10) + '\r')

    progress_stream.flush()
2013-01-12 14:03:45 +01:00
def download_hds(options, url, swf=None):
    """
    Download the stream described by the f4m manifest at 'url'.

    options -- Options instance; 'output' gets ".flv" appended when it has
               no extension, and "-" means write to stdout
    url     -- address of the manifest
    swf     -- accepted for compatibility; not used

    The fragments named "<media url>Seg1-Frag<N>" are fetched one by one
    and appended to the output after an FLV header and the stream
    metadata. Logs an error and calls sys.exit(2) when the bootstrap info
    holds no segment run table.
    """
    namespace = "{http://ns.adobe.com/f4m/1.0}"
    data = get_http_data(url)
    streams = {}
    bootstrap = {}
    xml = ET.XML(data)

    # Element.iter() was added in Python 2.7
    if sys.version_info < (2, 7):
        bootstrapIter = xml.getiterator(namespace + "bootstrapInfo")
        mediaIter = xml.getiterator(namespace + "media")
    else:
        bootstrapIter = xml.iter(namespace + "bootstrapInfo")
        mediaIter = xml.iter(namespace + "media")

    for i in bootstrapIter:
        bootstrap[i.attrib["id"]] = i.text

    for i in mediaIter:
        streams[int(i.attrib["bitrate"])] = {"url": i.attrib["url"], "bootstrapInfoId": i.attrib["bootstrapInfoId"], "metadata": i.find(namespace + "metadata").text}

    test = select_quality(options, streams)

    bootstrap = base64.b64decode(bootstrap[test["bootstrapInfoId"]])
    box = readboxtype(bootstrap, 0)
    antal = None
    # The decoded bootstrap is bytes, so compare with a bytes literal
    if box[2] == b"abst":
        antal = readbox(bootstrap, box[0])
    if not antal or 1 not in antal:
        log.error("Can't find any fragments in the bootstrap info")
        sys.exit(2)

    baseurl = url[0:url.rfind("/")]

    to_file = options.output != "-"
    if to_file:
        extension = re.search(r"(\.[a-z0-9]+)$", options.output)
        if not extension:
            options.output = "%s.flv" % options.output
        log.info("Outfile: %s", options.output)
        file_d = open(options.output, "wb")
    else:
        # Python 3 only takes bytes on the underlying buffer of stdout
        file_d = getattr(sys.stdout, "buffer", sys.stdout)

    try:
        file_d.write(binascii.a2b_hex(b"464c56010500000009000000001200010c00000000000000"))
        file_d.write(base64.b64decode(test["metadata"]))
        file_d.write(binascii.a2b_hex(b"00000000"))
        total = antal[1]["total"]
        start = time.time()
        estimated = ""
        i = 1
        while i <= total:
            url = "%s/%sSeg1-Frag%s" % (baseurl, test["url"], i)
            if to_file:
                progressbar(total, i, estimated)
            data = get_http_data(url)
            number = decode_f4f(i, data)
            file_d.write(data[number:])
            # 'i' fragments are done after 'dt' seconds; scale to all of them
            dt = time.time() - start
            rt = dt / i * total - dt
            estimated = "Estimated Remaining: " + str(timedelta(seconds = int(rt)))
            i += 1
    finally:
        if to_file:
            file_d.close()

    if to_file:
        progress_stream.write('\n')
def _hls_segment_iv(keydata, sequence):
    """ Return the 16 byte AES-CBC IV for the segment with number 'sequence' """
    match = re.search(r"IV=0[xX]([0-9A-Fa-f]+)", keydata)
    if match:
        # Explicit IV attribute, written as a hexadecimal number
        return binascii.unhexlify(match.group(1).zfill(32))
    # No IV attribute: the sequence number as a big-endian 128-bit value
    return struct.pack(">QQ", 0, sequence)


def download_hls(options, url, baseurl=None):
    """
    Download the HLS stream whose master playlist is found at 'url'.

    options -- Options instance; 'output' gets ".ts" appended when it has
               no extension, and "-" means write to stdout
    url     -- address of the master playlist
    baseurl -- prefix for segment names that are not absolute URLs;
               defaults to the directory of the selected playlist

    The variant is picked by select_quality() on BANDWIDTH. Segments of
    playlists carrying an EXT-X-KEY tag are decrypted with AES-CBC, which
    needs pycrypto. Every segment is decrypted on its own, using the IV
    attribute of the key tag or else the segment's media sequence number,
    and the PKCS7 padding is removed.
    """
    data = get_http_data(url)
    globaldata, files = parsem3u(data)
    streams = {}
    for i in files:
        streams[int(i[1]["BANDWIDTH"])] = i[0]

    test = select_quality(options, streams)
    m3u8 = get_http_data(test)
    globaldata, files = parsem3u(m3u8)
    if baseurl is None:
        baseurl = test.rsplit("/", 1)[0]

    keydata = globaldata.get("KEY")
    encrypted = bool(keydata) and "METHOD=NONE" not in keydata
    key = None
    sequence = 0

    if encrypted:
        try:
            from Crypto.Cipher import AES
        except ImportError:
            log.error("You need to install pycrypto to download encrypted HLS streams")
            sys.exit(2)
        match = re.search(r'URI="(https?://[^"]*)"', keydata)
        if not match:
            log.error("Can't find the decryption key")
            sys.exit(2)
        key = get_http_data(match.group(1))
        try:
            sequence = int(globaldata.get("MEDIA-SEQUENCE", 0))
        except ValueError:
            sequence = 0

    to_file = options.output != "-"
    if to_file:
        extension = re.search(r"(\.[a-z0-9]+)$", options.output)
        if not extension:
            options.output = "%s.ts" % options.output
        log.info("Outfile: %s", options.output)
        file_d = open(options.output, "wb")
    else:
        # Python 3 only takes bytes on the underlying buffer of stdout
        file_d = getattr(sys.stdout, "buffer", sys.stdout)

    try:
        start = time.time()
        estimated = ""
        for n, i in enumerate(files, 1):
            item = i[0]
            if to_file:
                progressbar(len(files), n, estimated)
            if not item.startswith(("http://", "https://")):
                item = "%s/%s" % (baseurl, item)
            data = get_http_data(item)
            if encrypted:
                # CBC chaining restarts with every segment
                iv = _hls_segment_iv(keydata, sequence + n - 1)
                data = AES.new(key, AES.MODE_CBC, iv).decrypt(data)
                if data:
                    pad = ord(data[-1:])
                    if 1 <= pad <= 16:
                        data = data[:-pad]
            file_d.write(data)
            # 'n' segments are done after 'dt' seconds; scale to all of them
            dt = time.time() - start
            rt = dt / n * len(files) - dt
            estimated = "Estimated Remaining: " + str(timedelta(seconds = int(rt)))
    finally:
        if to_file:
            file_d.close()

    if to_file:
        progress_stream.write('\n')
2012-12-29 21:01:57 +01:00
def download_http(options, url):
    """
    Get the stream from HTTP.

    options -- Options instance; the extension found at the end of 'url'
               is appended to 'output', and "-" means write to stdout
    url     -- address of the file to fetch

    Progress is reported through progress() at most once per second while
    writing to a file.
    """
    response = urlopen(url)
    to_file = options.output != "-"
    file_d = None
    try:
        # get() covers both header classes: the Python 3 one returns None
        # for a missing header instead of raising KeyError.
        total_size = int(response.info().get('Content-Length') or 0)
        bytes_so_far = 0
        if to_file:
            extension = re.search(r"(\.[a-z0-9]+)$", url)
            if extension:
                options.output = options.output + extension.group(1)
            log.info("Outfile: %s", options.output)
            file_d = open(options.output, "wb")
        else:
            # Python 3 only takes bytes on the underlying buffer of stdout
            file_d = getattr(sys.stdout, "buffer", sys.stdout)

        lastprogress = 0
        while True:
            chunk = response.read(8192)
            if not chunk:
                break
            bytes_so_far += len(chunk)
            file_d.write(chunk)
            if to_file:
                now = time.time()
                if lastprogress + 1 < now:
                    lastprogress = now
                    progress(bytes_so_far, total_size)
    finally:
        response.close()
        if to_file and file_d is not None:
            file_d.close()
2012-12-29 21:01:57 +01:00
def download_rtmp(options, url):
    """
    Get the stream from RTMP by running the external 'rtmpdump' program.

    options -- Options instance; 'live' adds -v, 'resume' adds -e, 'silent'
               adds -q and 'other' is split into additional arguments
    url     -- RTMP address passed to rtmpdump with -r

    Unless the output is "-", an extension is appended to options.output:
    the one ending 'url', else the playpath prefix of a "-y prefix:path"
    argument in options.other, else ".flv".
    """
    args = []
    if options.live:
        args.append("-v")

    if options.resume:
        args.append("-e")

    extension = re.search(r"(\.[a-z0-9]+)$", url)
    if options.output != "-":
        if not extension:
            # options.other may still be None when a service set no extras
            extension = re.search(r"-y (.+):[-_a-z0-9\/]", options.other or "")
            if not extension:
                options.output = "%s.flv" % options.output
            else:
                # The playpath prefix carries no dot of its own
                options.output = "%s.%s" % (options.output, extension.group(1))
        else:
            options.output = options.output + extension.group(1)
        log.info("Outfile: %s", options.output)
        args += ["-o", options.output]
    if options.silent or options.output == "-":
        args.append("-q")
    if options.other:
        args += shlex.split(options.other)
    command = ["rtmpdump", "-r", url] + args
    try:
        subprocess.call(command)
    except OSError as e:
        log.error("Could not execute rtmpdump: " + e.strerror)
def select_quality(options, streams):
    """
    Return the entry of 'streams' that matches the requested quality.

    options -- object whose 'quality' attribute names the wanted key; when
               it is unset the highest key is used
    streams -- dict mapping a numeric quality to a stream description

    Logs an error and calls sys.exit(4) when 'streams' is empty or when
    the requested quality is not one of its keys.
    """
    available = sorted(streams.keys(), key=int)
    if not available:
        log.error("Can't find any streams.")
        sys.exit(4)

    quality = options.quality if options.quality else available[-1]
    try:
        return streams[int(quality)]
    except (KeyError, ValueError):
        log.error("Can't find that quality. (Try one of: %s)",
                  ", ".join(map(str, available)))
        sys.exit(4)
class Justin():
    """ Service handler for twitch.tv and justin.tv """
    def handle(self, url):
        """ Return True when 'url' mentions twitch.tv or justin.tv """
        return ("twitch.tv" in url) or ("justin.tv" in url)

    def get(self, options, url):
        """
        Download the video behind 'url'.

        A path containing "/b/<digits>" is looked up through the
        by_archive API and fetched over HTTP. Any other path is taken as
        a channel name and fetched as a live stream over RTMP.
        Logs an error and calls sys.exit(2) when the page names no swf.
        """
        parse = urlparse(url)
        match = re.search(r"/b/(\d+)", parse.path)
        if match:
            url = "http://api.justin.tv/api/broadcast/by_archive/%s.xml?onsite=true" % match.group(1)
            data = get_http_data(url)
            xml = ET.XML(data)
            url = xml.find("archive").find("video_file_url").text

            download_http(options, url)
            return

        match = re.search("/(.*)", parse.path)
        if not match:
            return
        user = match.group(1)
        data = get_http_data(url)
        match = re.search(r'embedSWF\("(.*)", "live', data)
        if not match:
            log.error("Can't find swf file.")
            sys.exit(2)
        options.other = match.group(1)
        url = "http://usher.justin.tv/find/%s.xml?type=any&p=2321" % user
        options.live = True
        data = get_http_data(url)
        # Tag names must not start with a digit; prefix them with "_"
        data = re.sub(r"<(\d+)", r"<_\g<1>", data)
        data = re.sub(r"</(\d+)", r"</_\g<1>", data)
        xml = ET.XML(data)
        streams = {}
        for i in list(xml):
            if i.tag[1:][:-1] != "iv":
                try:
                    stream = {}
                    stream["token"] = i.find("token").text
                    stream["url"] = "%s/%s" % (i.find("connect").text, i.find("play").text)
                    streams[int(i.find("video_height").text)] = stream
                except AttributeError:
                    # Entry lacks one of the elements; skip it
                    pass

        test = select_quality(options, streams)
        options.other = "-j '%s' -W %s" % (test["token"], options.other)
        options.resume = False
        download_rtmp(options, test["url"])
class Hbo():
    """ Service handler for hbo.com """
    def handle(self, url):
        """ Return True when 'url' mentions hbo.com """
        return "hbo.com" in url

    def get(self, options, url):
        """
        Download the video named in the fragment part of 'url' over RTMP.

        Takes 'options' like every other service does; the body needs it
        for select_quality() and download_rtmp().
        Logs an error and calls sys.exit(2) when the fragment does not
        look like "/<name>.html".
        """
        parse = urlparse(url)
        other = parse.fragment
        match = re.search("^/(.*).html", other)
        if not match:
            log.error("Cant find video file")
            sys.exit(2)
        url = "http://www.hbo.com/data/content/%s.xml" % match.group(1)
        data = get_http_data(url)
        xml = ET.XML(data)
        videoid = xml.find("content")[1].find("videoId").text
        url = "http://render.cdn.hbo.com/data/content/global/videos/data/%s.xml" % videoid
        data = get_http_data(url)
        xml = ET.XML(data)
        ss = xml.find("videos")
        # Element.iter() was added in Python 2.7
        if sys.version_info < (2, 7):
            sa = list(ss.getiterator("size"))
        else:
            sa = list(ss.iter("size"))
        streams = {}
        for i in sa:
            stream = {}
            stream["path"] = i.find("tv14").find("path").text
            streams[int(i.attrib["width"])] = stream

        test = select_quality(options, streams)

        download_rtmp(options, test["path"])
class Sr():
    """ Service handler for sverigesradio.se """
    def handle(self, url):
        """ Tell whether 'url' mentions sverigesradio.se """
        return "sverigesradio.se" in url

    def get(self, options, url):
        """ Locate the playlist for 'url' and fetch the file it references over HTTP """
        page = get_http_data(url)
        parts = urlparse(url)
        query = parts[4]
        if "metafile" in parse_qs(query):
            # The address itself points at the playlist
            options.other = "%s?%s" % (parts[2], query)
        else:
            found = re.search("linkUrl=(.*)\;isButton=", page)
            if not found:
                log.error("Can't find video file")
                sys.exit(2)
            options.other = unquote_plus(found.group(1))
        playlist = get_http_data("http://sverigesradio.se%s" % options.other)
        tree = ET.XML(playlist)
        target = tree.find("entry").find("ref").attrib["href"]

        download_http(options, target)
class Urplay():
    """ Service handler for urplay.se and ur.se """
    def handle(self, url):
        """ Return True when 'url' mentions urplay.se or ur.se """
        return ("urplay.se" in url) or ("ur.se" in url)

    def get(self, options, url):
        """
        Download the video on the page at 'url'.

        The stream is fetched exactly once: over HLS when options.hls is
        set, over RTMP when the flash file name ends in "3", and over HDS
        otherwise.
        Logs an error and calls sys.exit(2) when the page holds no player
        configuration.
        """
        data = get_http_data(url)
        match = re.search(r"urPlayer.init\((.*)\);", data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        # Turn the javascript object literal into valid JSON
        data = re.sub(r"(\w+): ", r'"\1":', match.group(1))
        data = data.replace("\'", "\"").replace("\",}","\"}").replace(r"(m = location.hash.match(/[#&]start=(\d+)/)) ? m[1] : 0,","0")
        jsondata = json.loads(data)
        basedomain = jsondata["streaming_config"]["streamer"]["redirect"]
        http = "http://%s/%s" % (basedomain, jsondata["file_html5"])
        hds = "%s%s" % (http, jsondata["streaming_config"]["http_streaming"]["hds_file"])
        hls = "%s%s" % (http, jsondata["streaming_config"]["http_streaming"]["hls_file"])
        rtmp = "rtmp://%s/%s" % (basedomain, jsondata["streaming_config"]["rtmp"]["application"])
        path = "mp%s:%s" % (jsondata["file_flash"][-1], jsondata["file_flash"])
        options.other = "-v -a %s -y %s" % (jsondata["streaming_config"]["rtmp"]["application"], path)
        # One download only: every downloader rewrites options.output
        if options.hls:
            download_hls(options, hls, http)
        elif jsondata["file_flash"][-1] == "3":
            download_rtmp(options, rtmp)
        else:
            download_hds(options, hds)
class Qbrick():
    """ Service handler for dn.se, di.se and svd.se """
    def handle(self, url):
        """ Tell whether 'url' mentions one of the supported sites """
        return any(site in url for site in ("dn.se", "di.se", "svd.se"))

    def get(self, options, url):
        """ Resolve the player for 'url' and fetch the chosen stream over RTMP """
        if re.findall("dn.se", url):
            page = get_http_data(url)
            found = re.search("data-qbrick-mcid=\"([0-9A-F]+)\"", page)
            if found:
                mcid = found.group(1)
            else:
                found = re.search("mediaId = \'([0-9A-F]+)\';", page)
                if not found:
                    log.error("Can't find video file")
                    sys.exit(2)
                mcid = "%sDE1BA107" % found.group(1)
            host = "http://vms.api.qbrick.com/rest/v3/getsingleplayer/%s" % mcid
        elif re.findall("di.se", url):
            page = get_http_data(url)
            found = re.search("ccid: \"(.*)\"\,", page)
            if not found:
                log.error("Can't find video file")
                sys.exit(2)
            host = "http://vms.api.qbrick.com/rest/v3/getplayer/%s" % found.group(1)
        elif re.findall("svd.se", url):
            found = re.search("_([0-9]+)\.svd", url)
            if not found:
                log.error("Can't find video file")
                sys.exit(2)

            page = get_http_data("http://www.svd.se/?service=ajax&type=webTvClip&articleId=%s" % found.group(1))
            found = re.search("mcid=([A-F0-9]+)\&width=", page)
            if not found:
                log.error("Can't find video file")
                sys.exit(2)
            host = "http://vms.api.qbrick.com/rest/v3/getsingleplayer/%s" % found.group(1)
        else:
            log.error("Can't find site")
            sys.exit(2)

        player = ET.XML(get_http_data(host))
        try:
            node = player
            for tag in ("media", "item", "playlist", "stream", "format", "substream"):
                node = node.find(tag)
            url = node.text
        except AttributeError:
            log.error("Can't find video file")
            sys.exit(2)

        playlist = ET.XML(get_http_data(url))
        server = playlist.find("head").find("meta").attrib["base"]
        switch = playlist.find("body").find("switch")
        # Element.iter() was added in Python 2.7
        if sys.version_info < (2, 7):
            videos = list(switch.getiterator("video"))
        else:
            videos = list(switch.iter("video"))
        streams = {}
        for video in videos:
            streams[int(video.attrib["system-bitrate"])] = video.attrib["src"]

        path = select_quality(options, streams)

        options.other = "-y %s" % path
        download_rtmp(options, server)
class Kanal5():
    """ Service handler for kanal5play.se """
    def handle(self, url):
        """ Tell whether 'url' mentions kanal5play.se """
        return "kanal5play.se" in url

    def get(self, options, url):
        """ Query the getVideo API for the id in 'url' and fetch the chosen stream over RTMP """
        found = re.search(".*video/([0-9]+)", url)
        if not found:
            log.error("Can't find video file")
            sys.exit(2)
        api = "http://www.kanal5play.se/api/getVideo?format=FLASH&videoId=%s" % found.group(1)
        info = json.loads(get_http_data(api))
        options.live = info["isLive"]
        rtmp_base = info["streamBaseUrl"]
        streams = {}
        for entry in info["streams"]:
            source = entry["source"]
            streams[int(entry["bitrate"])] = {"source": source}

        chosen = select_quality(options, streams)

        filename = chosen["source"]
        # The text in front of the last ':' becomes the file extension
        prefix = re.search("^(.*):", filename)
        options.output = "%s.%s" % (options.output, prefix.group(1))
        options.other = "-W %s -y %s " % ("http://www.kanal5play.se/flash/StandardPlayer.swf", filename)
        download_rtmp(options, rtmp_base)
class Kanal9():
    """ Service handler for kanal9play.se and kanal5.se """
    def handle(self, url):
        """ Tell whether 'url' mentions kanal9play.se or kanal5.se """
        return any(site in url for site in ("kanal9play.se", "kanal5.se"))

    def get(self, options, url):
        """ Ask the brightcove AMF service for the renditions of the video on 'url' and fetch one over RTMP """
        page = get_http_data(url)
        found = re.search("@videoPlayer\" value=\"(.*)\"", page)
        if not found:
            found = re.search("videoId=(\d+)&player", page)
            if not found:
                log.error("Can't find video file")
                sys.exit(2)

        try:
            from pyamf import remoting
        except ImportError:
            log.error("You need to install pyamf to download content from kanal5.se and kanal9play")
            log.error("In debian the package is called python-pyamf")
            sys.exit(2)

        player_id = 811317479001
        publisher_id = 22710239001
        const = "9f79dd85c3703b8674de883265d8c9e606360c2e"
        envelope = remoting.Envelope(amfVersion=3)
        call = remoting.Request(target="com.brightcove.player.runtime.PlayerMediaFacade.findMediaById", body=[const, player_id, found.group(1), publisher_id], envelope=envelope)
        envelope.bodies.append(("/1", call))
        payload = str(remoting.encode(envelope).read())
        broker = "http://c.brightcove.com/services/messagebroker/amf?playerKey=AQ~~,AAAABUmivxk~,SnCsFJuhbr0vfwrPJJSL03znlhz-e9bk"
        reply = get_http_data(broker, "POST", "application/x-amf", payload)
        streams = {}
        for rendition in remoting.decode(reply).bodies[0][1].body['renditions']:
            uri = rendition["defaultURL"]
            streams[rendition["encodingRate"]] = {"uri": uri}

        chosen = select_quality(options, streams)

        filename = chosen["uri"]
        # Split the address into the server part and the playpath
        parts = re.search("(rtmp[e]{0,1}://.*)\&(.*)$", filename)
        options.other = "-W %s -y %s " % ("http://admin.brightcove.com/viewer/us1.25.04.01.2011-05-24182704/connection/ExternalConnection_2.swf", parts.group(2))
        download_rtmp(options, parts.group(1))
class Expressen():
    """ Service handler for expressen.se """
    def handle(self, url):
        """ Tell whether 'url' mentions expressen.se """
        return "expressen.se" in url

    def get(self, options, url):
        """ Read the xml description of the clip on 'url' and fetch the chosen stream over RTMP """
        parts = urlparse(url)
        found = re.search("/(.*[\/\+].*)/", unquote_plus(parts.path))
        if not found:
            log.error("Can't find video file")
            sys.exit(2)
        feed = "http://tv.expressen.se/%s/?standAlone=true&output=xml" % quote_plus(found.group(1))
        tree = ET.XML(get_http_data(feed))
        vurls = tree.find("vurls")
        # Element.iter() was added in Python 2.7
        if sys.version_info < (2, 7):
            entries = list(vurls.getiterator("vurl"))
        else:
            entries = list(vurls.iter("vurl"))
        streams = {}
        for entry in entries:
            streams[int(entry.attrib["bitrate"])] = entry.text

        chosen = select_quality(options, streams)

        # Split the address into the server part and the playpath
        pieces = re.search("rtmp://([0-9a-z\.]+/[0-9]+/)(.*).flv", chosen)
        server = "rtmp://%s" % pieces.group(1)
        options.other = "-y %s" % pieces.group(2)

        download_rtmp(options, server)
class Aftonbladet():
    """ Service handler for aftonbladet.se """
    def handle(self, url):
        """ Return True when 'url' mentions aftonbladet.se """
        return "aftonbladet.se" in url

    def get(self, options, url):
        """
        Download the video on the page at 'url' over RTMP or HTTP.

        A positive integer "start" query parameter is passed on to
        rtmpdump with -A; a missing or non-numeric value counts as 0.
        Logs an error and calls sys.exit() when the page holds no player
        (2) or the player names no base url (3).
        """
        parse = urlparse(url)
        data = get_http_data(url)
        match = re.search("abTvArticlePlayer-player-(.*)-[0-9]+-[0-9]+-clickOverlay", data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        try:
            # Query values are strings; convert before comparing with 0
            start = int(parse_qs(parse[4])["start"][0])
        except (KeyError, ValueError):
            start = 0
        url = "http://www.aftonbladet.se/resource/webbtv/article/%s/player" % match.group(1)
        data = get_http_data(url)
        xml = ET.XML(data)
        media = xml.find("articleElement").find("mediaElement")
        url = media.find("baseUrl").text
        path = media.find("media").attrib["url"]
        live = media.find("isLive").text
        options.other = "-y %s" % path

        if start > 0:
            options.other = "%s -A %s" % (options.other, str(start))

        if live == "true":
            options.live = True

        if url is None:
            log.error("Can't find any video on that page")
            sys.exit(3)

        if url[0:4] == "rtmp":
            download_rtmp(options, url)
        else:
            filename = url + path
            download_http(options, filename)
class Viaplay():
    """ Service handler for tv3play.se, tv6play.se and tv8play.se """
    def handle(self, url):
        """ Tell whether 'url' mentions one of the supported sites """
        return any(site in url for site in ("tv3play.se", "tv6play.se", "tv8play.se"))

    def get(self, options, url):
        """ Look up the product named in 'url' and fetch its video over RTMP """
        parts = urlparse(url)
        found = re.search('\/play\/(.*)/?', parts.path)
        if not found:
            log.error("Cant find video file")
            sys.exit(2)
        product = "http://viastream.viasat.tv/PlayProduct/%s" % found.group(1)
        options.other = ""
        tree = ET.XML(get_http_data(product))
        filename = tree.find("Product").find("Videos").find("Video").find("Url").text

        if filename[:4] == "http":
            # The address points at another document holding the real one
            tree = ET.XML(get_http_data(filename))
            filename = tree.find("Url").text

        options.other = "-W http://flvplayer.viastream.viasat.tv/play/swf/player110516.swf?rnd=1315434062"
        download_rtmp(options, filename)
class Tv4play():
    """ Service handler for tv4play.se and tv4.se """
    def handle(self, url):
        """ Tell whether 'url' mentions tv4play.se or tv4.se """
        return any(site in url for site in ("tv4play.se", "tv4.se"))

    def get(self, options, url):
        """ Find the video id for 'url', list its streams and fetch one over RTMP or HDS """
        parts = urlparse(url)
        if "tv4play.se" in url:
            try:
                vid = parse_qs(parts[4])["video_id"][0]
            except KeyError:
                log.error("Can't find video file")
                sys.exit(2)
        else:
            found = re.search("-(\d+)$", url)
            if not found:
                # No id in the address; look for one in the page instead
                page = get_http_data(url)
                found = re.search("\"vid\":\"(\d+)\",", page)
                if not found:
                    log.error("Can't find video file")
                    sys.exit(2)
            vid = found.group(1)

        asset = "http://premium.tv4play.se/api/web/asset/%s/play" % vid
        tree = ET.XML(get_http_data(asset))
        items = tree.find("items")
        # Element.iter() was added in Python 2.7
        if sys.version_info < (2, 7):
            entries = list(items.getiterator("item"))
        else:
            entries = list(items.iter("item"))

        live = tree.find("live").text
        if live and live != "false":
            options.live = True

        streams = {}
        for entry in entries:
            if entry.find("mediaFormat").text == "smi":
                continue
            stream = {}
            stream["uri"] = entry.find("base").text
            stream["path"] = entry.find("url").text
            streams[int(entry.find("bitrate").text)] = stream

        if len(streams) == 1:
            chosen = streams[list(streams.keys())[0]]
        else:
            chosen = select_quality(options, streams)

        swf = "http://www.tv4play.se/flash/tv4playflashlets.swf"
        options.other = "-W %s -y %s" % (swf, chosen["path"])

        uri = chosen["uri"]
        if uri[0:4] == "rtmp":
            download_rtmp(options, uri)
        elif uri[len(uri)-3:len(uri)] == "f4m":
            if re.search("\/se\/secure\/", uri):
                log.error("This stream is encrypted. Use --hls option")
                sys.exit(2)
            manifest = "%s?hdcore=2.8.0&g=hejsan" % chosen["path"]
            download_hds(options, manifest, swf)
class Svtplay():
    """ Service handler for svtplay.se and svt.se """
    def handle(self, url):
        """ Return True when 'url' mentions svtplay.se or svt.se """
        return ("svtplay.se" in url) or ("svt.se" in url)

    def get(self, options, url):
        """
        Download the video on the page at 'url'.

        The stream list is read from the page's JSON description. Flash
        entries are used by default and 'ios' entries when options.hls is
        set. When HLS is requested but no 'ios' entry exists, the flash
        entry with bitrate 0 is rewritten into an HLS address.
        Logs an error and calls sys.exit(2) when the video, the swf or a
        usable stream cannot be found.
        """
        if re.findall("svt.se", url):
            data = get_http_data(url)
            match = re.search('data-json-href="(.*)"', data)
            if match:
                filename = match.group(1).replace("&amp;", "&").replace("&format=json", "")
                url = "http://www.svt.se%s" % filename
            else:
                log.error("Can't find video file")
                sys.exit(2)
        url = "%s?type=embed" % url
        data = get_http_data(url)
        match = re.search(r'value="(/(public)?(statiskt)?/swf/video/svtplayer-[0-9\.]+swf)"', data)
        if not match:
            log.error("Can't find swf file.")
            sys.exit(2)
        swf = "http://www.svtplay.se%s" % match.group(1)
        options.other = "-W %s" % swf
        url = "%s&output=json&format=json" % url
        data = json.loads(get_http_data(url))
        options.live = data["video"]["live"]
        streams = {}
        streams2 = {} #hack..
        for i in data["video"]["videoReferences"]:
            if options.hls and i["playerType"] == "ios":
                streams[int(i["bitrate"])] = {"url": i["url"]}
            elif not options.hls and i["playerType"] == "flash":
                streams[int(i["bitrate"])] = {"url": i["url"]}
            if options.hls and i["playerType"] == "flash":
                streams2[int(i["bitrate"])] = {"url": i["url"]}

        if len(streams) == 0 and options.hls:
            # Fall back on the flash entry stored under bitrate 0
            if 0 not in streams2:
                log.error("Can't find any streams.")
                sys.exit(2)
            test = streams2[0]
            test["url"] = test["url"].replace("/z/", "/i/").replace("manifest.f4m", "master.m3u8")
        elif len(streams) == 0:
            log.error("Can't find any streams.")
            sys.exit(2)
        elif len(streams) == 1:
            test = streams[list(streams.keys())[0]]
        else:
            test = select_quality(options, streams)

        if test["url"][0:4] == "rtmp":
            download_rtmp(options, test["url"])
        elif options.hls:
            download_hls(options, test["url"])
        elif test["url"].endswith("f4m"):
            match = re.search(r"\/se\/secure\/", test["url"])
            if match:
                log.error("This stream is encrypted. Use --hls option")
                sys.exit(2)
            manifest = "%s?hdcore=2.8.0&g=hejsan" % test["url"]
            download_hds(options, manifest, swf)
        else:
            download_http(options, test["url"])
2013-01-12 23:07:41 +01:00
class Nrk(object):
    """Handler for pages on nrk.no."""

    def handle(self, url):
        """Return True when *url* contains "nrk.no"."""
        return "nrk.no" in url

    def get(self, options, url):
        """
        Locate the f4m manifest referenced by the page at *url* and
        download the stream.

        When options.hls is set the manifest URL is rewritten to the
        corresponding master.m3u8 and handed to download_hls(); otherwise
        the f4m manifest is handed to download_hds().

        Logs an error and exits with status 2 when the page does not
        reference a manifest.
        """
        data = get_http_data(url)
        match = re.search(r'data-media="(.*manifest.f4m)"', data)
        if not match:
            # re.search() returns None on a miss; bail out cleanly instead
            # of crashing with AttributeError on match.group().
            log.error("Can't find video file")
            sys.exit(2)
        manifest_url = match.group(1)
        if options.hls:
            manifest_url = manifest_url.replace("/z/", "/i/").replace("manifest.f4m", "master.m3u8")
            download_hls(options, manifest_url)
        else:
            manifest_url = "%s?hdcore=2.8.0&g=hejsan" % manifest_url
            download_hds(options, manifest_url)
2013-01-12 23:07:41 +01:00
class Dr(object):
    """Handler for pages on dr.dk."""

    def handle(self, url):
        """Return True when *url* contains "dr.dk"."""
        return "dr.dk" in url

    def get(self, options, url):
        """
        Resolve the JSON resource referenced by the page at *url*, pick one
        of its links by bitrate and download it with download_rtmp().

        Logs an error and exits with status 2 when the page does not
        reference a resource.
        """
        data = get_http_data(url)
        match = re.search(r'resource:[ ]*"([^"]*)",', data)
        if not match:
            # re.search() returns None on a miss; bail out cleanly instead
            # of crashing with AttributeError on match.group().
            log.error("Can't find video file")
            sys.exit(2)
        resource_url = match.group(1)
        resource_data = get_http_data(resource_url)
        resource = json.loads(resource_data)

        # Map bitrate -> stream URI so a quality can be selected by bitrate.
        streams = {}
        for stream in resource['links']:
            streams[stream['bitrateKbps']] = stream['uri']

        if len(streams) == 1:
            uri = streams[list(streams.keys())[0]]
        else:
            uri = select_quality(options, streams)

        # Extra arguments stored on options.other for the rtmp download;
        # the play path is the URI with the server prefix stripped.
        # need -v ?
        options.other = "-v -y '" + uri.replace("rtmp://vod.dr.dk/cms/", "") + "'"
        download_rtmp(options, uri)
class Ruv(object):
    """Handler for pages on ruv.is."""

    def handle(self, url):
        """Return True when *url* contains "ruv.is"."""
        return "ruv.is" in url

    def get(self, options, url):
        """
        Build the m3u8 URL for the page at *url* and download it with
        download_hls().

        The page references a helper script on load.cache.is; the first
        double-quoted string in that script is used as the host part, and
        the port/path part is taken from the page itself.

        Logs an error and exits with status 2 when any of these pieces
        cannot be found.
        """
        data = get_http_data(url)
        match = re.search(r'(http://load.cache.is/vodruv.*)"', data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        js_url = match.group(1)
        js = get_http_data(js_url)

        quoted = js.split('"')
        path_match = re.search(r"http.*tengipunktur [+] '([:]1935.*)'", data)
        if len(quoted) < 2 or not path_match:
            # Either the script holds no quoted host or the page holds no
            # stream path; previously this crashed with IndexError or
            # AttributeError.
            log.error("Can't find video file")
            sys.exit(2)

        tengipunktur = quoted[1]
        m3u8_url = "http://" + tengipunktur + path_match.group(1)
        base_url = m3u8_url.rsplit("/", 1)[0]
        download_hls(options, m3u8_url, base_url)
2013-02-08 12:34:28 +01:00
class Radioplay(object):
    """Handler for stations on radioplay.se."""

    def handle(self, url):
        """Return True when *url* contains "radioplay.se"."""
        return "radioplay.se" in url

    def get(self, options, url):
        """
        Find the streams of the station named in the path of *url* and
        download one of them.

        The station list is read from the "liveStationsRedundancy" JSON
        embedded in the page. With options.hls set the "hls" stream is
        downloaded with download_hls(); otherwise the "rtmp" stream is
        preferred, falling back to the "mp3" stream over HTTP.

        Logs an error and exits with status 2 when no usable stream is
        found.
        """
        data = get_http_data(url)
        match = re.search("liveStationsRedundancy = ({.*});</script>", data)
        if not match:
            log.error("Can't find any streams.")
            sys.exit(2)

        # The station is identified by the URL path without its leading "/".
        station = urlparse(url).path[1:]
        streams = None
        for candidate in json.loads(match.group(1))["stations"]:
            # Station names are compared lower-cased and without spaces.
            if station == candidate["name"].lower().replace(" ", ""):
                streams = candidate["streams"]
                break
        if not streams:
            log.error("Can't find any streams.")
            sys.exit(2)

        # Look the keys up explicitly rather than wrapping the download
        # calls in try/except KeyError, so a KeyError raised inside a
        # downloader is not mistaken for a missing stream.
        if options.hls:
            if "hls" not in streams:
                log.error("Can't find any streams.")
                sys.exit(2)
            m3u8_url = streams["hls"]
            base_url = m3u8_url.rsplit("/", 1)[0]
            download_hls(options, m3u8_url, base_url)
        elif "rtmp" in streams:
            download_rtmp(options, streams["rtmp"])
        elif "mp3" in streams:
            download_http(options, streams["mp3"])
        else:
            log.error("Can't find any streams.")
            sys.exit(2)
class generic(object):
    """Fallback for videos embedded in pages of other sites."""

    def get(self, sites, url):
        """
        Look for an embedded www.svt.se player in the page at *url*.

        Returns a (url, handler) tuple. When an embed is found, url is the
        embedded address and handler is the first entry of *sites* whose
        handle() accepts it (None if no entry does). When no embed is
        found, the original url is returned together with None.
        """
        page = get_http_data(url)
        embed = re.search("src=\"(http://www.svt.se/wd.*)\" frameborder", page)
        if not embed:
            return url, None

        url = embed.group(1)
        for site in sites:
            if site.handle(url):
                return url, site
        return url, None
def progressbar(total, pos, msg="", width=50):
    """
    Given a total and a progress position, write a progress bar to
    'progress_stream'. It is important to not output anything else
    while using this, as it relies solely on the behavior of carriage
    return (\\r).

    Can also take an optional message to add after the progress bar.
    It must not contain newlines.

    The progress bar will look something like this:

    [099/500][=========...............................] ETA: 13:36:59

    Of course, the ETA part should be supplied by the calling
    function.

    'width' is the number of characters between the inner brackets.
    A total of zero (or less) is drawn as an empty bar, and a position
    outside 0..total is clamped so the bar never changes length.
    """
    if total > 0:
        rel_pos = int(float(pos) / total * width)
    else:
        # Nothing sensible to scale against; avoid dividing by zero.
        rel_pos = 0
    rel_pos = max(0, min(width, rel_pos))
    bar = "=" * rel_pos + "." * (width - rel_pos)

    # Pad both counters to the number of digits in total (base 10)
    fmt_width = "%%0%dd" % len(str(total))
    fmt = "\r[" + fmt_width + "/" + fmt_width + "][%s] %s"
    progress_stream.write(fmt % (pos, total, bar, msg))
def get_media(url, options):
    """
    Pick the site handler responsible for *url* and let it download the
    media.

    The known handlers are asked in turn via handle(). If none accepts
    the URL, the page is searched for an embedded player through
    generic(). Logs an error and exits with status 2 when no handler can
    be found.

    When options.output is unset or names an existing directory, a file
    name is derived from the page's <title> (lower-cased, punctuation
    removed, whitespace collapsed to "-") and stored in options.output;
    for a directory the name is placed inside it.
    """
    sites = [Aftonbladet(), Dr(), Expressen(), Hbo(), Justin(), Kanal5(), Kanal9(), Nrk(),
             Qbrick(), Ruv(), Radioplay(), Sr(), Svtplay(), Tv4play(), Urplay(), Viaplay()]
    stream = None
    for site in sites:
        if site.handle(url):
            stream = site
            break
    if not stream:
        url, stream = generic().get(sites, url)
        if not stream:
            log.error("That site is not supported. Make a ticket or send a message")
            sys.exit(2)
        # The embedded URL was scraped from HTML, so undo entity escaping.
        url = url.replace("&amp;", "&")

    if not options.output or os.path.isdir(options.output):
        data = get_http_data(url)
        match = re.search(r"(?i)<title.*>\s*(.*?)\s*</title>", data)
        if match:
            title = re.sub(r"[^\w\s-]", "", match.group(1)).strip().lower()
            name = re.sub(r"[-\s]+", "-", title)
            if options.output:
                # options.output is a directory here; join with a path
                # separator so "dir" + "title" does not become "dirtitle".
                name = os.path.join(options.output, name)
            if not sys.version_info > (3, 0):
                # Python 2: keep the file name as a unicode object.
                name = unicode(name)
            options.output = name

    stream.get(options, url)
def setup_log(silent):
    """
    Attach a stream handler to the module logger 'log'.

    In silent mode messages go to stderr and only WARNING and above are
    emitted; otherwise messages go to stdout from INFO and above.
    """
    if silent:
        target, threshold = sys.stderr, logging.WARNING
    else:
        target, threshold = sys.stdout, logging.INFO

    handler = logging.StreamHandler(target)
    handler.setFormatter(logging.Formatter('%(levelname)s %(message)s'))
    log.addHandler(handler)
    log.setLevel(threshold)
def main():
    """
    Main program.

    Parses the command line, configures logging through setup_log() and
    passes the single URL argument with the parsed options to
    get_media(). Exits through parser.error() unless exactly one URL is
    given.
    """
    usage = "usage: %prog [options] url"
    parser = OptionParser(usage=usage, version=__version__)
    parser.add_option("-o", "--output",
                      metavar="OUTPUT", help="Outputs to the given filename.")
    parser.add_option("-r", "--resume",
                      action="store_true", dest="resume", default=False,
                      help="Resume a download")
    parser.add_option("-l", "--live",
                      action="store_true", dest="live", default=False,
                      help="Enable for live streams")
    parser.add_option("-s", "--silent",
                      action="store_true", dest="silent", default=False,
                      help="Only log warnings and errors")
    parser.add_option("-q", "--quality",
                      metavar="quality", help="Choose what format to download.\nIt will download the best format by default")
    parser.add_option("-H", "--hls",
                      action="store_true", dest="hls", default=False,
                      help="Download the stream using HLS")
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    setup_log(options.silent)

    url = args[0]
    get_media(url, options)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()