#!/usr/bin/env python
"""Download video and audio streams from the sites handled by the classes below.

The script can be run from the command line (see main()) or used from
another Python script through get_media() together with an Options object.
"""
import sys
if sys.version_info > (3, 0):
    from urllib.request import Request, urlopen
    from urllib.error import HTTPError, URLError
    from urllib.parse import urlparse, parse_qs, unquote_plus, quote_plus
    from io import BytesIO as StringIO
else:
    from urllib2 import Request, urlopen, HTTPError, URLError
    from urlparse import urlparse, parse_qs
    from urllib import unquote_plus, quote_plus
    from StringIO import StringIO

import re
import os
import socket
import subprocess
from optparse import OptionParser
import xml.etree.ElementTree as ET
import shlex
import json
import time
import logging
import base64
import struct
import binascii
from datetime import timedelta

__version__ = "0.8.2013.01.26"


class Options:
    """
    Options used when invoking the script from another Python script.

    Simple container class used when calling get_media() from another Python
    script. The variables correspond to the command line parameters parsed
    in main() when the script is called directly.

    When called from a script there are a few more things to consider:

    * Logging is done to 'log'. main() calls setup_log() which sets the
      logging to either stdout or stderr depending on the silent level.
      A user calling get_media() directly can either also use setup_log()
      or configure the log manually.

    * Progress information is printed to 'progress_stream' which defaults to
      sys.stderr but can be changed to any stream.

    * Many errors result in calls to sys.exit(), so catch SystemExit to
      prevent the entire application from exiting if that happens.
    """

    def __init__(self):
        self.output = None
        self.resume = False
        self.live = False
        self.silent = False
        self.quality = None
        self.hls = False
        self.other = None


log = logging.getLogger('svtplay_dl')
progress_stream = sys.stderr


def _binary_stdout():
    """Return a stdout object that accepts bytes on both Python 2 and 3."""
    return getattr(sys.stdout, "buffer", sys.stdout)


def _is_absolute_url(url):
    """Return True if url starts with an http or https scheme."""
    return url.startswith(("http://", "https://"))


def readbyte(data, pos):
    """Return the unsigned byte stored at data[pos]."""
    # Slice instead of index: indexing bytes yields an int on Python 3.
    return struct.unpack("B", data[pos:pos + 1])[0]


def read16(data, pos):
    """Return the big-endian unsigned 16 bit integer starting at pos."""
    endpos = pos + 2
    return struct.unpack(">H", data[pos:endpos])[0]


def read24(data, pos):
    """Return the big-endian unsigned 24 bit integer starting at pos."""
    end = pos + 3
    # Pad to four bytes with a bytes literal so this works on Python 3 too.
    return struct.unpack(">L", b"\x00" + data[pos:end])[0]


def read32(data, pos):
    """Return the big-endian signed 32 bit integer starting at pos."""
    end = pos + 4
    return struct.unpack(">i", data[pos:end])[0]


def read64(data, pos):
    """Return the big-endian unsigned 64 bit integer starting at pos."""
    end = pos + 8
    return struct.unpack(">Q", data[pos:end])[0]


def readstring(data, pos):
    """
    Read a NUL terminated string starting at pos.

    Returns a tuple (position after the terminator, string without it).
    Raises ValueError if no terminator is found.
    """
    endpos = data.index(b"\x00", pos)
    return endpos + 1, data[pos:endpos]


def readboxtype(data, pos):
    """
    Read a box header (32 bit size followed by a four byte type) at pos.

    Returns a tuple (position after the header, payload size, box type).
    The box type is returned as bytes, e.g. b"abst".
    """
    boxsize = read32(data, pos)
    tpos = pos + 4
    endpos = tpos + 4
    boxtype = data[tpos:endpos]
    if boxsize > 1:
        # The size includes the eight header bytes.
        boxsize -= 8
        pos += 8
    return pos, boxsize, boxtype


def readbox(data, pos):
    """
    Parse the payload of an "abst" (bootstrap info) box starting at pos.

    Most fields are only read to advance the position. Returns the segment
    run table produced by readasrtbox(), or None if no "asrt" box is found.
    """
    antal = None
    version = readbyte(data, pos)
    pos += 1
    flags = read24(data, pos)
    pos += 3
    bootstrapversion = read32(data, pos)
    pos += 4
    byte = readbyte(data, pos)
    pos += 1
    profile = (byte & 0xC0) >> 6
    live = (byte & 0x20) >> 5
    update = (byte & 0x10) >> 4
    timescale = read32(data, pos)
    pos += 4
    currentmediatime = read64(data, pos)
    pos += 8
    smptetimecodeoffset = read64(data, pos)
    pos += 8
    pos, movieidentifier = readstring(data, pos)

    serverentrycount = readbyte(data, pos)
    pos += 1
    serverentrytable = []
    for dummy in range(serverentrycount):
        pos, entry = readstring(data, pos)
        serverentrytable.append(entry)

    qualityentrycount = readbyte(data, pos)
    pos += 1
    qualityentrytable = []
    for dummy in range(qualityentrycount):
        pos, entry = readstring(data, pos)
        qualityentrytable.append(entry)

    pos, drm = readstring(data, pos)
    pos, metadata = readstring(data, pos)

    segmentruntable = readbyte(data, pos)
    pos += 1
    if segmentruntable > 0:
        pos, boxsize, boxtype = readboxtype(data, pos)
        if boxtype == b"asrt":
            antal = readasrtbox(data, pos)
            pos += boxsize

    fragRunTableCount = readbyte(data, pos)
    pos += 1
    for dummy in range(fragRunTableCount):
        pos, boxsize, boxtype = readboxtype(data, pos)
        if boxtype == b"afrt":
            readafrtbox(data, pos)
            pos += boxsize
    return antal


def readafrtbox(data, pos):
    """Walk through an "afrt" box payload at pos; the values are discarded."""
    version = readbyte(data, pos)
    pos += 1
    flags = read24(data, pos)
    pos += 3
    timescale = read32(data, pos)
    pos += 4
    qualityentry = readbyte(data, pos)
    pos += 1
    for dummy in range(qualityentry):
        pos, qualitysegmulti = readstring(data, pos)
    fragrunentrycount = read32(data, pos)
    pos += 4
    for dummy in range(fragrunentrycount):
        firstfragment = read32(data, pos)
        pos += 4
        timestamp = read64(data, pos)
        pos += 8
        duration = read32(data, pos)
        pos += 4


def readasrtbox(data, pos):
    """
    Parse an "asrt" box payload at pos.

    Returns a dict mapping the 1-based entry number to a dict with the keys
    "first" (first segment) and "total" (fragments per segment).
    """
    version = readbyte(data, pos)
    pos += 1
    flags = read24(data, pos)
    pos += 3
    qualityentrycount = readbyte(data, pos)
    pos += 1
    qualitysegmentmodifers = []
    for dummy in range(qualityentrycount):
        pos, modifier = readstring(data, pos)
        qualitysegmentmodifers.append(modifier)

    seqCount = read32(data, pos)
    pos += 4
    ret = {}
    for i in range(seqCount):
        firstseg = read32(data, pos)
        pos += 4
        fragPerSeg = read32(data, pos)
        pos += 4
        ret[i + 1] = {"first": firstseg, "total": fragPerSeg}
    return ret


def parsem3u(data):
    """
    Parse the text of an extended m3u playlist.

    Returns a tuple (globdata, files). globdata maps the name of every
    "#EXT-X-<name>[:<value>]" tag to its value ("" when there is none).
    files is a list of (uri, info) tuples where info holds the attributes of
    the preceding #EXT-X-STREAM-INF tag and/or the 'duration' and 'title' of
    the preceding #EXTINF tag. Parsing stops at #EXT-X-ENDLIST.

    Raises ValueError if data does not start with "#EXTM3U".
    """
    if not data.startswith("#EXTM3U"):
        raise ValueError("Does not apprear to be a ext m3u file")

    files = []
    streaminfo = {}
    globdata = {}

    data = data.replace("\r", "\n")
    for l in data.split("\n")[1:]:
        if not l:
            continue
        if l.startswith("#EXT-X-STREAM-INF:"):
            # Not a proper attribute-list parser: quoted values containing
            # commas are split, but every NAME=value pair is kept.
            for attribute in l[18:].split(","):
                name, sep, value = attribute.strip().partition("=")
                if sep:
                    streaminfo[name] = value
        elif l.startswith("#EXT-X-ENDLIST"):
            break
        elif l.startswith("#EXT-X-"):
            name, dummy, value = l[7:].strip().partition(":")
            globdata[name] = value
        elif l.startswith("#EXTINF:"):
            dur, dummy, title = l[8:].strip().partition(",")
            streaminfo['duration'] = dur
            streaminfo['title'] = title
        elif l[0] == '#':
            pass
        else:
            files.append((l, streaminfo))
            streaminfo = {}

    return globdata, files


def decode_f4f(fragID, fragData):
    """
    Return the offset in fragData (bytes) where the data to write starts.

    The offset is just after the "mdat" marker; for every fragment but the
    first, the two leading tags are skipped as well.
    Raises ValueError if fragData contains no "mdat" marker.
    """
    marker = fragData.find(b"mdat")
    if marker == -1:
        raise ValueError("No mdat box found in fragment %s" % fragID)
    start = marker + 4
    if fragID > 1:
        for dummy in range(2):
            tagLen, = struct.unpack_from(">L", fragData, start)
            # Lower 24 bits hold the tag payload size.
            tagLen &= 0x00ffffff
            # 11 byte tag header plus the 4 byte trailing size field.
            start += tagLen + 11 + 4
    return start


def get_http_data(url, method="GET", header="", data="", binary=False):
    """
    Fetch url and return the response body.

    header, if given, is sent as the Content-Type and data as the request
    body. On Python 3 the body is decoded as UTF-8 when possible unless
    binary is true, in which case the raw bytes are always returned.
    Connection problems are logged and end with sys.exit(5).
    """
    try:
        # Request() itself raises ValueError for scheme-less URLs on Python 3.
        request = Request(url)
        request.add_header('User-Agent', 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-GB; rv:1.9.0.3) Gecko/2008092417 Firefox/3.0.3')

        if header:
            request.add_header('Content-Type', header)
        if data:
            if sys.version_info > (3, 0) and not isinstance(data, bytes):
                data = data.encode("utf-8")
            if hasattr(request, "add_data"):
                request.add_data(data)
            else:
                # Request.add_data() was removed in Python 3.4.
                request.data = data
        response = urlopen(request)
    except HTTPError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s" % e.code)
        sys.exit(5)
    except URLError as e:
        log.error("Something wrong with that url")
        log.error("Error code: %s" % e.reason)
        sys.exit(5)
    except ValueError as e:
        log.error("Try adding http:// before the url")
        sys.exit(5)

    try:
        data = response.read()
    except socket.error as e:
        log.error("Lost the connection to the server")
        sys.exit(5)
    finally:
        response.close()

    if sys.version_info > (3, 0) and not binary:
        try:
            data = data.decode("utf-8")
        except UnicodeDecodeError:
            pass
    return data


def progress(byte, total, extra=""):
    """ Print some info about how much we have downloaded """
    ratio = float(byte) / total
    percent = round(ratio * 100, 2)
    tlen = str(len(str(total)))
    fmt = "Downloaded %" + tlen + "dkB of %dkB bytes (% 3.2f%%)"
    progresstr = fmt % (byte >> 10, total >> 10, percent)

    columns = int(os.getenv("COLUMNS", "80"))
    if len(progresstr) < columns - 13:
        p = int((columns - len(progresstr) - 3) * ratio)
        q = int((columns - len(progresstr) - 3) * (1 - ratio))
        progresstr = "[" + ("#" * p) + (" " * q) + "] " + progresstr
    progress_stream.write(progresstr + ' ' + extra + '\r')

    if byte >= total:
        progress_stream.write('\n')

    progress_stream.flush()


def download_hds(options, url, swf=None):
    """
    Download an HDS stream described by the f4m manifest at url.

    The fragments are written as an flv file to options.output, or to
    stdout when options.output is "-". swf is accepted but not used.
    """
    data = get_http_data(url)
    streams = {}
    bootstrap = {}
    xml = ET.XML(data)

    if sys.version_info < (2, 7):
        bootstrapIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.getiterator("{http://ns.adobe.com/f4m/1.0}media")
    else:
        bootstrapIter = xml.iter("{http://ns.adobe.com/f4m/1.0}bootstrapInfo")
        mediaIter = xml.iter("{http://ns.adobe.com/f4m/1.0}media")

    for i in bootstrapIter:
        bootstrap[i.attrib["id"]] = i.text

    for i in mediaIter:
        streams[int(i.attrib["bitrate"])] = {"url": i.attrib["url"], "bootstrapInfoId": i.attrib["bootstrapInfoId"], "metadata": i.find("{http://ns.adobe.com/f4m/1.0}metadata").text}

    test = select_quality(options, streams)

    bootstrap = base64.b64decode(bootstrap[test["bootstrapInfoId"]])
    box = readboxtype(bootstrap, 0)
    antal = None
    if box[2] == b"abst":
        antal = readbox(bootstrap, box[0])
    if not antal:
        log.error("Can't find video file")
        sys.exit(2)

    baseurl = url[0:url.rfind("/")]

    if options.output != "-":
        extension = re.search(r"(\.[a-z0-9]+)$", options.output)
        if not extension:
            options.output = "%s.flv" % options.output
        log.info("Outfile: %s", options.output)
        file_d = open(options.output, "wb")
    else:
        file_d = _binary_stdout()

    try:
        # flv file header followed by the stream metadata tag.
        file_d.write(binascii.a2b_hex(b"464c56010500000009000000001200010c00000000000000"))
        file_d.write(base64.b64decode(test["metadata"]))
        file_d.write(binascii.a2b_hex(b"00000000"))
        total = antal[1]["total"]
        start = time.time()
        estimated = ""
        i = 1
        while i <= total:
            url = "%s/%sSeg1-Frag%s" % (baseurl, test["url"], i)
            if options.output != "-":
                progressbar(total, i, estimated)
            data = get_http_data(url, binary=True)
            number = decode_f4f(i, data)
            file_d.write(data[number:])
            now = time.time()
            dt = now - start
            et = dt / (i + 1) * total
            rt = et - dt
            td = timedelta(seconds=int(rt))
            estimated = "Estimated Remaining: " + str(td)
            i += 1
    finally:
        if options.output != "-":
            file_d.close()
            progress_stream.write('\n')


def download_hls(options, url, baseurl=None):
    """
    Download an HLS stream from the master playlist at url.

    Relative playlist entries are resolved against baseurl; when baseurl is
    None the directory of the playlist that lists them is used instead.
    The segments are written to options.output (stdout when it is "-").
    """
    data = get_http_data(url)
    globaldata, files = parsem3u(data)
    streams = {}
    for i in files:
        streams[int(i[1]["BANDWIDTH"])] = i[0]

    test = select_quality(options, streams)
    if not _is_absolute_url(test):
        master_base = baseurl if baseurl is not None else url.rsplit("/", 1)[0]
        test = "%s/%s" % (master_base, test)
    if baseurl is None:
        baseurl = test.rsplit("/", 1)[0]

    m3u8 = get_http_data(test)
    globaldata, files = parsem3u(m3u8)
    encrypted = "KEY" in globaldata
    decryptor = None

    if encrypted:
        try:
            from Crypto.Cipher import AES
        except ImportError:
            log.error("You need to install pycrypto to download encrypted HLS streams")
            sys.exit(2)
        match = re.search(r'URI="(https?://[^"]*)"', globaldata["KEY"])
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        key = get_http_data(match.group(1), binary=True)
        # NOTE(review): a random IV and a single decryptor shared by all
        # segments is kept from the original code; confirm against the
        # playlist's EXT-X-KEY IV / media sequence rules.
        rand = os.urandom(16)
        decryptor = AES.new(key, AES.MODE_CBC, rand)

    if options.output != "-":
        extension = re.search(r"(\.[a-z0-9]+)$", options.output)
        if not extension:
            options.output = "%s.ts" % options.output
        log.info("Outfile: %s", options.output)
        file_d = open(options.output, "wb")
    else:
        file_d = _binary_stdout()

    try:
        n = 1
        start = time.time()
        estimated = ""
        for i in files:
            item = i[0]
            if options.output != "-":
                progressbar(len(files), n, estimated)
            if not _is_absolute_url(item):
                item = "%s/%s" % (baseurl, item)
            data = get_http_data(item, binary=True)
            if encrypted:
                lots = StringIO(data)
                chunks = []
                crypt = lots.read(1024)
                decrypted = decryptor.decrypt(crypt)
                while decrypted:
                    chunks.append(decrypted)
                    crypt = lots.read(1024)
                    decrypted = decryptor.decrypt(crypt)
                data = b"".join(chunks)
            file_d.write(data)
            now = time.time()
            dt = now - start
            et = dt / (n + 1) * len(files)
            rt = et - dt
            td = timedelta(seconds=int(rt))
            estimated = "Estimated Remaining: " + str(td)
            n += 1
    finally:
        if options.output != "-":
            file_d.close()
            progress_stream.write('\n')


def download_http(options, url):
    """ Get the stream from HTTP """
    response = urlopen(url)
    try:
        # The header is missing for e.g. chunked responses; 0 means unknown.
        total_size = int(response.info()['Content-Length'] or 0)
    except ValueError:
        total_size = 0
    bytes_so_far = 0
    if options.output != "-":
        extension = re.search(r"(\.[a-z0-9]+)$", url)
        if extension:
            options.output = options.output + extension.group(1)
        log.info("Outfile: %s", options.output)
        file_d = open(options.output, "wb")
    else:
        file_d = _binary_stdout()

    try:
        lastprogress = 0
        while 1:
            chunk = response.read(8192)
            bytes_so_far += len(chunk)

            if not chunk:
                break

            file_d.write(chunk)
            if options.output != "-" and total_size > 0:
                now = time.time()
                # Update the progress line at most once per second.
                if lastprogress + 1 < now:
                    lastprogress = now
                    progress(bytes_so_far, total_size)
    finally:
        if options.output != "-":
            file_d.close()


def download_rtmp(options, url):
    """ Get the stream from RTMP """
    args = []
    if options.live:
        args.append("-v")

    if options.resume:
        args.append("-e")

    extension = re.search(r"(\.[a-z0-9]+)$", url)
    if options.output != "-":
        if not extension:
            # options.other may be None when no extra arguments were set.
            extension = re.search(r"-y (.+):[-_a-z0-9\/]", options.other or "")
            if not extension:
                options.output = "%s.flv" % options.output
            else:
                # NOTE(review): the captured playpath prefix has no leading
                # dot, so "name" + "mp4" becomes "namemp4" - confirm intent.
                options.output = "%s%s" % (options.output, extension.group(1))
        else:
            options.output = options.output + extension.group(1)
        log.info("Outfile: %s", options.output)
        args += ["-o", options.output]
    if options.silent or options.output == "-":
        args.append("-q")
    if options.other:
        args += shlex.split(options.other)
    command = ["rtmpdump", "-r", url] + args
    try:
        subprocess.call(command)
    except OSError as e:
        log.error("Could not execute rtmpdump: " + e.strerror)


def select_quality(options, streams):
    """
    Return the entry of streams matching options.quality.

    streams maps a numeric quality to a stream description. Without
    options.quality the entry with the highest key is returned. An unknown
    quality or an empty streams dict is logged and ends with sys.exit(4).
    """
    sort = sorted(streams.keys(), key=int)
    if not sort:
        log.error("Can't find that quality. (Try one of: %s)", "")
        sys.exit(4)

    if options.quality:
        quality = options.quality
    else:
        quality = sort.pop()

    try:
        selected = streams[int(quality)]
    except (KeyError, ValueError):
        log.error("Can't find that quality. (Try one of: %s)",
                  ", ".join(map(str, sort)))
        sys.exit(4)

    return selected


class Justin():
    """Handler for twitch.tv and justin.tv."""

    def handle(self, url):
        return ("twitch.tv" in url) or ("justin.tv" in url)

    def get(self, options, url):
        parse = urlparse(url)
        match = re.search(r"/b/(\d+)", parse.path)
        if match:
            url = "http://api.justin.tv/api/broadcast/by_archive/%s.xml?onsite=true" % match.group(1)
            data = get_http_data(url)
            xml = ET.XML(data)
            url = xml.find("archive").find("video_file_url").text

            download_http(options, url)
        else:
            match = re.search("/(.*)", parse.path)
            if match:
                user = match.group(1)
                data = get_http_data(url)
                match = re.search(r'embedSWF\("(.*)", "live', data)
                if not match:
                    log.error("Can't find swf file.")
                    sys.exit(2)
                options.other = match.group(1)
                url = "http://usher.justin.tv/find/%s.xml?type=any&p=2321" % user
                options.live = True
                data = get_http_data(url)
                # Tag names starting with a digit are not valid XML; prefix
                # both opening and closing tags with an underscore.
                data = re.sub(r"<(\d+)", r"<_\g<1>", data)
                data = re.sub(r"</(\d+)", r"</_\g<1>", data)
                xml = ET.XML(data)
                streams = {}
                for i in list(xml):
                    if i.tag[1:][:-1] != "iv":
                        try:
                            stream = {}
                            stream["token"] = i.find("token").text
                            stream["url"] = "%s/%s" % (i.find("connect").text, i.find("play").text)
                            streams[int(i.find("video_height").text)] = stream
                        except AttributeError:
                            # Entries lacking one of the fields are skipped.
                            continue

                test = select_quality(options, streams)
                options.other = "-j '%s' -W %s" % (test["token"], options.other)
                options.resume = False
                download_rtmp(options, test["url"])


class Hbo():
    """Handler for hbo.com."""

    def handle(self, url):
        return "hbo.com" in url

    def get(self, options, url):
        parse = urlparse(url)
        try:
            other = parse[5]
        except KeyError:
            log.error("Something wrong with that url")
            sys.exit(2)
        match = re.search("^/(.*).html", other)
        if not match:
            log.error("Cant find video file")
            sys.exit(2)
        url = "http://www.hbo.com/data/content/%s.xml" % match.group(1)
        data = get_http_data(url)
        xml = ET.XML(data)
        videoid = xml.find("content")[1].find("videoId").text
        url = "http://render.cdn.hbo.com/data/content/global/videos/data/%s.xml" % videoid
        data = get_http_data(url)
        xml = ET.XML(data)
        ss = xml.find("videos")
        if sys.version_info < (2, 7):
            sa = list(ss.getiterator("size"))
        else:
            sa = list(ss.iter("size"))
        streams = {}
        for i in sa:
            stream = {}
            stream["path"] = i.find("tv14").find("path").text
            streams[int(i.attrib["width"])] = stream

        test = select_quality(options, streams)

        download_rtmp(options, test["path"])


class Sr():
    """Handler for sverigesradio.se."""

    def handle(self, url):
        return "sverigesradio.se" in url

    def get(self, options, url):
        data = get_http_data(url)
        parse = urlparse(url)
        try:
            # Only checks that the "metafile" parameter is present.
            parse_qs(parse[4])["metafile"][0]
            options.other = "%s?%s" % (parse[2], parse[4])
        except KeyError:
            match = re.search(r"linkUrl=(.*)\;isButton=", data)
            if not match:
                log.error("Can't find video file")
                sys.exit(2)
            options.other = unquote_plus(match.group(1))
        url = "http://sverigesradio.se%s" % options.other
        data = get_http_data(url)
        xml = ET.XML(data)
        url = xml.find("entry").find("ref").attrib["href"]

        download_http(options, url)


class Urplay():
    """Handler for urplay.se."""

    def handle(self, url):
        return "urplay.se" in url

    def get(self, options, url):
        data = get_http_data(url)
        match = re.search(r'file=(.*)\&plugins', data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        path = "mp%s:%s" % (match.group(1)[-1], match.group(1))
        options.other = "-a ondemand -y %s" % path
        download_rtmp(options, "rtmp://streaming.ur.se/")


class Qbrick():
    """Handler for dn.se, di.se and svd.se."""

    def handle(self, url):
        return ("dn.se" in url) or ("di.se" in url) or ("svd.se" in url)

    def get(self, options, url):
        if "dn.se" in url:
            data = get_http_data(url)
            match = re.search(r'data-qbrick-mcid="([0-9A-F]+)"', data)
            if not match:
                match = re.search(r"mediaId = '([0-9A-F]+)';", data)
                if not match:
                    log.error("Can't find video file")
                    sys.exit(2)
                mcid = "%sDE1BA107" % match.group(1)
            else:
                mcid = match.group(1)
            host = "http://vms.api.qbrick.com/rest/v3/getsingleplayer/%s" % mcid
        elif "di.se" in url:
            data = get_http_data(url)
            match = re.search(r'ccid: "(.*)"\,', data)
            if not match:
                log.error("Can't find video file")
                sys.exit(2)
            host = "http://vms.api.qbrick.com/rest/v3/getplayer/%s" % match.group(1)
        elif "svd.se" in url:
            match = re.search(r"_([0-9]+)\.svd", url)
            if not match:
                log.error("Can't find video file")
                sys.exit(2)

            data = get_http_data("http://www.svd.se/?service=ajax&type=webTvClip&articleId=%s" % match.group(1))
            match = re.search(r"mcid=([A-F0-9]+)\&width=", data)

            if not match:
                log.error("Can't find video file")
                sys.exit(2)

            host = "http://vms.api.qbrick.com/rest/v3/getsingleplayer/%s" % match.group(1)
        else:
            log.error("Can't find site")
            sys.exit(2)

        data = get_http_data(host)
        xml = ET.XML(data)
        try:
            url = xml.find("media").find("item").find("playlist").find("stream").find("format").find("substream").text
        except AttributeError:
            log.error("Can't find video file")
            sys.exit(2)

        data = get_http_data(url)
        xml = ET.XML(data)
        server = xml.find("head").find("meta").attrib["base"]
        streams = xml.find("body").find("switch")
        if sys.version_info < (2, 7):
            sa = list(streams.getiterator("video"))
        else:
            sa = list(streams.iter("video"))
        streams = {}
        for i in sa:
            streams[int(i.attrib["system-bitrate"])] = i.attrib["src"]

        path = select_quality(options, streams)

        options.other = "-y %s" % path
        download_rtmp(options, server)


class Kanal5():
    """Handler for kanal5play.se."""

    def handle(self, url):
        return "kanal5play.se" in url

    def get(self, options, url):
        match = re.search(".*video/([0-9]+)", url)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        url = "http://www.kanal5play.se/api/getVideo?format=FLASH&videoId=%s" % match.group(1)
        data = json.loads(get_http_data(url))
        options.live = data["isLive"]
        steambaseurl = data["streamBaseUrl"]
        streams = {}

        for i in data["streams"]:
            stream = {}
            stream["source"] = i["source"]
            streams[int(i["bitrate"])] = stream

        test = select_quality(options, streams)

        filename = test["source"]
        match = re.search("^(.*):", filename)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        options.output = "%s.%s" % (options.output, match.group(1))
        options.other = "-W %s -y %s " % ("http://www.kanal5play.se/flash/StandardPlayer.swf", filename)
        download_rtmp(options, steambaseurl)


class Kanal9():
    """Handler for kanal9play.se and kanal5.se."""

    def handle(self, url):
        return ("kanal9play.se" in url) or ("kanal5.se" in url)

    def get(self, options, url):
        data = get_http_data(url)
        match = re.search(r'@videoPlayer" value="(.*)"', data)
        if not match:
            match = re.search(r"videoId=(\d+)&player", data)
            if not match:
                log.error("Can't find video file")
                sys.exit(2)

        try:
            from pyamf import remoting
        except ImportError:
            log.error("You need to install pyamf to download content from kanal5.se and kanal9play")
            log.error("In debian the package is called python-pyamf")
            sys.exit(2)

        player_id = 811317479001
        publisher_id = 22710239001
        const = "9f79dd85c3703b8674de883265d8c9e606360c2e"
        env = remoting.Envelope(amfVersion=3)
        env.bodies.append(("/1", remoting.Request(target="com.brightcove.player.runtime.PlayerMediaFacade.findMediaById", body=[const, player_id, match.group(1), publisher_id], envelope=env)))
        env = str(remoting.encode(env).read())
        url = "http://c.brightcove.com/services/messagebroker/amf?playerKey=AQ~~,AAAABUmivxk~,SnCsFJuhbr0vfwrPJJSL03znlhz-e9bk"
        header = "application/x-amf"
        data = get_http_data(url, "POST", header, env)
        streams = {}

        for i in remoting.decode(data).bodies[0][1].body['renditions']:
            stream = {}
            stream["uri"] = i["defaultURL"]
            streams[i["encodingRate"]] = stream

        test = select_quality(options, streams)

        filename = test["uri"]
        match = re.search(r"(rtmp[e]{0,1}://.*)\&(.*)$", filename)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        options.other = "-W %s -y %s " % ("http://admin.brightcove.com/viewer/us1.25.04.01.2011-05-24182704/connection/ExternalConnection_2.swf", match.group(2))
        download_rtmp(options, match.group(1))


class Expressen():
    """Handler for expressen.se."""

    def handle(self, url):
        return "expressen.se" in url

    def get(self, options, url):
        parse = urlparse(url)
        match = re.search(r"/(.*[\/\+].*)/", unquote_plus(parse.path))
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        url = "http://tv.expressen.se/%s/?standAlone=true&output=xml" % quote_plus(match.group(1))
        data = get_http_data(url)
        xml = ET.XML(data)
        ss = xml.find("vurls")
        if sys.version_info < (2, 7):
            sa = list(ss.getiterator("vurl"))
        else:
            sa = list(ss.iter("vurl"))
        streams = {}

        for i in sa:
            streams[int(i.attrib["bitrate"])] = i.text

        test = select_quality(options, streams)

        filename = test
        match = re.search(r"rtmp://([0-9a-z\.]+/[0-9]+/)(.*).flv", filename)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)

        filename = "rtmp://%s" % match.group(1)
        options.other = "-y %s" % match.group(2)

        download_rtmp(options, filename)


class Aftonbladet():
    """Handler for aftonbladet.se."""

    def handle(self, url):
        return "aftonbladet.se" in url

    def get(self, options, url):
        parse = urlparse(url)
        data = get_http_data(url)
        match = re.search("abTvArticlePlayer-player-(.*)-[0-9]+-[0-9]+-clickOverlay", data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        try:
            start = parse_qs(parse[4])["start"][0]
        except KeyError:
            start = 0
        # The query value is a string; compare it numerically.
        try:
            start_seconds = float(start)
        except ValueError:
            start_seconds = 0
        url = "http://www.aftonbladet.se/resource/webbtv/article/%s/player" % match.group(1)
        data = get_http_data(url)
        xml = ET.XML(data)
        url = xml.find("articleElement").find("mediaElement").find("baseUrl").text
        path = xml.find("articleElement").find("mediaElement").find("media").attrib["url"]
        live = xml.find("articleElement").find("mediaElement").find("isLive").text
        options.other = "-y %s" % path

        if start_seconds > 0:
            options.other = "%s -A %s" % (options.other, str(start))

        if live == "true":
            options.live = True

        if url is None:
            log.error("Can't find any video on that page")
            sys.exit(3)

        if url[0:4] == "rtmp":
            download_rtmp(options, url)
        else:
            filename = url + path
            download_http(options, filename)


class Viaplay():
    """Handler for tv3play.se, tv6play.se and tv8play.se."""

    def handle(self, url):
        return ("tv3play.se" in url) or ("tv6play.se" in url) or ("tv8play.se" in url)

    def get(self, options, url):
        parse = urlparse(url)
        match = re.search(r'\/play\/(.*)/?', parse.path)
        if not match:
            log.error("Cant find video file")
            sys.exit(2)
        url = "http://viastream.viasat.tv/PlayProduct/%s" % match.group(1)
        options.other = ""
        data = get_http_data(url)
        xml = ET.XML(data)
        filename = xml.find("Product").find("Videos").find("Video").find("Url").text

        if filename[:4] == "http":
            data = get_http_data(filename)
            xml = ET.XML(data)
            filename = xml.find("Url").text

        options.other = "-W http://flvplayer.viastream.viasat.tv/play/swf/player110516.swf?rnd=1315434062"
        download_rtmp(options, filename)


class Tv4play():
    """Handler for tv4play.se and tv4.se."""

    def handle(self, url):
        return ("tv4play.se" in url) or ("tv4.se" in url)

    def get(self, options, url):
        parse = urlparse(url)
        if "tv4play.se" in url:
            try:
                vid = parse_qs(parse[4])["video_id"][0]
            except KeyError:
                log.error("Can't find video file")
                sys.exit(2)
        else:
            match = re.search(r"-(\d+)$", url)
            if match:
                vid = match.group(1)
            else:
                log.error("Can't find video file")
                sys.exit(2)

        url = "http://premium.tv4play.se/api/web/asset/%s/play" % vid
        data = get_http_data(url)
        xml = ET.XML(data)
        ss = xml.find("items")
        if sys.version_info < (2, 7):
            sa = list(ss.getiterator("item"))
        else:
            sa = list(ss.iter("item"))

        if xml.find("live").text:
            if xml.find("live").text != "false":
                options.live = True

        streams = {}

        for i in sa:
            if i.find("mediaFormat").text != "smi":
                stream = {}
                stream["uri"] = i.find("base").text
                stream["path"] = i.find("url").text
                streams[int(i.find("bitrate").text)] = stream
        if len(streams) == 1:
            test = streams[list(streams.keys())[0]]
        else:
            test = select_quality(options, streams)

        swf = "http://www.tv4play.se/flash/tv4playflashlets.swf"
        options.other = "-W %s -y %s" % (swf, test["path"])

        if test["uri"][0:4] == "rtmp":
            download_rtmp(options, test["uri"])
        elif test["uri"].endswith("f4m"):
            match = re.search(r"\/se\/secure\/", test["uri"])
            if match:
                log.error("This stream is encrypted. Use --hls option")
                sys.exit(2)
            manifest = "%s?hdcore=2.8.0&g=hejsan" % test["path"]
            download_hds(options, manifest, swf)


class Svtplay():
    """Handler for svtplay.se and svt.se."""

    def handle(self, url):
        return ("svtplay.se" in url) or ("svt.se" in url)

    def get(self, options, url):
        if "svt.se" in url:
            data = get_http_data(url)
            match = re.search(r'data-json-href="(.*)"', data)
            if match:
                # The attribute value is HTML escaped.
                filename = match.group(1).replace("&amp;", "&").replace("&format=json", "")
                url = "http://www.svt.se%s" % filename
            else:
                log.error("Can't find video file")
                sys.exit(2)
        url = "%s?type=embed" % url
        data = get_http_data(url)
        match = re.search(r'value="(/(public)?(statiskt)?/swf/video/svtplayer-[0-9\.]+swf)"', data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        swf = "http://www.svtplay.se%s" % match.group(1)
        options.other = "-W %s" % swf
        url = "%s&output=json&format=json" % url
        data = json.loads(get_http_data(url))
        options.live = data["video"]["live"]
        streams = {}
        streams2 = {}  # hack..
        for i in data["video"]["videoReferences"]:
            if options.hls and i["playerType"] == "ios":
                stream = {}
                stream["url"] = i["url"]
                streams[int(i["bitrate"])] = stream
            elif not options.hls and i["playerType"] == "flash":
                stream = {}
                stream["url"] = i["url"]
                streams[int(i["bitrate"])] = stream
            if options.hls and i["playerType"] == "flash":
                stream = {}
                stream["url"] = i["url"]
                streams2[int(i["bitrate"])] = stream

        if len(streams) == 0 and options.hls:
            if not streams2:
                log.error("Can't find any streams.")
                sys.exit(2)
            # Prefer the bitrate 0 entry as before, else the highest bitrate.
            test = streams2[0] if 0 in streams2 else streams2[max(streams2)]
            test["url"] = test["url"].replace("/z/", "/i/").replace("manifest.f4m", "master.m3u8")
        elif len(streams) == 0:
            log.error("Can't find any streams.")
            sys.exit(2)
        elif len(streams) == 1:
            test = streams[list(streams.keys())[0]]
        else:
            test = select_quality(options, streams)

        if test["url"][0:4] == "rtmp":
            download_rtmp(options, test["url"])
        elif options.hls:
            download_hls(options, test["url"])
        elif test["url"].endswith("f4m"):
            match = re.search(r"\/se\/secure\/", test["url"])
            if match:
                log.error("This stream is encrypted. Use --hls option")
                sys.exit(2)
            manifest = "%s?hdcore=2.8.0&g=hejsan" % test["url"]
            download_hds(options, manifest, swf)
        else:
            download_http(options, test["url"])


class Nrk(object):
    """Handler for nrk.no."""

    def handle(self, url):
        return "nrk.no" in url

    def get(self, options, url):
        data = get_http_data(url)
        match = re.search(r'data-media="(.*manifest.f4m)"', data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        manifest_url = match.group(1)
        if options.hls:
            manifest_url = manifest_url.replace("/z/", "/i/").replace("manifest.f4m", "master.m3u8")
            download_hls(options, manifest_url)
        else:
            manifest_url = "%s?hdcore=2.8.0&g=hejsan" % manifest_url
            download_hds(options, manifest_url)


class Dr(object):
    """Handler for dr.dk."""

    def handle(self, url):
        return "dr.dk" in url

    def get(self, options, url):
        data = get_http_data(url)
        match = re.search(r'resource:[ ]*"([^"]*)",', data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        resource_url = match.group(1)
        resource_data = get_http_data(resource_url)
        resource = json.loads(resource_data)
        streams = {}
        for stream in resource['links']:
            streams[stream['bitrateKbps']] = stream['uri']
        if len(streams) == 1:
            uri = streams[list(streams.keys())[0]]
        else:
            uri = select_quality(options, streams)
        # need -v ?
        options.other = "-v -y '" + uri.replace("rtmp://vod.dr.dk/cms/", "") + "'"
        download_rtmp(options, uri)


class Ruv(object):
    """Handler for ruv.is."""

    def handle(self, url):
        return "ruv.is" in url

    def get(self, options, url):
        data = get_http_data(url)
        match = re.search(r'(http://load.cache.is/vodruv.*)"', data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        js_url = match.group(1)
        js = get_http_data(js_url)
        tengipunktur = js.split('"')[1]
        match = re.search(r"http.*tengipunktur [+] '([:]1935.*)'", data)
        if not match:
            log.error("Can't find video file")
            sys.exit(2)
        m3u8_url = "http://" + tengipunktur + match.group(1)
        base_url = m3u8_url.rsplit("/", 1)[0]
        download_hls(options, m3u8_url, base_url)


def progressbar(total, pos, msg=""):
    """
    Given a total and a progress position, output a progress bar
    to progress_stream. It is important to not output anything else while
    using this, as it relies solely on the behavior of carriage return (\\r).

    Can also take an optional message to add after the
    progressbar. It must not contain newlines.

    The progress bar will look something like this:

    [099/500][=========...............................] ETA: 13:36:59

    Of course, the ETA part should be supplied by the calling
    function.
    """
    width = 50  # TODO hardcoded progressbar width
    # Guard against an empty download list (total == 0).
    rel_pos = int(float(pos) / total * width) if total else width
    bar = "=" * rel_pos + "." * (width - rel_pos)

    # Determine how many digits in total (base 10)
    digits_total = len(str(total))
    fmt_width = "%0" + str(digits_total) + "d"
    fmt = "\r[" + fmt_width + "/" + fmt_width + "][%s] %s"

    progress_stream.write(fmt % (pos, total, bar, msg))


def get_media(url, options):
    """
    Find the site handler for url and let it download the media.

    When options.output is unset or names a directory, the file name is
    derived from the page's <title>. Unsupported sites end with sys.exit(2).
    """
    sites = [Aftonbladet(), Dr(), Expressen(), Hbo(), Justin(), Kanal5(), Kanal9(),
             Nrk(), Qbrick(), Ruv(), Sr(), Svtplay(), Tv4play(), Urplay(), Viaplay()]
    stream = None
    for i in sites:
        if i.handle(url):
            stream = i
            break
    if not stream:
        log.error("That site is not supported. Make a ticket or send a message")
        sys.exit(2)

    if not options.output or os.path.isdir(options.output):
        data = get_http_data(url)
        match = re.search(r"(?i)<title>\s*(.*?)\s*</title>", data)
        if match:
            title = re.sub(r'[^\w\s-]', '', match.group(1)).strip().lower()
            title = re.sub(r'[-\s]+', '-', title)
            if options.output:
                # options.output is a directory here.
                filename = os.path.join(options.output, title)
            else:
                filename = title
            if sys.version_info > (3, 0):
                options.output = filename
            else:
                options.output = unicode(filename)
        elif not options.output:
            log.error("Can't find a title for the output file, use --output")
            sys.exit(2)

    stream.get(options, url)


def setup_log(silent):
    """Attach a handler to 'log': warnings on stderr if silent, else info on stdout."""
    if silent:
        stream = sys.stderr
        level = logging.WARNING
    else:
        stream = sys.stdout
        level = logging.INFO

    fmt = logging.Formatter('%(levelname)s %(message)s')
    hdlr = logging.StreamHandler(stream)
    hdlr.setFormatter(fmt)

    log.addHandler(hdlr)
    log.setLevel(level)


def main():
    """ Main program """
    usage = "usage: %prog [options] url"
    parser = OptionParser(usage=usage, version=__version__)
    parser.add_option("-o", "--output",
                      metavar="OUTPUT", help="Outputs to the given filename.")
    parser.add_option("-r", "--resume",
                      action="store_true", dest="resume", default=False,
                      help="Resume a download")
    parser.add_option("-l", "--live",
                      action="store_true", dest="live", default=False,
                      help="Enable for live streams")
    parser.add_option("-s", "--silent",
                      action="store_true", dest="silent", default=False)
    parser.add_option("-q", "--quality",
                      metavar="quality", help="Choose what format to download.\nIt will download the best format by default")
    parser.add_option("-H", "--hls",
                      action="store_true", dest="hls", default=False)
    (options, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")

    setup_log(options.silent)

    url = args[0]
    get_media(url, options)


if __name__ == "__main__":
    main()