svtplay-dl/lib/svtplay_dl/service/svtplay.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import sys
import re
import json
import xml.etree.ElementTree as ET

from svtplay_dl.service import Service, OpenGraphThumbMixin
from svtplay_dl.utils import get_http_data
from svtplay_dl.utils.urllib import urlparse
from svtplay_dl.fetcher.hds import HDS, hdsparse
from svtplay_dl.fetcher.hls import HLS, hlsparse
from svtplay_dl.fetcher.rtmp import RTMP
from svtplay_dl.fetcher.http import HTTP
from svtplay_dl.subtitle import subtitle_wsrt
from svtplay_dl.log import log

class Svtplay(Service, OpenGraphThumbMixin):
    """Service for the SVT sites listed in ``supported_domains``.

    ``get`` resolves a page URL to its JSON description and yields the
    subtitle and stream fetchers found there; ``find_all_episodes`` lists
    the episode URLs belonging to the programme of the current page.
    """
    supported_domains = ['svtplay.se', 'svt.se', 'oppetarkiv.se', 'beta.svtplay.se', 'svtflow.se']

    def __init__(self, url):
        """Store *url* via ``Service.__init__`` and reset the subtitle slot."""
        Service.__init__(self, url)
        self.subtitle = None

    def get(self, options):
        """Yield the subtitle (if any) and every stream found for ``self.url``.

        This is a generator.  It sets ``options.live`` from the JSON
        description and, for RTMP streams, ``options.other`` to the ``-W``
        argument naming the player SWF.

        Yields, in order: one ``subtitle_wsrt`` object when a subtitle URL
        is present, then for each entry in ``videoReferences`` either
        ``HLS`` objects, whatever ``hdsparse`` returned, an ``RTMP`` object
        or an ``HTTP`` object, depending on the entry's URL.

        Exits the process with status 2 when an svt.se page carries no
        ``data-json-href`` attribute.
        """
        if re.search(r"svt\.se", self.url):
            # svt.se pages only embed the player; the real video URL is in
            # the data-json-href attribute.  Stop at the closing quote so
            # that attributes following on the same line are not captured.
            match = re.search(r'data-json-href="([^"]*)"', self.get_urldata())
            if not match:
                log.error("Can't find video file")
                sys.exit(2)
            filename = match.group(1).replace("&amp;", "&").replace("&format=json", "")
            url = "http://www.svt.se%s" % filename
        else:
            url = self.url

        if "?" in url:
            dataurl = "%s&output=json&format=json" % url
        else:
            dataurl = "%s?&output=json&format=json" % url
        video = json.loads(get_http_data(dataurl))["video"]
        options.live = video.get("live", False)

        # subtitleReferences may be missing, None or empty, and its first
        # entry may lack a usable "url"; only yield when there is one.
        subtitle_refs = video.get("subtitleReferences")
        if subtitle_refs:
            subtitle = subtitle_refs[0].get("url")
            if subtitle:
                yield subtitle_wsrt(subtitle)

        for ref in video["videoReferences"]:
            stream_url = ref["url"]
            parse = urlparse(stream_url)

            if "m3u8" in parse.path:
                streams = hlsparse(stream_url)
                for bitrate in list(streams.keys()):
                    yield HLS(options, streams[bitrate], bitrate)
            elif "f4m" in parse.path:
                # Manifests under /se/secure/ are skipped, as before.
                if "/se/secure/" not in stream_url:
                    manifest = "%s?hdcore=2.8.0&g=hejsan" % stream_url
                    streams = hdsparse(options, manifest)
                    for bitrate in list(streams.keys()):
                        yield streams[bitrate]
            elif parse.scheme == "rtmp":
                # The embed page names the player SWF that is passed on
                # through the -W argument.  Append the parameter with the
                # right separator: url may already carry a query string.
                separator = "&" if "?" in url else "?"
                embed_page = get_http_data("%s%stype=embed" % (url, separator))
                match = re.search(r"value=\"(/(public)?(statiskt)?/swf(/video)?/svtplayer-[0-9\.a-f]+swf)\"", embed_page)
                if match is None:
                    log.error("Can't find the player swf needed for the rtmp stream")
                    continue
                swf = "http://www.svtplay.se%s" % match.group(1)
                options.other = "-W %s" % swf
                yield RTMP(options, stream_url, ref["bitrate"])
            else:
                yield HTTP(options, stream_url, "0")

    def find_all_episodes(self, options):
        """Return a list of episode URLs for the programme at ``self.url``.

        For www.oppetarkiv.se the paginated title listing is scraped; for
        every other host the RSS feed advertised by the page is read and
        its item links are returned sorted.  *options* is not used.

        Exits the process with status 2 when the programme title or the
        RSS link cannot be found.
        """
        if urlparse(self.url).netloc == "www.oppetarkiv.se":
            return self._find_oppetarkiv_episodes()

        match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                          self.get_urldata())
        if match is None:
            log.error("Couldn't retrieve episode list")
            sys.exit(2)

        xml = ET.XML(get_http_data(match.group(1)))

        # Skip empty <link> elements: None cannot be sorted together with
        # strings on Python 3.
        return sorted(x.text for x in xml.findall(".//item/link") if x.text)

    def _find_oppetarkiv_episodes(self):
        """Collect the episode URLs from every page of an oppetarkiv.se title listing."""
        # In the page source the title link sits inside a quoted attribute.
        match = re.search(r'"http://www\.oppetarkiv\.se/etikett/titel/([^"/]+)', self.get_urldata())
        if match is None:
            # Fall back to the URL itself, which has no surrounding quote.
            match = re.search(r'http://www\.oppetarkiv\.se/etikett/titel/([^"/?#]+)', self.url)
            if match is None:
                log.error("Couldn't find title")
                sys.exit(2)
        program = match.group(1)

        episode_re = re.compile(r'(http://www\.oppetarkiv\.se/video/[^"]+)')
        episodes = []
        page = 1
        while True:
            url = "http://www.oppetarkiv.se/etikett/titel/%s/?sida=%s&sort=tid_stigande&embed=true" % (program, page)
            data = get_http_data(url)
            episodes.extend(episode_re.findall(data))
            # NOTE(review): this CSS class presumably marks the "show more"
            # control; the loop stops on the first page without it.
            if not re.search(r'svtXColorDarkLightGrey', data):
                break
            page += 1

        return episodes
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`import sys`
			`import re`
			`import json`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`import xml.etree.ElementTree as ET`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`from svtplay_dl.service import Service, OpenGraphThumbMixin`
svtplay: yield the subs 2014-04-21 19:53:18 +02:00			`from svtplay_dl.utils import get_http_data`
svtplay: fixing a regression from last version 2013-10-14 20:05:03 +02:00			`from svtplay_dl.utils.urllib import urlparse`
hds: parse and kwargssupport 2014-04-27 13:19:34 +02:00			`from svtplay_dl.fetcher.hds import HDS, hdsparse`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`from svtplay_dl.fetcher.hls import HLS, hlsparse`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`from svtplay_dl.fetcher.rtmp import RTMP`
			`from svtplay_dl.fetcher.http import HTTP`
svtplay: yield the subs 2014-04-21 19:53:18 +02:00			`from svtplay_dl.subtitle import subtitle_wsrt`
Rename module from lib/svtplay to lib/svtplay_dl less confusion with the service. 2013-03-17 19:55:19 +01:00			`from svtplay_dl.log import log`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`class Svtplay(Service, OpenGraphThumbMixin):`
svtplay: basic svtflow.se support 2014-04-27 09:04:56 +02:00			`supported_domains = ['svtplay.se', 'svt.se', 'oppetarkiv.se', 'beta.svtplay.se', 'svtflow.se']`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00			`def __init__(self, url):`
			`Service.__init__(self, url)`
			`self.subtitle = None`

Move url to object attribute 2014-01-06 23:14:06 +01:00			`def get(self, options):`
			`if re.findall("svt.se", self.url):`
Add get_urldata() method to service self.get_urldata() is eqivalent to get_http_data(self.url), but also caches the data, so no additional requests are made if it is called multiple times (e.g when grabbing title or downloading thumbnail). Generic().get(url) still causes it to be fetched an extra time. 2014-02-18 16:48:53 +01:00			`match = re.search(r"data-json-href=\"(.*)\"", self.get_urldata())`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`if match:`
			`filename = match.group(1).replace("&amp;", "&").replace("&format=json", "")`
			`url = "http://www.svt.se%s" % filename`
			`else:`
			`log.error("Can't find video file")`
			`sys.exit(2)`
Move url to object attribute 2014-01-06 23:14:06 +01:00			`else:`
			`url = self.url`
hds: remove swf argument (it was unused) 2013-04-21 12:29:16 +02:00
svtplay: check for ?, if it exists remove it from json request. 2013-11-14 22:46:08 +01:00			`pos = url.find("?")`
			`if pos < 0:`
			`dataurl = "%s?&output=json&format=json" % url`
			`else:`
			`dataurl = "%s&output=json&format=json" % url`
			`data = json.loads(get_http_data(dataurl))`
svtplay: support for oppetarkiv.se 2013-04-16 13:18:40 +02:00			`if "live" in data["video"]:`
			`options.live = data["video"]["live"]`
			`else:`
			`options.live = False`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
svtplay: sometimes subtitleReferences is None 2014-02-11 18:16:26 +01:00			`if data["video"]["subtitleReferences"]:`
			`try:`
svtplay: only yield when have have a subtitle. 2014-04-27 15:36:57 +02:00			`subtitle = data["video"]["subtitleReferences"][0]["url"]`
svtplay: NoneType is not a valid execpt type. 2014-04-12 21:07:53 +02:00			`except KeyError:`
svtplay: sometimes subtitleReferences is None 2014-02-11 18:16:26 +01:00			`pass`
svtplay: only yield when have have a subtitle. 2014-04-27 15:36:57 +02:00			`if len(subtitle) > 0:`
			`yield subtitle_wsrt(subtitle)`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`for i in data["video"]["videoReferences"]:`
svtplay: fixing a regression from last version 2013-10-14 20:05:03 +02:00			`parse = urlparse(i["url"])`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`if parse.path.find("m3u8") > 0:`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`streams = hlsparse(i["url"])`
			`for n in list(streams.keys()):`
			`yield HLS(options, streams[n], n)`
svtplay: it should be elif and not if 2014-04-21 19:12:21 +02:00			`elif parse.path.find("f4m") > 0:`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`match = re.search(r"\/se\/secure\/", i["url"])`
			`if not match:`
			`manifest = "%s?hdcore=2.8.0&g=hejsan" % i["url"]`
hds: parse and kwargssupport 2014-04-27 13:19:34 +02:00			`streams = hdsparse(options, manifest)`
			`for n in list(streams.keys()):`
			`yield streams[n]`
svtplay: it should be elif and not if 2014-04-21 19:12:21 +02:00			`elif parse.scheme == "rtmp":`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`embedurl = "%s?type=embed" % url`
			`data = get_http_data(embedurl)`
			`match = re.search(r"value=\"(/(public)?(statiskt)?/swf(/video)?/svtplayer-[0-9\.a-f]+swf)\"", data)`
			`swf = "http://www.svtplay.se%s" % match.group(1)`
			`options.other = "-W %s" % swf`
			`yield RTMP(options, i["url"], i["bitrate"])`
			`else:`
			`yield HTTP(options, i["url"], "0")`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`def find_all_episodes(self, options):`
svtplay: all episodes for öppet arkiv. 2014-04-03 21:02:51 +02:00			`parse = urlparse(self.url)`
			`if parse.netloc == "www.oppetarkiv.se":`
			`page = 1`
			`match = re.search(r'"http://www.oppetarkiv.se/etikett/titel/([^"/]+)', self.get_urldata())`
			`if match is None:`
			`match = re.search(r'"http://www.oppetarkiv.se/etikett/titel/([^"/]+)', self.url)`
			`if match is None:`
			`log.error("Couldn't find title")`
			`sys.exit(2)`
			`program = match.group(1)`
			`more = True`
			`episodes = []`
			`while more:`
			`url = "http://www.oppetarkiv.se/etikett/titel/%s/?sida=%s&sort=tid_stigande&embed=true" % (program, page)`
			`data = get_http_data(url)`
			`visa = re.search(r'svtXColorDarkLightGrey', data)`
			`if not visa:`
			`more = False`
			`regex = re.compile(r'(http://www.oppetarkiv.se/video/[^"]+)')`
			`for match in regex.finditer(data):`
			`episodes.append(match.group(1))`
			`page += 1`

			`return episodes`

			`else:`
			`match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',`
			`self.get_urldata())`
			`if match is None:`
			`log.error("Couldn't retrieve episode list")`
			`sys.exit(2)`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00
svtplay: all episodes for öppet arkiv. 2014-04-03 21:02:51 +02:00			`xml = ET.XML(get_http_data(match.group(1)))`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00
svtplay: all episodes for öppet arkiv. 2014-04-03 21:02:51 +02:00			`return sorted(x.text for x in xml.findall(".//item/link"))`