svtplay-dl/lib/svtplay_dl/service/svtplay.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import re
import os
import xml.etree.ElementTree as ET
import copy
from  svtplay_dl.log import log
from svtplay_dl.service import Service, OpenGraphThumbMixin
from svtplay_dl.utils import filenamify, ensure_unicode
from svtplay_dl.utils.urllib import urlparse, urljoin
from svtplay_dl.fetcher.hds import hdsparse
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.fetcher.rtmp import RTMP
from svtplay_dl.fetcher.http import HTTP
from svtplay_dl.subtitle import subtitle
from svtplay_dl.error import ServiceError


class Svtplay(Service, OpenGraphThumbMixin):
    """Service that resolves svtplay.se / svt.se pages into downloadable items.

    ``get`` is a generator yielding subtitle and stream fetcher objects (or
    ``ServiceError`` instances describing why nothing could be produced), and
    ``find_all_episodes`` lists the episode URLs advertised by a program page.
    """
    supported_domains = ['svtplay.se', 'svt.se', 'beta.svtplay.se', 'svtflow.se']

    def __init__(self, url):
        Service.__init__(self, url)
        self.subtitle = None

    def get(self, options):
        """Yield the subtitle and video streams found for ``self.url``.

        ``options`` is read (``output_auto``, ``output``, ``force_subtitle``)
        and also mutated: ``live``, ``service``, ``output`` and ``other`` may
        be assigned here.  Problems are reported by yielding a
        ``ServiceError`` instead of raising it.
        """
        # Literal substring test: the previous regex "svt.se" had an unescaped
        # dot and therefore also matched svtplay.se URLs whose slug merely
        # contains something like "svt-se".
        if "svt.se" in self.url:
            page = self.get_urldata()
            match = re.search(r"data-json-href=\"(.*)\"", page)
            if match:
                filename = match.group(1).replace("&amp;", "&").replace("&format=json", "")
                url = "http://www.svt.se%s" % filename
            else:
                yield ServiceError("Can't find video file for: %s" % self.url)
                return
        else:
            url = self.url

        # The two sites use different query parameters to request JSON.
        if "svt.se" in url:
            params = {"format": "json"}
        else:
            params = {"output": "json"}

        response = self.http.request("get", url, params=params)
        if response.status_code == 404:
            yield ServiceError("Can't get the json file for %s" % url)
            return
        data = response.json()
        if "live" in data["video"]:
            options.live = data["video"]["live"]

        if options.output_auto:
            options.service = "svtplay"
            options.output = outputfilename(data, options.output, ensure_unicode(self.get_urldata()))

        if self.exclude(options):
            yield ServiceError("Excluding video")
            return

        # Only the first subtitle reference is used.  ``.get`` keeps a missing
        # "subtitleReferences" or "url" key from raising (the old
        # ``except KeyError: pass`` left ``suburl`` unbound).
        subtitle_refs = data["video"].get("subtitleReferences")
        if subtitle_refs:
            suburl = subtitle_refs[0].get("url")
            if suburl:
                yield subtitle(copy.copy(options), "wrst", suburl)

        if options.force_subtitle:
            return

        video_refs = data["video"].get("videoReferences", [])
        if len(video_refs) == 0:
            yield ServiceError("Media doesn't have any associated videos (yet?)")
            return

        for i in video_refs:
            parse = urlparse(i["url"])

            if parse.path.find("m3u8") > 0:
                streams = hlsparse(options, self.http.request("get", i["url"]), i["url"])
                if streams:
                    for n in list(streams.keys()):
                        yield streams[n]
            elif parse.path.find("f4m") > 0:
                # Manifests under /se/secure/ are skipped entirely.
                # NOTE(review): presumably protected streams - confirm.
                match = re.search(r"\/se\/secure\/", i["url"])
                if not match:
                    streams = hdsparse(options, self.http.request("get", i["url"], params={"hdcore": "3.7.0"}), i["url"])
                    if streams:
                        for n in list(streams.keys()):
                            yield streams[n]
            elif parse.scheme == "rtmp":
                # The SWF player path is scraped from the embed page and
                # handed to the RTMP fetcher through ``options.other``.
                embedurl = "%s?type=embed" % url
                embed = self.http.request("get", embedurl).text
                match = re.search(r"value=\"(/(public)?(statiskt)?/swf(/video)?/svtplayer-[0-9\.a-f]+swf)\"", embed)
                if not match:
                    # Report the problem instead of crashing on ``None.group``.
                    yield ServiceError("Can't find the swf player for %s" % url)
                    continue
                swf = "http://www.svtplay.se%s" % match.group(1)
                options.other = "-W %s" % swf
                yield RTMP(copy.copy(options), i["url"], i["bitrate"])
            else:
                yield HTTP(copy.copy(options), i["url"], "0")

    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs for the program page.

        The RSS feed linked from the page is preferred; without one the
        "play" links in the page markup are used.  When ``options.all_last``
        equals a position in the list, only the episodes up to and including
        that position are kept.  Returns ``None`` (after logging an error)
        when no episodes can be found.
        """
        match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                          self.get_urldata())
        if match is None:
            match = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', self.get_urldata())
            if not match:
                log.error("Couldn't retrieve episode list")
                return
            episodes = [urljoin("http://www.svtplay.se", x) for x in match]
        else:
            data = self.http.request("get", match.group(1)).content
            xml = ET.XML(data)

            episodes = [x.text for x in xml.findall(".//item/link")]
        episodes_new = []
        # Positions are counted from 1 so they compare directly to all_last.
        for n, episode in enumerate(episodes, 1):
            episodes_new.append(episode)
            if n == options.all_last:
                break
        return sorted(episodes_new)


def outputfilename(data, filename, raw):
    """Build the automatic output name for a video.

    ``data`` is the decoded JSON for the video; the keys
    ``statistics.folderStructure``, ``context.title`` and ``videoId`` are
    read.  ``filename`` is the user supplied output path, of which only the
    directory part is kept.  ``raw`` is the page HTML handed to
    ``seasoninfo`` to look for a season/episode tag.

    Returns ``<directory>/<title>`` when ``filename`` has a directory part,
    otherwise just ``<title>``.
    """
    directory = os.path.dirname(filename)
    name = data["statistics"]["folderStructure"]
    # Keep only the part before the first dot (a leading dot is left alone).
    dot = name.find(".")
    if dot > 0:
        name = name[:dot]
    # Drop the "arkiv-" marker only where it was detected, i.e. as a prefix.
    # The previous ``name.replace("arkiv-", "")`` also removed any later
    # occurrence inside the name.
    if name.startswith("arkiv-"):
        name = name[len("arkiv-"):]
    name = name.replace("-", ".")
    season = seasoninfo(raw)
    other = filenamify(data["context"]["title"])
    if season:
        title = "%s.%s.%s-%s-svtplay" % (name, season, other, data["videoId"])
    else:
        title = "%s.%s-%s-svtplay" % (name, other, data["videoId"])
    title = filenamify(title)
    if directory:
        return os.path.join(directory, title)
    return title


def seasoninfo(data):
    """Extract a ``SxxEyy`` season/episode tag from raw page HTML.

    The sub-title element of the video area is tried first and the first
    ``data-title`` attribute second.  Spaces and newlines are stripped from
    the captured text before it is searched for ``Avsnitt<N>``, optionally
    preceded by ``song<M>-``.  A missing season number counts as season 1.

    Returns the tag with both numbers zero-padded to two digits, or ``None``
    when no title text or no episode number is found.
    """
    title_patterns = (
        r'play_video-area-aside__sub-title">([^<]+)<span',
        r'data-title="([^"]+)"',
    )
    found = None
    for pattern in title_patterns:
        found = re.search(pattern, data)
        if found:
            break
    if found is None:
        return None

    compact = re.sub(" +", "", found.group(1)).replace('\n', '')
    numbers = re.search(r"(song(\d+)-)?Avsnitt(\d+)", compact)
    if not numbers:
        return None

    season_no = 1 if numbers.group(2) is None else int(numbers.group(2))
    episode_no = int(numbers.group(3))
    return "S%02dE%02d" % (season_no, episode_no)
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil --`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`import re`
svtplay: support for the new output mode 2014-08-27 22:41:38 +02:00			`import os`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`import xml.etree.ElementTree as ET`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`import copy`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`from svtplay_dl.log import log`
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`from svtplay_dl.service import Service, OpenGraphThumbMixin`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`from svtplay_dl.utils import filenamify, ensure_unicode`
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`from svtplay_dl.utils.urllib import urlparse, urljoin`
svtplay: Unused import HDS 2014-04-27 20:48:13 +02:00			`from svtplay_dl.fetcher.hds import hdsparse`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`from svtplay_dl.fetcher.hls import hlsparse`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`from svtplay_dl.fetcher.rtmp import RTMP`
			`from svtplay_dl.fetcher.http import HTTP`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`from svtplay_dl.subtitle import subtitle`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`from svtplay_dl.error import ServiceError`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
pep8: expected 2 blank lines, found 1 2015-09-15 20:10:32 +02:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`class Svtplay(Service, OpenGraphThumbMixin):`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`supported_domains = ['svtplay.se', 'svt.se', 'beta.svtplay.se', 'svtflow.se']`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00			`def __init__(self, url):`
			`Service.__init__(self, url)`
			`self.subtitle = None`

Move url to object attribute 2014-01-06 23:14:06 +01:00			`def get(self, options):`
			`if re.findall("svt.se", self.url):`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`data = self.get_urldata()`
Rewrite http request handling. 2014-12-08 23:07:02 +01:00			`match = re.search(r"data-json-href=\"(.*)\"", data)`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`if match:`
			`filename = match.group(1).replace("&", "&").replace("&format=json", "")`
			`url = "http://www.svt.se%s" % filename`
			`else:`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`yield ServiceError("Can't find video file for: %s" % self.url)`
svtplay: just return if we can’t find any video. 2014-10-06 23:08:03 +02:00			`return`
Move url to object attribute 2014-01-06 23:14:06 +01:00			`else:`
			`url = self.url`
hds: remove swf argument (it was unused) 2013-04-21 12:29:16 +02:00
svtplay: simplify json url 2015-10-29 18:08:25 +01:00
			`if "svt.se" in url:`
			`params = {"format": "json"}`
svtplay: check for ?, if it exists remove it from json request. 2013-11-14 22:46:08 +01:00			`else:`
svtplay: simplify json url 2015-10-29 18:08:25 +01:00			`params = {"output": "json"}`

			`data = self.http.request("get", url, params=params)`
More request fixes 2015-08-30 11:27:31 +02:00			`if data.status_code == 404:`
svtplay: simplify json url 2015-10-29 18:08:25 +01:00			`yield ServiceError("Can't get the json file for %s" % url)`
More request fixes 2015-08-30 11:27:31 +02:00			`return`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`data = data.json()`
svtplay: support for oppetarkiv.se 2013-04-16 13:18:40 +02:00			`if "live" in data["video"]:`
			`options.live = data["video"]["live"]`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
svtplay: support for the new output mode 2014-08-27 22:41:38 +02:00			`if options.output_auto:`
svtplay: use options.service instead of hardcoded service name in format string. 2014-08-27 22:59:31 +02:00			`options.service = "svtplay"`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`options.output = outputfilename(data, options.output, ensure_unicode(self.get_urldata()))`
svtplay: support for the new output mode 2014-08-27 22:41:38 +02:00
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00			`if self.exclude(options):`
Better excluding message fixing #198 2015-09-06 23:04:48 +02:00			`yield ServiceError("Excluding video")`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00			`return`

svtplay: subtitles didn’t get the right filename 2014-12-22 10:04:32 +01:00			`if data["video"]["subtitleReferences"]:`
			`try:`
			`suburl = data["video"]["subtitleReferences"][0]["url"]`
			`except KeyError:`
			`pass`
			`if suburl and len(suburl) > 0:`
			`yield subtitle(copy.copy(options), "wrst", suburl)`

service: no need to check video streams when we force subtitles 2014-09-21 19:12:17 +02:00			`if options.force_subtitle:`
			`return`

svtplay: Catch media without any videoReferences This happens when they publish information about the TV episode before publishing the video stream. Probably due to some bug in SVT Play. The web player is also unable the play the video, reporting "Can't play the program, try again later". 2015-10-25 15:44:47 +01:00			`if len(data["video"].get("videoReferences", [])) == 0:`
			`yield ServiceError("Media doesn't have any associated videos (yet?)")`
			`return`

Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`for i in data["video"]["videoReferences"]:`
svtplay: fixing a regression from last version 2013-10-14 20:05:03 +02:00			`parse = urlparse(i["url"])`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`if parse.path.find("m3u8") > 0:`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`streams = hlsparse(options, self.http.request("get", i["url"]), i["url"])`
svtplay: don’t crash when we can’t get any HLS streams 2015-02-01 09:09:37 +01:00			`if streams:`
			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
svtplay: it should be elif and not if 2014-04-21 19:12:21 +02:00			`elif parse.path.find("f4m") > 0:`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`match = re.search(r"\/se\/secure\/", i["url"])`
			`if not match:`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`streams = hdsparse(options, self.http.request("get", i["url"], params={"hdcore": "3.7.0"}), i["url"])`
HDS: show an error message when we see DRM files. 2014-10-12 23:31:02 +02:00			`if streams:`
			`for n in list(streams.keys()):`
			`yield streams[n]`
svtplay: it should be elif and not if 2014-04-21 19:12:21 +02:00			`elif parse.scheme == "rtmp":`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`embedurl = "%s?type=embed" % url`
svtplay: this need to be text 2015-09-01 23:53:13 +02:00			`data = self.http.request("get", embedurl).text`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`match = re.search(r"value=\"(/(public)?(statiskt)?/swf(/video)?/svtplayer-[0-9\.a-f]+swf)\"", data)`
			`swf = "http://www.svtplay.se%s" % match.group(1)`
			`options.other = "-W %s" % swf`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`yield RTMP(copy.copy(options), i["url"], i["bitrate"])`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`else:`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`yield HTTP(copy.copy(options), i["url"], "0")`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`def find_all_episodes(self, options):`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',`
more requests fixes for get_urldata. dont check the first byte 2015-08-31 22:04:59 +02:00			`self.get_urldata())`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`if match is None:`
more requests fixes for get_urldata. dont check the first byte 2015-08-31 22:04:59 +02:00			`match = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', self.get_urldata())`
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`if not match:`
			`log.error("Couldn't retrieve episode list")`
			`return`
			`episodes = [urljoin("http://www.svtplay.se", x) for x in match]`
			`else:`
svtplay: this need to be text 2015-09-01 23:53:13 +02:00			`data = self.http.request("get", match.group(1)).content`
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`xml = ET.XML(data)`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`episodes = [x.text for x in xml.findall(".//item/link")]`
svtplay: support for downloading lastest X episodes 2014-12-21 13:01:51 +01:00			`episodes_new = []`
			`n = 1`
			`for i in episodes:`
			`episodes_new.append(i)`
			`if n == options.all_last:`
			`break`
			`n += 1`
			`return sorted(episodes_new)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00

			`def outputfilename(data, filename, raw):`
			`directory = os.path.dirname(filename)`
			`name = data["statistics"]["folderStructure"]`
			`if name.find(".") > 0:`
			`name = name[:name.find(".")]`
svtplay: remove arkiv from the filename 2014-12-28 14:18:16 +01:00			`match = re.search("^arkiv-", name)`
			`if match:`
			`name = name.replace("arkiv-", "")`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`name = name.replace("-", ".")`
			`season = seasoninfo(raw)`
svtplay: use context instead of statistics for filename 2015-04-16 21:58:48 +02:00			`other = filenamify(data["context"]["title"])`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`if season:`
			`title = "%s.%s.%s-%s-svtplay" % (name, season, other, data["videoId"])`
			`else:`
svtplay: remove arkiv from the filename 2014-12-28 14:18:16 +01:00			`title = "%s.%s-%s-svtplay" % (name, other, data["videoId"])`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`title = filenamify(title)`
			`if len(directory):`
output: use the right slash for the right platform fixes #251 2015-08-24 23:02:18 +02:00			`output = os.path.join(directory, title)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`else:`
			`output = title`
			`return output`


svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00			`def seasoninfo(data):`
			`match = re.search(r'play_video-area-aside__sub-title">([^<]+)<span', data)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`if match:`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00			`line = match.group(1)`
			`else:`
			`match = re.search(r'data-title="([^"]+)"', data)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`if match:`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00			`line = match.group(1)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`else:`
			`return None`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00
pylint fixes 2015-01-05 21:52:34 +01:00			`line = re.sub(" +", "", match.group(1)).replace('\n', '')`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00			`match = re.search(r"(song(\d+)-)?Avsnitt(\d+)", line)`
			`if match:`
			`if match.group(2) is None:`
			`season = 1`
			`else:`
svtplay: two digits for season number 2014-12-28 16:08:50 +01:00			`season = int(match.group(2))`
			`if season < 10:`
			`season = "0%s" % season`
svtplay: add leading 0 in episodes <10 2014-12-29 20:02:49 +01:00			`episode = int(match.group(3))`
			`if episode < 10:`
			`episode = "0%s" % episode`
			`return "S%sE%s" % (season, episode)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`else:`
svtplay: add leading 0 in episodes <10 2014-12-29 20:02:49 +01:00			`return None`