svtplay-dl/lib/svtplay_dl/service/svtplay.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import re
import os
import xml.etree.ElementTree as ET
import copy
import hashlib

from svtplay_dl.log import log
from svtplay_dl.service import Service, OpenGraphThumbMixin
from svtplay_dl.utils import filenamify, ensure_unicode
from svtplay_dl.utils.urllib import urlparse, urljoin
from svtplay_dl.fetcher.hds import hdsparse
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.subtitle import subtitle
from svtplay_dl.error import ServiceError


class Svtplay(Service, OpenGraphThumbMixin):
    """Service for the SVT domains listed in ``supported_domains``."""

    supported_domains = ['svtplay.se', 'svt.se', 'beta.svtplay.se', 'svtflow.se']

    def get(self):
        """Yield the subtitles and video streams available for ``self.url``.

        This is a generator.  Failures are reported by yielding a
        ``ServiceError`` instance and stopping; nothing is raised on purpose.

        Yields, in order:
          * ``subtitle`` objects for every "websrt" subtitle reference (and,
            for numeric video ids, the first subtitle of the page JSON),
          * the streams returned by ``hlsparse`` / ``hdsparse`` for every
            video reference, unless ``options.force_subtitle`` is set.

        Side effects: may set ``options.live`` and, when
        ``options.output_auto`` is set, ``options.service`` and
        ``options.output``.
        """
        parse = urlparse(self.url)
        if parse.netloc in ("www.svtplay.se", "svtplay.se"):
            if not parse.path.startswith("/video"):
                yield ServiceError("This mode is not supported anymore. need the url with the video")
                return

        vid = self.find_video_id()
        if vid is None:
            yield ServiceError("Cant find video id for this video")
            return
        # Purely numeric ids are handled as the "old" kind: their metadata is
        # additionally fetched from the page URL itself (see below).
        old = re.match(r"^[0-9]+$", vid) is not None

        url = "http://www.svt.se/videoplayer-api/video/%s" % vid
        response = self.http.request("get", url)
        if response.status_code == 404:
            yield ServiceError("Can't get the json file for %s" % url)
            return

        data = response.json()
        if "live" in data:
            self.options.live = data["live"]
        if old:
            dataj = self.http.request("get", self.url, params={"output": "json"}).json()
        else:
            dataj = data

        if self.options.output_auto:
            self.options.service = "svtplay"
            self.options.output = self.outputfilename(dataj, self.options.output,
                                                      ensure_unicode(self.get_urldata()))

        if self.exclude(self.options):
            yield ServiceError("Excluding video")
            return

        for reference in data.get("subtitleReferences") or []:
            if reference["format"] == "websrt":
                yield subtitle(copy.copy(self.options), "wrst", reference["url"])

        if old:
            # Subtitles are best effort: a missing key anywhere along the path
            # simply means "no subtitle" instead of aborting the download.
            legacy_references = dataj.get("video", {}).get("subtitleReferences")
            if legacy_references:
                suburl = legacy_references[0].get("url")
                if suburl:
                    yield subtitle(copy.copy(self.options), "wrst", suburl)

        if self.options.force_subtitle:
            return

        video_references = data.get("videoReferences")
        if not video_references:
            yield ServiceError("Media doesn't have any associated videos (yet?)")
            return

        for reference in video_references:
            video_format = reference["format"]
            video_url = reference["url"]
            if video_format in ("hls", "ios"):
                streams = hlsparse(self.options, self.http.request("get", video_url), video_url)
            elif video_format in ("hds", "flash"):
                # NOTE(review): URLs under /se/secure/ are skipped, presumably
                # because they are protected streams - confirm.
                if re.search(r"\/se\/secure\/", video_url):
                    continue
                streams = hdsparse(self.options,
                                   self.http.request("get", video_url, params={"hdcore": "3.7.0"}),
                                   video_url)
            else:
                continue

            if streams:
                for key in list(streams.keys()):
                    yield streams[key]

    def find_video_id(self):
        """Return the video id for the current page, or ``None`` if not found.

        Lookup order:
          1. a ``data-video-id="..."`` attribute in the page data,
          2. a numeric ``/video/<id>/`` component in the URL path,
          3. a ``/videoEpisod-<id>/`` component in the URL path; in that case
             the service is re-pointed at ``http://www.svtplay.se/video/<id>/``
             and the lookup is repeated against the new page.
        """
        match = re.search(r'data-video-id="([^"]+)"', self.get_urldata())
        if match:
            return match.group(1)

        path = urlparse(self.url).path
        match = re.search(r"/video/([0-9]+)/", path)
        if match:
            return match.group(1)

        match = re.search(r"/videoEpisod-([^/]+)/", path)
        if match:
            # Drop the cached page data so it is fetched again for the new URL.
            self._urldata = None
            self._url = "http://www.svtplay.se/video/%s/" % match.group(1)
            self.get_urldata()
            return self.find_video_id()
        return None

    def find_all_episodes(self, options):
        """Return a sorted list of episode URLs for the current program page.

        The episode list is read from the RSS feed advertised by the page
        when there is one, otherwise from the ``/video...`` links with a
        ``play...`` class found in the page itself.  Collection stops after
        ``options.all_last`` entries when that value is a positive count.

        Returns ``None`` (after logging an error) when no episode can be found.
        """
        page = self.get_urldata()
        feed = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"', page)
        if feed is None:
            links = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', page)
            if not links:
                log.error("Couldn't retrieve episode list")
                return
            episodes = [urljoin("http://www.svtplay.se", link) for link in links]
        else:
            data = self.http.request("get", feed.group(1)).content
            xml = ET.XML(data)
            # Skip <link> elements without text: a None entry would make the
            # final sorted() call fail on Python 3.
            episodes = [item.text for item in xml.findall(".//item/link") if item.text]

        selected = []
        for count, episode in enumerate(episodes, 1):
            selected.append(episode)
            if count == options.all_last:
                break
        return sorted(selected)

    def outputfilename(self, data, filename, raw):
        """Build the automatic output name for a video.

        ``data`` is the decoded JSON describing the video, ``filename`` the
        currently configured output path (only its directory part is kept)
        and ``raw`` the page HTML handed to ``seasoninfo``.

        The result has the form ``<name>[.SxxEyy][.<episode title>]-<id>-svtplay``
        and is joined onto the directory of ``filename`` when there is one.
        """
        directory = os.path.dirname(filename)
        if "statistics" in data:
            name = data["statistics"]["folderStructure"]
            if name.find(".") > 0:
                name = name[:name.find(".")]
            if name.startswith("arkiv-"):
                name = name.replace("arkiv-", "")
            name = filenamify(name.replace("-", "."))
            other = filenamify(data["context"]["title"])
            video_id = data["videoId"]
        else:
            name = data["programTitle"]
            if name.find(".") > 0:
                name = name[:name.find(".")]
            name = filenamify(name.replace(" - ", "."))
            other = filenamify(data["episodeTitle"])
            version = data["programVersionId"]
            # hashlib only accepts bytes on Python 3, while the JSON decoder
            # hands out text; UTF-8 keeps the digest unchanged for ASCII ids.
            if not isinstance(version, bytes):
                version = version.encode("utf-8")
            video_id = hashlib.sha256(version).hexdigest()[:7]

        # Don't repeat the program name when the episode title is the same.
        if name == other:
            other = None

        season = self.seasoninfo(raw)
        title = name
        if season:
            title += ".%s" % season
        if other:
            title += ".%s" % other
        title += "-%s-svtplay" % video_id
        title = filenamify(title)

        if directory:
            return os.path.join(directory, title)
        return title

    def seasoninfo(self, data):
        """Extract an ``SxxEyy`` tag from the page HTML in ``data``.

        The text is taken from the ``play_video-area-aside__sub-title``
        element or, failing that, from the first ``data-title`` attribute.
        Returns ``None`` when neither is present or when the text holds no
        ``Avsnitt<N>`` marker.  The season defaults to 1 when no season
        number precedes the episode marker.
        """
        match = re.search(r'play_video-area-aside__sub-title">([^<]+)<span', data)
        if match is None:
            match = re.search(r'data-title="([^"]+)"', data)
            if match is None:
                return None

        # Strip spaces and newlines so that season and episode sit next to
        # each other, e.g. "...song2-Avsnitt5".
        line = re.sub(" +", "", match.group(1)).replace('\n', '')
        # "song" is presumably the tail of the Swedish word for season and
        # "Avsnitt" the word for episode - TODO confirm against a live page.
        match = re.search(r"(song(\d+)-)?Avsnitt(\d+)", line)
        if match is None:
            return None

        season = 1 if match.group(2) is None else int(match.group(2))
        episode = int(match.group(3))
        return "S%02dE%02d" % (season, episode)
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil --`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00			`import re`
svtplay: support for the new output mode 2014-08-27 22:41:38 +02:00			`import os`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`import xml.etree.ElementTree as ET`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`import copy`
svtplay: use sha256 of the version string as id 2015-12-27 20:55:10 +01:00			`import hashlib`

			`from svtplay_dl.log import log`
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`from svtplay_dl.service import Service, OpenGraphThumbMixin`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`from svtplay_dl.utils import filenamify, ensure_unicode`
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`from svtplay_dl.utils.urllib import urlparse, urljoin`
svtplay: Unused import HDS 2014-04-27 20:48:13 +02:00			`from svtplay_dl.fetcher.hds import hdsparse`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`from svtplay_dl.fetcher.hls import hlsparse`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`from svtplay_dl.subtitle import subtitle`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`from svtplay_dl.error import ServiceError`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`class Svtplay(Service, OpenGraphThumbMixin):`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`supported_domains = ['svtplay.se', 'svt.se', 'beta.svtplay.se', 'svtflow.se']`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`def get(self):`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`old = False`
hds: remove swf argument (it was unused) 2013-04-21 12:29:16 +02:00
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`parse = urlparse(self.url)`
svtplay: we should check if the user is using svtplay.se 2015-12-28 11:41:15 +01:00			`if parse.netloc == "www.svtplay.se" or parse.netloc == "svtplay.se":`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`if parse.path[:6] != "/video":`
			`yield ServiceError("This mode is not supported anymore. need the url with the video")`
			`return`
svtplay: move find video id into its own function 2016-01-10 15:33:30 +01:00
svtplay: use the data we already have 2016-01-10 20:42:23 +01:00			`vid = self.find_video_id()`
svtplay: move find video id into its own function 2016-01-10 15:33:30 +01:00			`if vid is None:`
			`yield ServiceError("Cant find video id for this video")`
			`return`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`if re.match("^[0-9]+$", vid):`
			`old = True`
svtplay: simplify json url 2015-10-29 18:08:25 +01:00
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`url = "http://www.svt.se/videoplayer-api/video/%s" % vid`
			`data = self.http.request("get", url)`
More request fixes 2015-08-30 11:27:31 +02:00			`if data.status_code == 404:`
svtplay: simplify json url 2015-10-29 18:08:25 +01:00			`yield ServiceError("Can't get the json file for %s" % url)`
More request fixes 2015-08-30 11:27:31 +02:00			`return`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`data = data.json()`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`if "live" in data:`
			`self.options.live = data["live"]`
			`if old:`
			`params = {"output": "json"}`
			`dataj = self.http.request("get", self.url, params=params).json()`
			`else:`
			`dataj = data`

Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`if self.options.output_auto:`
			`self.options.service = "svtplay"`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`self.options.output = self.outputfilename(dataj, self.options.output, ensure_unicode(self.get_urldata()))`
svtplay: support for the new output mode 2014-08-27 22:41:38 +02:00
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`if self.exclude(self.options):`
Better excluding message fixing #198 2015-09-06 23:04:48 +02:00			`yield ServiceError("Excluding video")`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00			`return`

svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`if "subtitleReferences" in data:`
			`for i in data["subtitleReferences"]:`
svtplay: it should be websrt and not wsrt 2015-12-28 11:41:39 +01:00			`if i["format"] == "websrt":`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`yield subtitle(copy.copy(self.options), "wrst", i["url"])`
			`if old and dataj["video"]["subtitleReferences"]:`
svtplay: subtitles didn’t get the right filename 2014-12-22 10:04:32 +01:00			`try:`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`suburl = dataj["video"]["subtitleReferences"][0]["url"]`
svtplay: subtitles didn’t get the right filename 2014-12-22 10:04:32 +01:00			`except KeyError:`
			`pass`
			`if suburl and len(suburl) > 0:`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`yield subtitle(copy.copy(self.options), "wrst", suburl)`
svtplay: subtitles didn’t get the right filename 2014-12-22 10:04:32 +01:00
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`if self.options.force_subtitle:`
service: no need to check video streams when we force subtitles 2014-09-21 19:12:17 +02:00			`return`

svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`if len(data["videoReferences"]) == 0:`
svtplay: Catch media without any videoReferences This happens when they publish information about the TV episode before publishing the video stream. Probably due to some bug in SVT Play. The web player is also unable the play the video, reporting "Can't play the program, try again later". 2015-10-25 15:44:47 +01:00			`yield ServiceError("Media doesn't have any associated videos (yet?)")`
			`return`

svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`for i in data["videoReferences"]:`
svtplay: öppetarkiv still use the old names for format 2016-01-02 15:58:55 +01:00			`if i["format"] == "hls" or i["format"] == "ios":`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`streams = hlsparse(self.options, self.http.request("get", i["url"]), i["url"])`
svtplay: don’t crash when we can’t get any HLS streams 2015-02-01 09:09:37 +01:00			`if streams:`
			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
svtplay: öppetarkiv still use the old names for format 2016-01-02 15:58:55 +01:00			`if i["format"] == "hds" or i["format"] == "flash":`
svtplay: convert into new video fetcher 2014-04-21 18:41:15 +02:00			`match = re.search(r"\/se\/secure\/", i["url"])`
			`if not match:`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`streams = hdsparse(self.options, self.http.request("get", i["url"], params={"hdcore": "3.7.0"}), i["url"])`
HDS: show an error message when we see DRM files. 2014-10-12 23:31:02 +02:00			`if streams:`
			`for n in list(streams.keys()):`
			`yield streams[n]`
svtplay: move find video id into its own function 2016-01-10 15:33:30 +01:00
svtplay: use the data we already have 2016-01-10 20:42:23 +01:00			`def find_video_id(self):`
svtplay: move find video id into its own function 2016-01-10 15:33:30 +01:00			`match = re.search('data-video-id="([^"]+)"', self.get_urldata())`
			`if match:`
			`return match.group(1)`
			`parse = urlparse(self.url)`
			`match = re.search("/video/([0-9]+)/", parse.path)`
			`if match:`
			`return match.group(1)`
svtplay: another videoid from the url 2016-01-10 20:35:31 +01:00			`match = re.search("/videoEpisod-([^/]+)/", parse.path)`
			`if match:`
			`self._urldata = None`
			`self._url = "http://www.svtplay.se/video/%s/" % match.group(1)`
svtplay: update the data from the new vid 2016-01-10 20:43:10 +01:00			`self.get_urldata()`
			`return self.find_video_id()`
svtplay: move find video id into its own function 2016-01-10 15:33:30 +01:00			`return None`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00			`def find_all_episodes(self, options):`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',`
more requests fixes for get_urldata. dont check the first byte 2015-08-31 22:04:59 +02:00			`self.get_urldata())`
Break out OppetArkiv to subclass of Svtplay 2014-05-01 19:51:21 +02:00			`if match is None:`
more requests fixes for get_urldata. dont check the first byte 2015-08-31 22:04:59 +02:00			`match = re.findall(r'a class="play[^"]+"\s+href="(/video[^"]+)"', self.get_urldata())`
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`if not match:`
			`log.error("Couldn't retrieve episode list")`
			`return`
			`episodes = [urljoin("http://www.svtplay.se", x) for x in match]`
			`else:`
svtplay: this need to be text 2015-09-01 23:53:13 +02:00			`data = self.http.request("get", match.group(1)).content`
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`xml = ET.XML(data)`
Add --all-episodes option (for svt only currently) 2014-02-18 18:56:28 +01:00
svtplay: Grab all episodes from the new program page. fixes #216 2015-03-08 00:44:26 +01:00			`episodes = [x.text for x in xml.findall(".//item/link")]`
svtplay: support for downloading lastest X episodes 2014-12-21 13:01:51 +01:00			`episodes_new = []`
			`n = 1`
			`for i in episodes:`
			`episodes_new.append(i)`
			`if n == options.all_last:`
			`break`
			`n += 1`
			`return sorted(episodes_new)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00

svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`def outputfilename(self, data, filename, raw):`
			`directory = os.path.dirname(filename)`
			`if "statistics" in data:`
			`name = data["statistics"]["folderStructure"]`
			`if name.find(".") > 0:`
			`name = name[:name.find(".")]`
			`match = re.search("^arkiv-", name)`
			`if match:`
			`name = name.replace("arkiv-", "")`
svtplay: dont include other in the filename Sometimes other is the same as name. fixes #319 2016-01-03 02:42:32 +01:00			`name = filenamify(name.replace("-", "."))`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`other = filenamify(data["context"]["title"])`
			`id = data["videoId"]`
			`else:`
			`name = data["programTitle"]`
			`if name.find(".") > 0:`
			`name = name[:name.find(".")]`
svtplay: dont include other in the filename Sometimes other is the same as name. fixes #319 2016-01-03 02:42:32 +01:00			`name = filenamify(name.replace(" - ", "."))`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`other = filenamify(data["episodeTitle"])`
svtplay: use sha256 of the version string as id 2015-12-27 20:55:10 +01:00			`id = hashlib.sha256(data["programVersionId"]).hexdigest()[:7]`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00
svtplay: dont include other in the filename Sometimes other is the same as name. fixes #319 2016-01-03 02:42:32 +01:00			`if name == other:`
			`other = None`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`season = self.seasoninfo(raw)`
svtplay: dont include other in the filename Sometimes other is the same as name. fixes #319 2016-01-03 02:42:32 +01:00			`title = name`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`if season:`
svtplay: dont include other in the filename Sometimes other is the same as name. fixes #319 2016-01-03 02:42:32 +01:00			`title += ".%s" % season`
			`if other:`
			`title += ".%s" % other`
			`title += "-%s-svtplay" % id`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`title = filenamify(title)`
			`if len(directory):`
			`output = os.path.join(directory, title)`
			`else:`
			`output = title`
			`return output`


			`def seasoninfo(self, data):`
			`match = re.search(r'play_video-area-aside__sub-title">([^<]+)<span', data)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`if match:`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00			`line = match.group(1)`
svtplay: Add season and episode info in the filename 2014-12-28 13:57:50 +01:00			`else:`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`match = re.search(r'data-title="([^"]+)"', data)`
			`if match:`
			`line = match.group(1)`
			`else:`
			`return None`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`line = re.sub(" +", "", match.group(1)).replace('\n', '')`
			`match = re.search(r"(song(\d+)-)?Avsnitt(\d+)", line)`
			`if match:`
			`if match.group(2) is None:`
			`season = 1`
			`else:`
			`season = int(match.group(2))`
			`if season < 10:`
			`season = "0%s" % season`
			`episode = int(match.group(3))`
			`if episode < 10:`
			`episode = "0%s" % episode`
			`return "S%sE%s" % (season, episode)`
svtplay: support for öppetarkiv this will only work with those with episodes in the name. this fixes #140 2014-12-28 14:33:25 +01:00			`else:`
svtplay: support for the next version of the page. video id is left to fix... dunno how to do it in a good way 2015-12-27 14:40:27 +01:00			`return None`