svtplay-dl/lib/svtplay_dl/service/urplay.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import re
import json
import copy
from urllib.parse import urljoin, urlparse


from svtplay_dl.service import Service, OpenGraphThumbMixin
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.log import log
from svtplay_dl.error import ServiceError
from svtplay_dl.subtitle import subtitle
from svtplay_dl.utils.text import filenamify


class Urplay(Service, OpenGraphThumbMixin):
    """Extractor for pages hosted on the domains in ``supported_domains``.

    ``get`` yields the subtitles and HLS streams of a single page, and
    ``find_all_episodes`` collects links to related episodes.
    """
    supported_domains = ['urplay.se', 'ur.se', 'betaplay.ur.se', 'urskola.se']

    def get(self):
        """Yield subtitles and HLS streams for the page at ``self.url``.

        The player configuration is the JSON argument of the
        ``urPlayer.init(...)`` call embedded in the page.  A single
        ``ServiceError`` is yielded, and the generator ends, when that call
        cannot be found or when ``self.exclude()`` is true.

        Yields:
            ``subtitle`` objects for every subtitle entry carrying a
            ``label``, followed by the values of the mapping returned by
            ``hlsparse`` for the standard playlist and, when the page
            advertises one, the HD playlist.
        """
        data = self.get_urldata()
        match = re.search(r"urPlayer\.init\((.*)\);", data)
        if not match:
            yield ServiceError("Can't find json info")
            return

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        jsondata = json.loads(match.group(1))

        # A missing or null "subtitles" entry is treated as "no subtitles".
        for sub in jsondata.get("subtitles") or []:
            if "label" not in sub:
                continue
            # "file" may hold several comma separated entries; only the
            # first one is used, resolved against the page URL because it
            # can be relative.
            absurl = urljoin(self.url, sub["file"].split(",")[0])
            subtype = "wrst" if absurl.endswith("vtt") else "tt"
            if self.options.get_all_subtitles:
                yield subtitle(copy.copy(self.options), subtype, absurl, "-" + filenamify(sub["label"]))
            else:
                yield subtitle(copy.copy(self.options), subtype, absurl)

        streaming_config = jsondata["streaming_config"]
        if "streamer" in streaming_config:
            basedomain = streaming_config["streamer"]["redirect"]
        else:
            # No streamer given: ask the load balancer which host to use.
            url = streaming_config["loadbalancer"]
            if url.startswith("/"):
                # Scheme-less URL, prepend one so it can be requested.
                url = "https:{}".format(url)
            lbjson = json.loads(self.http.request("get", url).text)
            basedomain = lbjson["redirect"]

        hls_file = streaming_config["http_streaming"]["hls_file"]
        files = [jsondata["file_http"]]
        # The HD variant is optional; skip it when absent or empty.
        if jsondata.get("file_http_hd"):
            files.append(jsondata["file_http_hd"])

        for path in files:
            hls = "https://{0}/{1}{2}".format(basedomain, path, hls_file)
            streams = hlsparse(self.options, self.http.request("get", hls), hls)
            for stream in streams.values():
                yield stream

    def find_all_episodes(self, options):
        """Return the unique episode URLs related to ``self.url``.

        For ``urskola.se`` the links are scraped from the page (or from the
        page referenced by its ``data-limit`` link when present).  For other
        hosts, ``card-link`` anchors whose ``/program/<id>-<keyword>-`` part
        shares the keyword of the current URL are collected.

        Args:
            options: object whose ``all_last`` attribute caps the number of
                unique URLs returned.  Collection stops as soon as that many
                have been gathered, so a value the count never reaches
                (for example a negative one) imposes no limit.

        Returns:
            A list of absolute URLs in page order without duplicates, or
            ``None`` (after logging an error) when the current URL does not
            contain a ``/program/<id>-<keyword>-`` path on non-urskola hosts.
        """
        parse = urlparse(self.url)
        episodes = []

        if parse.netloc == "urskola.se":
            data = self.get_urldata()
            match = re.search(r'data-limit="[^"]+" href="([^"]+)"', data)
            if match:
                data = self.http.get(urljoin("https://urskola.se", match.group(1))).text
            tags = re.findall(r'<a class="puff program tv video" title="[^"]+" href="([^"]+)"', data)
            episodes = [urljoin("https://urskola.se/", tag) for tag in tags]
        else:
            program = re.compile(r"/program/\d+-(\w+)-")
            match = program.search(parse.path)
            if not match:
                log.error("Can't find any videos")
                return None
            keyword = match.group(1)
            for link in re.findall(r'card-link" href="([^"]+)"', self.get_urldata()):
                match = program.search(link)
                if match and match.group(1) == keyword:
                    episodes.append(urljoin("https://urplay.se/", link))

        # Deduplicate while keeping order.  The limit is checked against the
        # number of unique links so duplicates do not eat into it.
        episodes_new = []
        for episode in episodes:
            if len(episodes_new) == options.all_last:
                break
            if episode not in episodes_new:
                episodes_new.append(episode)
        return episodes_new
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil --`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00			`import re`
urplay: add support for the new site some help from @unayok 2013-03-03 10:58:37 +01:00			`import json`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`import copy`
No need for utils.urllib anymore 2018-01-30 22:07:21 +01:00			`from urllib.parse import urljoin, urlparse`

Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`from svtplay_dl.service import Service, OpenGraphThumbMixin`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`from svtplay_dl.fetcher.hls import hlsparse`
urplay: import log 2015-09-06 22:41:29 +02:00			`from svtplay_dl.log import log`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`from svtplay_dl.error import ServiceError`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`from svtplay_dl.subtitle import subtitle`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`from svtplay_dl.utils.text import filenamify`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
pep8: expected 2 blank lines, found 1 2015-09-15 20:10:32 +02:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`class Urplay(Service, OpenGraphThumbMixin):`
urplay: support for urskola fixes: #358 2016-03-20 18:21:07 +01:00			`supported_domains = ['urplay.se', 'ur.se', 'betaplay.ur.se', 'urskola.se']`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`def get(self):`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`data = self.get_urldata()`
Rewrite http request handling. 2014-12-08 23:07:02 +01:00			`match = re.search(r"urPlayer.init\((.*)\);", data)`
urplay: it is not necessary to replace characters anymore 2014-01-03 12:15:21 +01:00			`if not match:`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`yield ServiceError("Can't find json info")`
service: replace sys.exit with return 2014-10-06 23:21:43 +02:00			`return`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00
remove options in argument for exclude 2016-05-14 22:54:30 +02:00			`if self.exclude():`
Better excluding message fixing #198 2015-09-06 23:04:48 +02:00			`yield ServiceError("Excluding video")`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00			`return`

urplay: it is not necessary to replace characters anymore 2014-01-03 12:15:21 +01:00			`data = match.group(1)`
urplay: add support for the new site some help from @unayok 2013-03-03 10:58:37 +01:00			`jsondata = json.loads(data)`
urplay: Check how many items we have in subtitles This fixes #235 2015-05-06 10:33:24 +02:00			`if len(jsondata["subtitles"]) > 0:`
urplay: dont crash when there is no label 2016-05-03 20:35:16 +02:00			`for sub in jsondata["subtitles"]:`
			`if "label" in sub:`
Fix subtitles in urplay. The subtitle URL was relative (in particular it left out the scheme). Newer videos appear to use VTT/WRST format 2016-11-30 23:21:42 +01:00			`absurl = urljoin(self.url, sub["file"].split(",")[0])`
			`if absurl.endswith("vtt"):`
			`subtype = "wrst"`
			`else:`
			`subtype = "tt"`
urplay: dont crash when there is no label 2016-05-03 20:35:16 +02:00			`if self.options.get_all_subtitles:`
Fix subtitles in urplay. The subtitle URL was relative (in particular it left out the scheme). Newer videos appear to use VTT/WRST format 2016-11-30 23:21:42 +01:00			`yield subtitle(copy.copy(self.options), subtype, absurl, "-" + filenamify(sub["label"]))`
urplay: dont crash when there is no label 2016-05-03 20:35:16 +02:00			`else:`
Fix subtitles in urplay. The subtitle URL was relative (in particular it left out the scheme). Newer videos appear to use VTT/WRST format 2016-11-30 23:21:42 +01:00			`yield subtitle(copy.copy(self.options), subtype, absurl)`

urplay: rtmp is so 90s. removing it 2015-10-25 02:07:25 +02:00			`if "streamer" in jsondata["streaming_config"]:`
			`basedomain = jsondata["streaming_config"]["streamer"]["redirect"]`
			`else:`
urplay: in some cases they show schema. 2018-01-04 22:17:13 +01:00			`url = jsondata["streaming_config"]["loadbalancer"]`
			`if url[:1] == "/":`
			`url = "https:{}".format(url)`
			`lbjson = self.http.request("get", url).text`
urplay: rtmp is so 90s. removing it 2015-10-25 02:07:25 +02:00			`lbjson = json.loads(lbjson)`
			`basedomain = lbjson["redirect"]`
urplay: add scheme to urls fixes #742 2018-01-04 22:09:17 +01:00			`http = "https://{0}/{1}".format(basedomain, jsondata["file_http"])`
urplay: support for HD streams 2014-01-09 00:32:14 +01:00			`hd = None`
urplay: they updated their site a bit. this fixes #213 2015-03-21 17:34:22 +01:00			`if len(jsondata["file_http_hd"]) > 0:`
urplay: add scheme to urls fixes #742 2018-01-04 22:09:17 +01:00			`http_hd = "https://{0}/{1}".format(basedomain, jsondata["file_http_hd"])`
service: change str formating from '%s' to '.format' 2017-10-09 22:35:13 +02:00			`hls_hd = "{0}{1}".format(http_hd, jsondata["streaming_config"]["http_streaming"]["hls_file"])`
urplay: support for HD streams 2014-01-09 00:32:14 +01:00			`hd = True`
service: change str formating from '%s' to '.format' 2017-10-09 22:35:13 +02:00			`hls = "{0}{1}".format(http, jsondata["streaming_config"]["http_streaming"]["hls_file"])`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`streams = hlsparse(self.options, self.http.request("get", hls), hls)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
urplay: support for HD streams 2014-01-09 00:32:14 +01:00			`if hd:`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`streams = hlsparse(self.options, self.http.request("get", hls_hd), hls_hd)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
urplay: find all episodes support. 2014-04-03 21:09:42 +02:00			`def find_all_episodes(self, options):`
urplay: detect related videos for -A this is good enough to find related videos. it would be better to use bs4 with lxml.. fixes #470 2016-12-06 22:42:54 +01:00			`parse = urlparse(self.url)`
			`episodes = []`
urplay: support for -A on urskola.se fixes #677 2017-09-16 17:36:37 +02:00
			`if parse.netloc == "urskola.se":`
			`data = self.get_urldata()`
			`match = re.search('data-limit="[^"]+" href="([^"]+)"', data)`
			`if match:`
urskola: fix -A 2018-01-04 22:16:49 +01:00			`res = self.http.get(urljoin("https://urskola.se", match.group(1)))`
urplay: support for -A on urskola.se fixes #677 2017-09-16 17:36:37 +02:00			`data = res.text`
urskola: fix -A 2018-01-04 22:16:49 +01:00			`tags = re.findall('<a class="puff program tv video" title="[^"]+" href="([^"]+)"', data)`
urplay: support for -A on urskola.se fixes #677 2017-09-16 17:36:37 +02:00			`for i in tags:`
urskola: fix -A 2018-01-04 22:16:49 +01:00			`url = urljoin("https://urskola.se/", i)`
urplay: support for -A on urskola.se fixes #677 2017-09-16 17:36:37 +02:00			`if url not in episodes:`
			`episodes.append(url)`
			`else:`
			`match = re.search("/program/\d+-(\w+)-", parse.path)`
			`if not match:`
			`log.error("Can't find any videos")`
			`return None`
			`keyword = match.group(1)`
			`all_links = re.findall('card-link" href="([^"]+)"', self.get_urldata())`
			`for i in all_links:`
			`match = re.search("/program/\d+-(\w+)-", i)`
			`if match and match.group(1) == keyword:`
urplay: add scheme to urls fixes #742 2018-01-04 22:09:17 +01:00			`episodes.append(urljoin("https://urplay.se/", i))`
urplay: find all episodes support. 2014-04-03 21:09:42 +02:00
urplay: support for downloading latest X episodes 2014-12-21 13:45:44 +01:00			`episodes_new = []`
			`n = 0`
			`for i in episodes:`
			`if n == options.all_last:`
			`break`
urplay: dont include dupes 2015-09-06 22:41:49 +02:00			`if i not in episodes_new:`
			`episodes_new.append(i)`
urplay: support for downloading latest X episodes 2014-12-21 13:45:44 +01:00			`n += 1`
ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`return episodes_new`