svtplay-dl/lib/svtplay_dl/service/urplay.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
from __future__ import absolute_import
import re
import json
import copy
import xml.etree.ElementTree as ET

from svtplay_dl.service import Service, OpenGraphThumbMixin
from svtplay_dl.utils.urllib import urljoin
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.log import log
from svtplay_dl.error import ServiceError
from svtplay_dl.subtitle import subtitle
from svtplay_dl.utils import filenamify


class Urplay(Service, OpenGraphThumbMixin):
    """Service handler for the UR sites listed in ``supported_domains``.

    ``get`` extracts subtitles and HLS streams from the JSON passed to
    ``urPlayer.init(...)`` in the page source.  ``find_all_episodes`` lists
    episode URLs, preferring the RSS feed advertised by the page and falling
    back to scraping links out of the HTML when no feed is advertised.
    """

    # Domains handled by this service.
    supported_domains = ['urplay.se', 'ur.se', 'betaplay.ur.se', 'urskola.se']

    def get(self):
        """Yield the subtitles and HLS streams found for the current page.

        Yields a single ``ServiceError`` and stops when the player JSON
        can't be located in the page or when ``self.exclude()`` is true.
        Otherwise yields one ``subtitle`` per subtitle entry that has a
        label, then the streams of the standard HLS playlist and, when the
        JSON names an HD file, the streams of the HD playlist.
        """
        data = self.get_urldata()
        # The dot is escaped so that only a literal "urPlayer.init(" matches.
        match = re.search(r"urPlayer\.init\((.*)\);", data)
        if not match:
            yield ServiceError("Can't find json info")
            return

        if self.exclude():
            yield ServiceError("Excluding video")
            return

        jsondata = json.loads(match.group(1))

        for sub in jsondata["subtitles"]:
            # Entries without a label are skipped.
            if "label" not in sub:
                continue
            # The "file" value is split on "," and only the first part is used.
            suburl = sub["file"].split(",")[0]
            if self.options.get_all_subtitles:
                # Suffix the subtitle with its label so several can coexist.
                yield subtitle(copy.copy(self.options), "tt", suburl, "-" + filenamify(sub["label"]))
            else:
                yield subtitle(copy.copy(self.options), "tt", suburl)

        streaming = jsondata["streaming_config"]
        if "streamer" in streaming:
            basedomain = streaming["streamer"]["redirect"]
        else:
            # No streamer entry: ask the load balancer which host to use.
            lbjson = json.loads(self.http.request("get", streaming["loadbalancer"]).text)
            basedomain = lbjson["redirect"]

        hls_file = streaming["http_streaming"]["hls_file"]
        hls = "http://%s/%s%s" % (basedomain, jsondata["file_http"], hls_file)
        # A missing, None or empty "file_http_hd" simply means no HD variant.
        hls_hd = None
        if jsondata.get("file_http_hd"):
            hls_hd = "http://%s/%s%s" % (basedomain, jsondata["file_http_hd"], hls_file)

        for stream in self._hls_streams(hls):
            yield stream
        if hls_hd:
            for stream in self._hls_streams(hls_hd):
                yield stream

    def _hls_streams(self, playlist_url):
        """Fetch the HLS playlist at ``playlist_url`` and yield each stream in it."""
        streams = hlsparse(self.options, self.http.request("get", playlist_url), playlist_url)
        for key in list(streams.keys()):
            yield streams[key]

    def scrape_episodes(self, options):
        """Return episode URLs scraped from the links in the page HTML.

        Every matched ``href`` is made absolute against ``self.url``.
        ``options.all_last`` keeps only the last N links found: ``-1`` means
        no limit and ``0`` returns an empty list, matching what
        ``find_all_episodes`` returns for ``0`` on the RSS path.
        """
        data = self.get_urldata()
        patterns = (
            r'<a class="puff tv video"\s+title="[^"]*"\s+href="([^"]*)"',
            r'<a class="card program"\s+href="([^"]*)"',
        )
        res = []
        for pattern in patterns:
            for relurl in re.findall(pattern, data):
                res.append(urljoin(self.url, relurl.replace("&amp;", "&")))

        limit = options.all_last
        if limit == 0:
            # res[-0:] would be the whole list, not "the last zero" items.
            return []
        if limit != -1:
            res = res[-limit:]

        return res

    def find_all_episodes(self, options):
        """Return the episode URLs for the current page.

        Uses the RSS feed referenced by the page's ``<link rel="alternate">``
        tag when there is one, otherwise falls back to ``scrape_episodes``.
        On the RSS path the result holds unique links in feed order, capped
        at ``options.all_last`` entries (``-1`` means no cap).
        """
        match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',
                          self.get_urldata())
        if match is None:
            log.info("Couldn't retrieve episode list as rss, trying to scrape")
            return self.scrape_episodes(options)

        # urljoin gives the same URL as plain concatenation for a
        # root-relative href and also copes with an absolute one.
        url = urljoin("http://urplay.se", match.group(1).replace("&amp;", "&"))
        feed = ET.XML(self.http.request("get", url).content)

        episodes_new = []
        for item in feed.findall(".//item/link"):
            # Count unique episodes only, so that duplicate feed entries
            # don't use up the --all-last budget.
            if len(episodes_new) == options.all_last:
                break
            link = item.text
            # Skip empty <link/> elements and links already collected.
            if not link or link in episodes_new:
                continue
            episodes_new.append(link)
        return episodes_new
Add editor modelines 2013-03-02 21:26:28 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-`
Use absolute_import from __future__ everywhere 2013-03-01 23:39:42 +01:00			`from __future__ import absolute_import`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00			`import re`
urplay: add support for the new site some help from @unayok 2013-03-03 10:58:37 +01:00			`import json`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`import copy`
urplay: find all episodes support. 2014-04-03 21:09:42 +02:00			`import xml.etree.ElementTree as ET`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`from svtplay_dl.service import Service, OpenGraphThumbMixin`
ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`from svtplay_dl.utils.urllib import urljoin`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`from svtplay_dl.fetcher.hls import hlsparse`
urplay: import log 2015-09-06 22:41:29 +02:00			`from svtplay_dl.log import log`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`from svtplay_dl.error import ServiceError`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`from svtplay_dl.subtitle import subtitle`
urplay: dont crash when there is no label 2016-05-03 20:35:16 +02:00			`from svtplay_dl.utils import filenamify`
Add minimal set of imports for services to work 2013-02-12 19:43:37 +01:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
Add --thumbnail Names thumbnails as $basename.tbn (Hi xbmc!) 2014-01-19 14:26:48 +01:00			`class Urplay(Service, OpenGraphThumbMixin):`
urplay: support for urskola fixes: #358 2016-03-20 18:21:07 +01:00			`supported_domains = ['urplay.se', 'ur.se', 'betaplay.ur.se', 'urskola.se']`
Initial work on splitting script to modules Does not work reliably (downloading SVTPlay videos with HDS may work if you're lucky). 2013-01-17 00:21:47 +01:00
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`def get(self):`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`data = self.get_urldata()`
Rewrite http request handling. 2014-12-08 23:07:02 +01:00			`match = re.search(r"urPlayer.init\((.*)\);", data)`
urplay: it is not necessary to replace characters anymore 2014-01-03 12:15:21 +01:00			`if not match:`
Show only one error message instead of two 2015-09-06 14:19:10 +02:00			`yield ServiceError("Can't find json info")`
service: replace sys.exit with return 2014-10-06 23:21:43 +02:00			`return`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00
remove options in argument for exclude 2016-05-14 22:54:30 +02:00			`if self.exclude():`
Better excluding message fixing #198 2015-09-06 23:04:48 +02:00			`yield ServiceError("Excluding video")`
Support for exclude filenames with WORD in them. this fixes #190 2014-12-22 17:41:40 +01:00			`return`

urplay: it is not necessary to replace characters anymore 2014-01-03 12:15:21 +01:00			`data = match.group(1)`
urplay: add support for the new site some help from @unayok 2013-03-03 10:58:37 +01:00			`jsondata = json.loads(data)`
urplay: Check how many items we have in subtitles This fixes #235 2015-05-06 10:33:24 +02:00			`if len(jsondata["subtitles"]) > 0:`
urplay: dont crash when there is no label 2016-05-03 20:35:16 +02:00			`for sub in jsondata["subtitles"]:`
			`if "label" in sub:`
			`if self.options.get_all_subtitles:`
			`yield subtitle(copy.copy(self.options), "tt", sub["file"].split(",")[0], "-" + filenamify(sub["label"]))`
			`else:`
			`yield subtitle(copy.copy(self.options), "tt", sub["file"].split(",")[0])`
Merge branch 'master' of https://github.com/spaam/svtplay-dl.git into rawsubs # By Johan Andersson (7) and qnorsten (4) # Via Johan Andersson (2) and qnorsten (1) * 'master' of https://github.com/spaam/svtplay-dl.git: urplay: dont crash when there is no label twitch: Fix so we can get live url again fetcher: dont print newline when its silent main: add dash in the help text for preferred tv4play: free videos with premuim showed up as None tv4play: fix unicode errors in showname Fixed missing space error Some minor code improvments postprocess: ext includes dot before fileextention Updated gitignore Added support for subfix in filename of subtitles, when several languages are available Added command to download all available subtitles for a video (--all-subtitles) Added support to print all the subtitle urls when the get url parameter is used Fixed so subtitle url and stream url get printed if -S and -g but not --force-subtitles parameter is used Added support for downloading all subtitles and auto subfix them with language name for Urplay and Urskola (even when just one subtitle is downloaded) 2016-05-03 22:43:57 +02:00
urplay: rtmp is so 90s. removing it 2015-10-25 02:07:25 +02:00			`if "streamer" in jsondata["streaming_config"]:`
			`basedomain = jsondata["streaming_config"]["streamer"]["redirect"]`
			`else:`
			`lbjson = self.http.request("get", jsondata["streaming_config"]["loadbalancer"]).text`
			`lbjson = json.loads(lbjson)`
			`basedomain = lbjson["redirect"]`
urplay: they updated their site a bit. this fixes #213 2015-03-21 17:34:22 +01:00			`http = "http://%s/%s" % (basedomain, jsondata["file_http"])`
urplay: support for HD streams 2014-01-09 00:32:14 +01:00			`hd = None`
urplay: they updated their site a bit. this fixes #213 2015-03-21 17:34:22 +01:00			`if len(jsondata["file_http_hd"]) > 0:`
			`http_hd = "http://%s/%s" % (basedomain, jsondata["file_http_hd"])`
urplay: support for HD streams 2014-01-09 00:32:14 +01:00			`hls_hd = "%s%s" % (http_hd, jsondata["streaming_config"]["http_streaming"]["hls_file"])`
			`hd = True`
urplay: add support for the new site some help from @unayok 2013-03-03 10:58:37 +01:00			`hls = "%s%s" % (http, jsondata["streaming_config"]["http_streaming"]["hls_file"])`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`streams = hlsparse(self.options, self.http.request("get", hls), hls)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
urplay: support for HD streams 2014-01-09 00:32:14 +01:00			`if hd:`
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`streams = hlsparse(self.options, self.http.request("get", hls_hd), hls_hd)`
services: parse hls playlist first. 2014-04-21 21:55:39 +02:00			`for n in list(streams.keys()):`
reorder arguments for hlsparse and hdsparse 2015-10-04 14:37:16 +02:00			`yield streams[n]`
Split subtitle getting to separate method 2014-01-11 23:02:47 +01:00
ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`def scrape_episodes(self, options):`
			`res = []`
			`for relurl in re.findall(r'<a class="puff tv video"\s+title="[^"]*"\s+href="([^"]*)"',`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`self.get_urldata()):`
ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`res.append(urljoin(self.url, relurl.replace("&amp;", "&")))`

urplay: fix scrape so it works again this fixes: #374 2016-04-26 23:22:59 +02:00			`for relurl in re.findall(r'<a class="card program"\s+href="([^"]*)"',`
			`self.get_urldata()):`
			`res.append(urljoin(self.url, relurl.replace("&amp;", "&")))`

ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`if options.all_last != -1:`
			`res = res[-options.all_last:]`

			`return res`

urplay: find all episodes support. 2014-04-03 21:09:42 +02:00			`def find_all_episodes(self, options):`
			`match = re.search(r'<link rel="alternate" type="application/rss\+xml" [^>]*href="([^"]+)"',`
adding request support. still need some more work 2015-08-30 00:06:20 +02:00			`self.get_urldata())`
urplay: find all episodes support. 2014-04-03 21:09:42 +02:00			`if match is None:`
ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`log.info("Couldn't retrieve episode list as rss, trying to scrape")`
			`return self.scrape_episodes(options)`

urplay: find all episodes support. 2014-04-03 21:09:42 +02:00			`url = "http://urplay.se%s" % match.group(1).replace("&amp;", "&")`
replace self.http.get with our own function with debug info 2015-08-31 19:45:15 +02:00			`xml = ET.XML(self.http.request("get", url).content)`
urplay: find all episodes support. 2014-04-03 21:09:42 +02:00
urplay: support for downloading lastest X episodes 2014-12-21 13:45:44 +01:00			`episodes = [x.text for x in xml.findall(".//item/link")]`
			`episodes_new = []`
			`n = 0`
			`for i in episodes:`
			`if n == options.all_last:`
			`break`
urplay: dont include dupes 2015-09-06 22:41:49 +02:00			`if i not in episodes_new:`
			`episodes_new.append(i)`
urplay: support for downloading lastest X episodes 2014-12-21 13:45:44 +01:00			`n += 1`
ur.se: Add scraping for all episodes if rss doesn't exist Example: http://www.ur.se/Produkter/178229-Vilda-djur-Trana 2015-03-07 10:43:21 +01:00			`return episodes_new`