svtplay-dl/lib/svtplay_dl/service/picsearch.py

# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
import copy
import json
import re
from urllib.parse import urlparse

from svtplay_dl.error import ServiceError
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.fetcher.http import HTTP
from svtplay_dl.service import OpenGraphThumbMixin
from svtplay_dl.service import Service


class Picsearch(Service, OpenGraphThumbMixin):
    """Service for videos served through the Picsearch / Screen9 player.

    ``backupapi`` starts out as ``None``.  :meth:`get_auth` stores a URL in
    it whenever the token had to be looked up through an embed script or an
    iframe; :meth:`get` then queries that URL as a second source of streams.
    """

    supported_domains = ["dn.se", "mobil.dn.se", "di.se", "csp.picsearch.com", "csp.screen9.com"]
    backupapi = None

    def get(self):
        """Yield the fetchers (or a ``ServiceError``) for the current page.

        A ``ServiceError`` is yielded, and the generator stops, when either
        the ajax auth token or the media id cannot be located.  Otherwise the
        streams listed by the player API are yielded, followed by the HLS
        streams from the backup API when ``self.backupapi`` is set.
        """
        token = self.get_auth()
        if not token:
            yield ServiceError("Cant find token for video")
            return

        media = self.get_mediaid()
        if not media:
            yield ServiceError("Cant find media id")
            return
        # get_mediaid() hands back either a regex match or a plain string
        # (the URL fragment); normalise to the id string.
        media_id = media if isinstance(media, str) else media.group(1)

        player_url = f"http://csp.screen9.com/player?eventParam=1&ajaxauth={token.group(1)}&method=embed&mediaid={media_id}"
        payload = json.loads(self.http.request("get", player_url).text)

        if "data" in payload:
            info = payload["data"]
            status = info["publishing_status"]
            if "live" in status:
                self.config.set("live", status["live"])
            for stream in info["streams"]:
                if "application/x-mpegurl" in stream:
                    hls_url = stream["application/x-mpegurl"]
                    yield from hlsparse(
                        self.config,
                        self.http.request("get", hls_url),
                        hls_url,
                        output=self.output,
                    )
                if "video/mp4" in stream:
                    yield HTTP(copy.copy(self.config), stream["video/mp4"], 800, output=self.output)

        if not self.backupapi:
            return

        reply = self.http.get(self.backupapi.replace("i=", ""), params={"i": "object"})
        # Remove the "ps.embedHandler(" prefix and the '"");' text, then drop
        # everything from the last comma onwards so the remainder can be
        # handed to the JSON parser.
        body = reply.text.replace("ps.embedHandler(", "").replace('"");', "")
        body = body[: body.rfind(",")]
        embed = json.loads(body)
        for entry in embed["media"]["playerconfig"]["playlist"]:
            if "provider" not in entry or entry["provider"] != "httpstreaming":
                continue
            yield from hlsparse(self.config, self.http.request("get", entry["url"]), entry["url"], output=self.output)

    def get_auth(self):
        """Locate the ajax auth token.

        The page data is searched first.  If that fails, the embed script
        referenced by the page is fetched and searched, and finally the
        Screen9 iframe.  Whenever one of those two extra documents is
        fetched its URL is remembered in ``self.backupapi``.

        Returns the regex match whose group 1 is the token, or ``None``.
        """
        page_patterns = (
            r"picsearch_ajax_auth[ ]*=[ ]*['\"]([^'\"]+)['\"]",
            r'screen9-ajax-auth="([^"]+)"',
            'screen9"[ ]*:[ ]*"([^"]+)"',
            'data-auth="([^"]+)"',
        )
        for pattern in page_patterns:
            found = re.search(pattern, self.get_urldata())
            if found:
                return found

        script = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)', self.get_urldata())
        if script:
            script_url = script.group(1)
            response = self.http.request("get", script_url)
            self.backupapi = script_url
            found = re.search(r'ajaxAuth": "([^"]+)"', response.text)
            if found:
                return found

        frame = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())
        if not frame:
            return None

        frame_url = f"http:{frame.group(1)}"
        response = self.http.request("get", frame_url)
        self.backupapi = frame_url
        return re.search(r"picsearch_ajax_auth = '([^']+)'", response.text) or re.search(r"screen9_ajax_auth = '([^']+)'", response.text)

    def get_mediaid(self):
        """Locate the media id.

        The page data is searched first, then the embed script referenced by
        the page, then the Screen9 iframe.  When none of them yields an id
        the fragment part of ``self.url`` is used instead.

        Returns either a regex match whose group 1 is the id, or the URL
        fragment as a plain string (which may be empty).
        """
        page_patterns = (
            r"mediaId = '([^']+)';",
            r'media-id="([^"]+)"',
            r'screen9-mid="([^"]+)"',
            r'data-id="([^"]+)"',
            r"data-id=([^ ]+) ",
            r'data-videoid="([^"]+)"',
        )
        for pattern in page_patterns:
            found = re.search(pattern, self.get_urldata())
            if found:
                return found

        script = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+|http://csp.picsearch.com/rest[^"]+)', self.get_urldata())
        if script:
            response = self.http.request("get", script.group(1))
            found = re.search(r'mediaid": "([^"]+)"', response.text)
            if found:
                return found

        frame = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())
        if frame:
            response = self.http.request("get", f"http:{frame.group(1)}")
            found = re.search(r"mediaid: '([^']+)'", response.text)
            if found:
                return found

        # Last resort: the id may be carried in the URL fragment.
        return urlparse(self.url).fragment
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`# ex:ts=4:sw=4:sts=4:et`
			`# -- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil --`
service: copy options to fetcher 2014-06-07 20:43:40 +02:00			`import copy`
pre-commit: reorder imports 2019-08-25 00:40:39 +02:00			`import json`
			`import re`
No need for utils.urllib anymore 2018-01-30 22:07:21 +01:00			`from urllib.parse import urlparse`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00
pre-commit: reorder imports 2019-08-25 00:40:39 +02:00			`from svtplay_dl.error import ServiceError`
picsearch: new type of embedded video 2016-04-19 21:08:17 +02:00			`from svtplay_dl.fetcher.hls import hlsparse`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00			`from svtplay_dl.fetcher.http import HTTP`
pre-commit: reorder imports 2019-08-25 00:40:39 +02:00			`from svtplay_dl.service import OpenGraphThumbMixin`
			`from svtplay_dl.service import Service`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`class Picsearch(Service, OpenGraphThumbMixin):`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`supported_domains = ["dn.se", "mobil.dn.se", "di.se", "csp.picsearch.com", "csp.screen9.com"]`
pylint fixes 2021-12-18 21:37:09 +01:00			`backupapi = None`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00
Move options to when we init the service class 2015-12-26 11:46:14 +01:00			`def get(self):`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`ajax_auth = self.get_auth()`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`if not ajax_auth:`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`yield ServiceError("Cant find token for video")`
			`return`

			`mediaid = self.get_mediaid()`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`if not mediaid:`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`yield ServiceError("Cant find media id")`
			`return`
			`if not isinstance(mediaid, str):`
			`mediaid = mediaid.group(1)`

pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`jsondata = self.http.request(`
pre-commit: add-trailing-comma 2020-12-26 13:10:56 +01:00			`"get",`
f-string fixes 2021-12-18 19:52:08 +01:00			`f"http://csp.screen9.com/player?eventParam=1&ajaxauth={ajax_auth.group(1)}&method=embed&mediaid={mediaid}",`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`).text`
picsearch: New service. DN.se is using picsearch now days instead of qbrick 2014-03-25 15:37:41 +01:00			`jsondata = json.loads(jsondata)`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00
			`if "data" in jsondata:`
			`if "live" in jsondata["data"]["publishing_status"]:`
Options to config 2018-05-13 13:06:45 +02:00			`self.config.set("live", jsondata["data"]["publishing_status"]["live"])`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00			`playlist = jsondata["data"]["streams"]`
			`for i in playlist:`
Cosmetic changes from flake8 2019-03-23 00:57:19 +01:00			`if "application/x-mpegurl" in i:`
Add support for audio language and audio role 2021-05-16 02:22:37 +02:00			`yield from hlsparse(`
pre-commit: add-trailing-comma 2020-12-26 13:10:56 +01:00			`self.config,`
			`self.http.request("get", i["application/x-mpegurl"]),`
			`i["application/x-mpegurl"],`
			`output=self.output,`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`)`
Cosmetic changes from flake8 2019-03-23 00:57:19 +01:00			`if "video/mp4" in i:`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`yield HTTP(copy.copy(self.config), i["video/mp4"], 800, output=self.output)`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00
			`if self.backupapi:`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`res = self.http.get(self.backupapi.replace("i=", ""), params={"i": "object"})`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`data = res.text.replace("ps.embedHandler(", "").replace('"");', "")`
			`data = data[: data.rfind(",")]`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00			`jansson = json.loads(data)`
			`for i in jansson["media"]["playerconfig"]["playlist"]:`
			`if "provider" in i and i["provider"] == "httpstreaming":`
Add support for audio language and audio role 2021-05-16 02:22:37 +02:00			`yield from hlsparse(self.config, self.http.request("get", i["url"]), i["url"], output=self.output)`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00
			`def get_auth(self):`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`match = re.search(r"picsearch_ajax_auth[ ]=[ ]['\"]([^'\"]+)['\"]", self.get_urldata())`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`if not match:`
			`match = re.search(r'screen9-ajax-auth="([^"]+)"', self.get_urldata())`
picsearch: new type of embedded video 2016-04-19 21:08:17 +02:00			`if not match:`
			`match = re.search('screen9"[ ]:[ ]"([^"]+)"', self.get_urldata())`
picsearch: New way to get the token and id 2016-11-22 18:05:08 +01:00			`if not match:`
			`match = re.search('data-auth="([^"]+)"', self.get_urldata())`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`if not match:`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`match = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+\|http://csp.picsearch.com/rest[^"]+)', self.get_urldata())`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`if match:`
			`data = self.http.request("get", match.group(1))`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00			`self.backupapi = match.group(1)`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`match = re.search(r'ajaxAuth": "([^"]+)"', data.text)`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`if not match:`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`match = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`if match:`
pyupgrade fixes 2021-04-27 19:44:09 +02:00			`url = f"http:{match.group(1)}"`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`data = self.http.request("get", url)`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00			`self.backupapi = url`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`match = re.search(r"picsearch_ajax_auth = '([^']+)'", data.text)`
picsearch: sometimes we don't get the video in player api fixes: #527 2017-01-27 02:02:57 +01:00			`if not match:`
			`match = re.search(r"screen9_ajax_auth = '([^']+)'", data.text)`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`return match`

			`def get_mediaid(self):`
			`match = re.search(r"mediaId = '([^']+)';", self.get_urldata())`
			`if not match:`
			`match = re.search(r'media-id="([^"]+)"', self.get_urldata())`
			`if not match:`
			`match = re.search(r'screen9-mid="([^"]+)"', self.get_urldata())`
picsearch: new type of embedded video 2016-04-19 21:08:17 +02:00			`if not match:`
			`match = re.search(r'data-id="([^"]+)"', self.get_urldata())`
dn.se has data-id attributes with a slightly different format 2018-01-21 15:03:15 +01:00			`if not match:`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`match = re.search(r"data-id=([^ ]+) ", self.get_urldata())`
picsearch: New way to get the token and id 2016-11-22 18:05:08 +01:00			`if not match:`
			`match = re.search(r'data-videoid="([^"]+)"', self.get_urldata())`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`if not match:`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`match = re.search('s.src="(https://csp-ssl.picsearch.com[^"]+\|http://csp.picsearch.com/rest[^"]+)', self.get_urldata())`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`if match:`
			`data = self.http.request("get", match.group(1))`
			`match = re.search(r'mediaid": "([^"]+)"', data.text)`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`if not match:`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`match = re.search('iframe src="(//csp.screen9.com[^"]+)"', self.get_urldata())`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`if match:`
pyupgrade fixes 2021-04-27 19:44:09 +02:00			`url = f"http:{match.group(1)}"`
picsearch: more ways to detect the service 2016-03-20 18:20:19 +01:00			`data = self.http.request("get", url)`
			`match = re.search(r"mediaid: '([^']+)'", data.text)`
generic: download embedded picsearch videos fixes: #356 2016-03-16 22:50:43 +01:00			`if not match:`
			`urlp = urlparse(self.url)`
			`match = urlp.fragment`
			`return match`