# Mirrored from https://github.com/spaam/svtplay-dl — lib/svtplay_dl/service/viaplay.py
# ex:ts=4:sw=4:sts=4:et
# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*-
# pylint has issues with urlparse: "some types could not be inferred"
# pylint: disable=E1103
from __future__ import absolute_import

import copy
import json
import re
from urllib.parse import urlparse

from svtplay_dl.error import ServiceError
from svtplay_dl.fetcher.hds import hdsparse
from svtplay_dl.fetcher.hls import hlsparse
from svtplay_dl.service import OpenGraphThumbMixin
from svtplay_dl.service import Service
from svtplay_dl.subtitle import subtitle

class Viaplay(Service, OpenGraphThumbMixin):
    # Service implementation for MTG's Viaplay/Viafree/TV3-family sites.
    # OpenGraphThumbMixin contributes thumbnail extraction from og:image tags.

    # Hostnames this service handles; the framework uses this list to route
    # input URLs to the right Service subclass.
    supported_domains = [
        "tv3play.se",
        "tv6play.se",
        "tv8play.se",
        "tv10play.se",
        "tv3play.no",
        "tv3play.dk",
        "tv6play.no",
        "viasat4play.no",
        "tv3play.ee",
        "tv3play.lv",
        "tv3play.lt",
        "tvplay.lv",
        "viagame.com",
        "juicyplay.se",
        "viafree.se",
        "viafree.dk",
        "viafree.no",
        "viafree.fi",
        "play.tv3.lt",
        "tv3play.tv3.ee",
        "tvplay.skaties.lv",
    ]
def _get_video_id(self, url=None):
"""
Extract video id. It will try to avoid making an HTTP request
if it can find the ID in the URL, but otherwise it will try
to scrape it from the HTML document. Returns None in case it's
unable to extract the ID at all.
"""
if url:
html_data = self.http.request("get", url).text
else:
html_data = self.get_urldata()
2015-08-31 17:14:08 +02:00
html_data = self.get_urldata()
match = re.search(r'data-video-id="([0-9]+)"', html_data)
if match:
return match.group(1)
match = re.search(r'data-videoid="([0-9]+)', html_data)
2019-01-02 20:58:34 +01:00
if match:
return match.group(1)
match = re.search(r'"mediaGuid":"([0-9]+)"', html_data)
if match:
return match.group(1)
clips = False
slug = None
match = re.search('params":({.*}),"query', self.get_urldata())
2016-08-15 19:32:52 +02:00
if match:
jansson = json.loads(match.group(1))
if "seasonNumberOrVideoId" in jansson:
season = jansson["seasonNumberOrVideoId"]
match = re.search(r"\w-(\d+)$", season)
if match:
season = match.group(1)
else:
match = self._conentpage(self.get_urldata())
if match: # this only happen on the program page?
janson2 = json.loads(match.group(1))
if janson2["formatPage"]["format"]:
season = janson2["formatPage"]["format"]["seasonNumber"]
return janson2["formatPage"]["format"]["videos"][str(season)]["program"][0]["id"]
return None
if "videoIdOrEpisodeNumber" in jansson:
videp = jansson["videoIdOrEpisodeNumber"]
2019-08-25 00:27:31 +02:00
match = re.search(r"(\w+)-(\d+)", videp)
if match:
episodenr = match.group(2)
else:
episodenr = videp
clips = True
2019-08-25 00:27:31 +02:00
match = re.search(r"(s\w+)-(\d+)", season)
if match:
season = match.group(2)
else:
# sometimes videoIdOrEpisodeNumber does not work.. this is a workaround
2019-08-25 00:27:31 +02:00
match = re.search(r"(episode|avsnitt)-(\d+)", self.url)
if match:
episodenr = match.group(2)
else:
episodenr = season
if "slug" in jansson:
slug = jansson["slug"]
if clips:
return episodenr
else:
match = self._conentpage(self.get_urldata())
if match:
janson = json.loads(match.group(1))
for i in janson["formatPage"]["format"]["videos"].keys():
if "program" in janson["formatPage"]["format"]["videos"][str(i)]:
for n in janson["formatPage"]["format"]["videos"][i]["program"]:
if str(n["episodeNumber"]) and int(episodenr) == n["episodeNumber"] and int(season) == n["seasonNumber"]:
if slug is None or slug == n["formatSlug"]:
return n["id"]
elif n["id"] == episodenr:
return episodenr
parse = urlparse(self.url)
2019-08-25 00:27:31 +02:00
match = re.search(r"/\w+/(\d+)", parse.path)
if match:
return match.group(1)
match = re.search(r'iframe src="http://play.juicyplay.se[^\"]+id=(\d+)', html_data)
if match:
return match.group(1)
match = re.search(r'<meta property="og:image" content="([\S]+)"', html_data)
if match:
return match.group(1).split("/")[-2]
return None
2018-01-30 20:11:37 +01:00
    def get(self):
        """Generator yielding streams, subtitles, or ServiceError objects for self.url."""
        parse = urlparse(self.url)
        vid = self._get_video_id()
        if vid is None:
            # Sport pages have no regular video id; delegate to the sport player.
            if parse.path[:6] == "/sport":
                result = self._sport()
                yield from result
                return
            else:
                yield ServiceError("Can't find video file for: {}".format(self.url))
                return
        data = self._get_video_data(vid)
        if data.status_code == 403:
            yield ServiceError("Can't play this because the video is geoblocked.")
            return
        dataj = json.loads(data.text)
        # An error payload from playapi carries a "msg" key.
        if "msg" in dataj:
            yield ServiceError(dataj["msg"])
            return
        if dataj["type"] == "live":
            self.config.set("live", True)
        self.output["id"] = vid
        # Fill in title/season/episode metadata before yielding anything.
        self._autoname(dataj)
        streams = self.http.request("get", "http://playapi.mtgx.tv/v3/videos/stream/{}".format(vid))
        if streams.status_code == 403:
            yield ServiceError("Can't play this because the video is geoblocked.")
            return
        streamj = json.loads(streams.text)
        if "msg" in streamj:
            yield ServiceError("Can't play this because the video is either not found or geoblocked.")
            return
        # Subtitles: "wrst" (WebVTT) vs "sami" is decided by the file extension.
        if dataj["sami_path"]:
            if dataj["sami_path"].endswith("vtt"):
                subtype = "wrst"
            else:
                subtype = "sami"
            yield subtitle(copy.copy(self.config), subtype, dataj["sami_path"], output=self.output)
        if dataj["subtitles_webvtt"]:
            yield subtitle(copy.copy(self.config), "wrst", dataj["subtitles_webvtt"], output=self.output)
        if dataj["subtitles_for_hearing_impaired"]:
            if dataj["subtitles_for_hearing_impaired"].endswith("vtt"):
                subtype = "wrst"
            else:
                subtype = "sami"
            if self.config.get("get_all_subtitles"):
                # "-SDH" suffix distinguishes the hearing-impaired track.
                yield subtitle(copy.copy(self.config), subtype, dataj["subtitles_for_hearing_impaired"], "-SDH", output=self.output)
            else:
                yield subtitle(copy.copy(self.config), subtype, dataj["subtitles_for_hearing_impaired"], output=self.output)
        # HDS (.f4m) streams; "[empty]" marks a placeholder entry.
        if streamj["streams"]["medium"] and streamj["streams"]["medium"][:7] != "[empty]":
            filename = streamj["streams"]["medium"]
            if ".f4m" in filename:
                streams = hdsparse(self.config, self.http.request("get", filename, params={"hdcore": "3.7.0"}), filename, output=self.output)
                for n in list(streams.keys()):
                    yield streams[n]
        # HLS streams.
        if streamj["streams"]["hls"]:
            streams = hlsparse(self.config, self.http.request("get", streamj["streams"]["hls"]), streamj["streams"]["hls"], output=self.output)
            for n in list(streams.keys()):
                yield streams[n]
def find_all_episodes(self, config):
seasons = []
match = re.search(r"(sasong|sesong)-(\d+)", urlparse(self.url).path)
2016-08-15 22:46:32 +02:00
if match:
seasons.append(match.group(2))
else:
match = self._conentpage(self.get_urldata())
if match:
janson = json.loads(match.group(1))
for i in janson["formatPage"]["format"]["seasons"]:
seasons.append(i["seasonNumber"])
2018-05-21 22:56:22 +02:00
episodes = self._grab_episodes(config, seasons)
if config.get("all_last") > 0:
2019-08-25 00:27:31 +02:00
return episodes[-config.get("all_last") :]
return sorted(episodes)
2018-05-08 22:46:11 +02:00
def _grab_episodes(self, config, seasons):
episodes = []
baseurl = self.url
match = re.search(r"(saeson|sasong|sesong)-\d+", urlparse(self.url).path)
if match:
if re.search(r"(avsnitt|episode)", urlparse(baseurl).path):
2019-08-25 00:27:31 +02:00
baseurl = baseurl[: baseurl.rfind("/")]
baseurl = baseurl[: baseurl.rfind("/")]
for i in seasons:
2019-08-25 00:33:51 +02:00
url = "{}/{}-{}".format(baseurl, self._isswe(self.url), i)
res = self.http.get(url)
if res:
match = self._conentpage(res.text)
if match:
janson = json.loads(match.group(1))
if "program" in janson["formatPage"]["format"]["videos"][str(i)]:
for n in janson["formatPage"]["format"]["videos"][str(i)]["program"]:
episodes = self._videos_to_list(n["sharingUrl"], n["id"], episodes)
2018-05-08 22:46:11 +02:00
if config.get("include_clips"):
if "clip" in janson["formatPage"]["format"]["videos"][str(i)]:
for n in janson["formatPage"]["format"]["videos"][str(i)]["clip"]:
episodes = self._videos_to_list(n["sharingUrl"], n["id"], episodes)
return episodes
def _isswe(self, url):
if re.search(r".se$", urlparse(url).netloc):
return "sasong"
elif re.search(r".dk$", urlparse(url).netloc):
return "saeson"
else:
return "sesong"
def _conentpage(self, data):
return re.search(r'=({"sportsPlayer.*}); window.__config', data)
2018-01-30 20:11:37 +01:00
def _videos_to_list(self, url, vid, episodes):
dataj = json.loads(self._get_video_data(vid).text)
2018-01-30 20:11:37 +01:00
if "msg" not in dataj:
2018-05-08 22:50:17 +02:00
if url not in episodes:
episodes.append(url)
return episodes
2018-01-30 20:11:37 +01:00
def _get_video_data(self, vid):
2019-08-25 00:33:51 +02:00
url = "http://playapi.mtgx.tv/v3/videos/{}".format(vid)
data = self.http.request("get", url)
return data
2018-01-30 20:11:37 +01:00
2015-10-24 21:55:33 +02:00
def _autoname(self, dataj):
program = dataj["format_slug"]
season = None
2015-10-24 21:55:33 +02:00
episode = None
title = None
2017-02-15 23:15:50 +01:00
if "season" in dataj["format_position"]:
if dataj["format_position"]["season"] and dataj["format_position"]["season"] > 0:
season = dataj["format_position"]["season"]
2015-10-24 21:55:33 +02:00
if season:
if dataj["format_position"]["episode"] and len(dataj["format_position"]["episode"]) > 0:
2015-10-24 21:55:33 +02:00
episode = dataj["format_position"]["episode"]
2018-03-10 11:40:36 +01:00
if episode:
try:
episode = int(episode)
except (TypeError, ValueError):
2018-05-13 13:06:45 +02:00
title = episode
2018-03-10 11:40:36 +01:00
episode = None
2018-03-10 19:37:05 +01:00
else:
title = dataj["summary"].replace("{} - ".format(dataj["format_title"]), "")
if title[-1] == ".":
2019-08-25 00:27:31 +02:00
title = title[: len(title) - 1] # remove the last dot
if dataj["type"] == "clip":
2018-01-30 20:11:37 +01:00
# Removes the show name from the end of the filename
# e.g. Showname.S0X.title instead of Showname.S07.title-showname
2019-08-25 00:27:31 +02:00
match = re.search(r"(.+)-", dataj["title"])
if match:
2018-05-13 13:06:45 +02:00
title = match.group(1)
2017-02-15 23:15:50 +01:00
else:
2018-05-13 13:06:45 +02:00
title = dataj["title"]
if "derived_from_id" in dataj:
if dataj["derived_from_id"]:
parent_id = dataj["derived_from_id"]
parent_episode = self.http.request("get", "http://playapi.mtgx.tv/v3/videos/{}".format(parent_id))
2018-01-30 20:11:37 +01:00
if parent_episode.status_code != 403: # if not geoblocked
datajparent = json.loads(parent_episode.text)
if not season and datajparent["format_position"]["season"] > 0:
season = datajparent["format_position"]["season"]
if len(datajparent["format_position"]["episode"]) > 0:
episode = datajparent["format_position"]["episode"]
2017-02-15 23:15:50 +01:00
2018-05-13 13:06:45 +02:00
self.output["title"] = program
self.output["season"] = season
self.output["episode"] = episode
2018-05-13 13:06:45 +02:00
self.output["episodename"] = title
2017-02-15 23:15:50 +01:00
2018-05-13 13:06:45 +02:00
return True
def _sport(self):
content = self._conentpage(self.get_urldata())
if not content:
2019-08-25 00:33:51 +02:00
yield ServiceError("Can't find video file for: {}".format(self.url))
return
janson = json.loads(content.group(1))
if not janson["sportsPlayer"]["currentVideo"]:
2019-08-25 00:33:51 +02:00
yield ServiceError("Can't find video file for: {}".format(self.url))
return
self.output["title"] = janson["sportsPlayer"]["currentVideo"]["title"]
res = self.http.request("get", janson["sportsPlayer"]["currentVideo"]["_links"]["streamLink"]["href"])
if res.status_code == 403:
yield ServiceError("Can't play this because the video is geoblocked.")
return
for i in res.json()["embedded"]["prioritizedStreams"]:
streams = hlsparse(self.config, self.http.request("get", i["links"]["stream"]["href"]), i["links"]["stream"]["href"], output=self.output)
if streams:
for n in list(streams.keys()):
yield streams[n]