From 58140554c8f170ef8e4e50a649cb3986b59bd974 Mon Sep 17 00:00:00 2001
From: Olof Johansson
Date: Sun, 9 Mar 2014 15:56:00 +0100
Subject: [PATCH] viaplay: try harder when extracting video IDs

Before this change, the viaplay class would look for the video id
only in the URL. With this change, it will also look for it in the
HTML document (after fetching it) if it isn't in the URL.

This makes svtplay-dl able to support "latest episode" URLs, like:

  http://www.viasat4play.no/programmer/viva-la-bam

vs

  http://www.viasat4play.no/programmer/viva-la-bam/359853
---
 lib/svtplay_dl/service/viaplay.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/lib/svtplay_dl/service/viaplay.py b/lib/svtplay_dl/service/viaplay.py
index e787a96..fcce0a7 100644
--- a/lib/svtplay_dl/service/viaplay.py
+++ b/lib/svtplay_dl/service/viaplay.py
@@ -25,13 +25,34 @@ class Viaplay(Service, OpenGraphThumbMixin):
         Service.__init__(self, url)
         self.subtitle = None
 
-    def get(self, options):
+
+    def _get_video_id(self):
+        """
+        Extract video id. It will try to avoid making an HTTP request
+        if it can find the ID in the URL, but otherwise it will try
+        to scrape it from the HTML document. Returns None in case it's
+        unable to extract the ID at all.
+        """
         parse = urlparse(self.url)
         match = re.search(r'\/(\d+)/?', parse.path)
-        if not match:
+        if match:
+            return match.group(1)
+
+        html_data = self.get_urldata()
+        match = re.search(r'data-link="[^"]+/([0-9]+)"', html_data)
+        if match:
+            return match.group(1)
+
+        return None
+
+
+    def get(self, options):
+        vid = self._get_video_id()
+        if vid is None:
             log.error("Cant find video file")
             sys.exit(2)
-        url = "http://viastream.viasat.tv/PlayProduct/%s" % match.group(1)
+
+        url = "http://viastream.viasat.tv/PlayProduct/%s" % vid
         options.other = ""
         data = get_http_data(url)
         xml = ET.XML(data)