From 58140554c8f170ef8e4e50a649cb3986b59bd974 Mon Sep 17 00:00:00 2001
From: Olof Johansson <olof@ethup.se>
Date: Sun, 9 Mar 2014 15:56:00 +0100
Subject: [PATCH] viaplay: try harder when extracting video IDs

Before this change, the Viaplay class looked for the video ID only in the
URL. With this change, if the ID isn't in the URL, it also fetches the HTML
document and looks for the ID there. This lets svtplay-dl support "latest
episode" URLs that carry no ID, like:

    http://www.viasat4play.no/programmer/viva-la-bam

vs

    http://www.viasat4play.no/programmer/viva-la-bam/359853
---
 lib/svtplay_dl/service/viaplay.py | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/lib/svtplay_dl/service/viaplay.py b/lib/svtplay_dl/service/viaplay.py
index e787a96..fcce0a7 100644
--- a/lib/svtplay_dl/service/viaplay.py
+++ b/lib/svtplay_dl/service/viaplay.py
@@ -25,13 +25,34 @@ class Viaplay(Service, OpenGraphThumbMixin):
         Service.__init__(self, url)
         self.subtitle = None
 
-    def get(self, options):
+
+    def _get_video_id(self):
+        """
+        Extract video id. It will try to avoid making an HTTP request
+        if it can find the ID in the URL, but otherwise it will try
+        to scrape it from the HTML document. Returns None in case it's
+        unable to extract the ID at all.
+        """
         parse = urlparse(self.url)
         match = re.search(r'\/(\d+)/?', parse.path)
-        if not match:
+        if match:
+            return match.group(1)
+
+        html_data = self.get_urldata()
+        match = re.search(r'data-link="[^"]+/([0-9]+)"', html_data)
+        if match:
+            return match.group(1)
+
+        return None
+
+
+    def get(self, options):
+        vid = self._get_video_id()
+        if vid is None:
             log.error("Cant find video file")
             sys.exit(2)
-        url = "http://viastream.viasat.tv/PlayProduct/%s" % match.group(1)
+
+        url = "http://viastream.viasat.tv/PlayProduct/%s" % vid
         options.other = ""
         data = get_http_data(url)
         xml = ET.XML(data)