From 5deeb84fc90a11d1808e3022ed64428f3ca321dc Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Thu, 1 May 2014 19:51:21 +0200 Subject: [PATCH] Break out OppetArkiv to subclass of Svtplay --- lib/svtplay_dl/service/__init__.py | 2 ++ lib/svtplay_dl/service/oppetarkiv.py | 35 ++++++++++++++++++ lib/svtplay_dl/service/svtplay.py | 42 +++++----------------- lib/svtplay_dl/service/tests/oppetarkiv.py | 20 +++++++++++ lib/svtplay_dl/service/tests/svtplay.py | 2 +- 5 files changed, 66 insertions(+), 35 deletions(-) create mode 100644 lib/svtplay_dl/service/oppetarkiv.py create mode 100644 lib/svtplay_dl/service/tests/oppetarkiv.py diff --git a/lib/svtplay_dl/service/__init__.py b/lib/svtplay_dl/service/__init__.py index 2c93089..9f1a8ce 100644 --- a/lib/svtplay_dl/service/__init__.py +++ b/lib/svtplay_dl/service/__init__.py @@ -99,6 +99,7 @@ from svtplay_dl.service.ruv import Ruv from svtplay_dl.service.radioplay import Radioplay from svtplay_dl.service.sr import Sr from svtplay_dl.service.svtplay import Svtplay +from svtplay_dl.service.oppetarkiv import OppetArkiv from svtplay_dl.service.tv4play import Tv4play from svtplay_dl.service.urplay import Urplay from svtplay_dl.service.viaplay import Viaplay @@ -122,6 +123,7 @@ sites = [ Radioplay, Sr, Svtplay, + OppetArkiv, Tv4play, Urplay, Viaplay, diff --git a/lib/svtplay_dl/service/oppetarkiv.py b/lib/svtplay_dl/service/oppetarkiv.py new file mode 100644 index 0000000..f0092d7 --- /dev/null +++ b/lib/svtplay_dl/service/oppetarkiv.py @@ -0,0 +1,35 @@ +# ex:ts=4:sw=4:sts=4:et +# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- +from __future__ import absolute_import +import sys +import re + +from svtplay_dl.service.svtplay import Svtplay +from svtplay_dl.log import log + +class OppetArkiv(Svtplay): + supported_domains = ['oppetarkiv.se'] + + def find_all_episodes(self, options): + page = 1 + match = re.search(r'"http://www.oppetarkiv.se/etikett/titel/([^"/]+)', self.get_urldata()) + if match is None: + match = 
re.search(r'"http://www.oppetarkiv.se/etikett/titel/([^"/]+)', self.url) + if match is None: + log.error("Couldn't find title") + sys.exit(2) + program = match.group(1) + more = True + episodes = [] + while more: + url = "http://www.oppetarkiv.se/etikett/titel/%s/?sida=%s&sort=tid_stigande&embed=true" % (program, page) + data = get_http_data(url) + visa = re.search(r'svtXColorDarkLightGrey', data) + if not visa: + more = False + regex = re.compile(r'(http://www.oppetarkiv.se/video/[^"]+)') + for match in regex.finditer(data): + episodes.append(match.group(1)) + page += 1 + + return episodes diff --git a/lib/svtplay_dl/service/svtplay.py b/lib/svtplay_dl/service/svtplay.py index 7f59ffc..9080ecc 100644 --- a/lib/svtplay_dl/service/svtplay.py +++ b/lib/svtplay_dl/service/svtplay.py @@ -17,7 +17,7 @@ from svtplay_dl.fetcher.http import download_http from svtplay_dl.log import log class Svtplay(Service, OpenGraphThumbMixin): - supported_domains = ['svtplay.se', 'svt.se', 'oppetarkiv.se', 'beta.svtplay.se', 'svtflow.se'] + supported_domains = ['svtplay.se', 'svt.se', 'beta.svtplay.se', 'svtflow.se'] def __init__(self, url): Service.__init__(self, url) @@ -119,38 +119,12 @@ class Svtplay(Service, OpenGraphThumbMixin): def find_all_episodes(self, options): - parse = urlparse(self.url) - if parse.netloc == "www.oppetarkiv.se": - page = 1 - match = re.search(r'"http://www.oppetarkiv.se/etikett/titel/([^"/]+)', self.get_urldata()) - if match is None: - match = re.search(r'"http://www.oppetarkiv.se/etikett/titel/([^"/]+)', self.url) - if match is None: - log.error("Couldn't find title") - sys.exit(2) - program = match.group(1) - more = True - episodes = [] - while more: - url = "http://www.oppetarkiv.se/etikett/titel/%s/?sida=%s&sort=tid_stigande&embed=true" % (program, page) - data = get_http_data(url) - visa = re.search(r'svtXColorDarkLightGrey', data) - if not visa: - more = False - regex = re.compile(r'(http://www.oppetarkiv.se/video/[^"]+)') - for match in 
regex.finditer(data): - episodes.append(match.group(1)) - page += 1 + match = re.search(r']*href="([^"]+)"', + self.get_urldata()) + if match is None: + log.error("Couldn't retrieve episode list") + sys.exit(2) - return episodes + xml = ET.XML(get_http_data(match.group(1))) - else: - match = re.search(r']*href="([^"]+)"', - self.get_urldata()) - if match is None: - log.error("Couldn't retrieve episode list") - sys.exit(2) - - xml = ET.XML(get_http_data(match.group(1))) - - return sorted(x.text for x in xml.findall(".//item/link")) + return sorted(x.text for x in xml.findall(".//item/link")) diff --git a/lib/svtplay_dl/service/tests/oppetarkiv.py b/lib/svtplay_dl/service/tests/oppetarkiv.py new file mode 100644 index 0000000..23468a9 --- /dev/null +++ b/lib/svtplay_dl/service/tests/oppetarkiv.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# ex:ts=4:sw=4:sts=4:et +# -*- tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- + +# The unittest framework doesn't play nice with pylint: +# pylint: disable-msg=C0103 + +from __future__ import absolute_import +import unittest +from svtplay_dl.service.oppetarkiv import OppetArkiv + +class handlesTest(unittest.TestCase): + def handles_oppetarkiv_se_test(self): + self.assertTrue(OppetArkiv.handles( + "http://www.oppetarkiv.se/video/1129844/jacobs-stege-avsnitt-1-av-1")) + + def handles_svtplay_se_test(self): + self.assertFalse(OppetArkiv.handles( + "http://www.svtplay.se/video/1090393/del-9")) + diff --git a/lib/svtplay_dl/service/tests/svtplay.py b/lib/svtplay_dl/service/tests/svtplay.py index ab1ae3d..cea3174 100644 --- a/lib/svtplay_dl/service/tests/svtplay.py +++ b/lib/svtplay_dl/service/tests/svtplay.py @@ -20,7 +20,7 @@ class handlesTest(unittest.TestCase): "http://www.svt.se/nyheter/sverige/det-ar-en-dodsfalla")) def handles_oppetarkiv_se_test(self): - self.assertTrue(Svtplay.handles( + self.assertFalse(Svtplay.handles( "http://www.oppetarkiv.se/video/1129844/jacobs-stege-avsnitt-1-av-1")) def handles_dn_se_test(self):