Add an extractor for videofy.me (closes #1171)

Also modify find_xpath_attr to accept attribute values that contain whitespace, such as id="HQ on"
2013-08-03 22:50:27 +02:00 · 2013-08-03 22:50:27 +02:00 · bba12cec89
commit bba12cec89
parent 70c4c03cb8
3 changed files with 51 additions and 1 deletions
--- a/youtube_dl/extractor/init.py
+++ b/youtube_dl/extractor/init.py
@ -73,6 +73,7 @@ from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .c56 import C56IE
--- a/youtube_dl/extractor/videofyme.py
+++ b/youtube_dl/extractor/videofyme.py
@ -0,0 +1,49 @@
 import re
 import xml.etree.ElementTree
 from .common import InfoExtractor
 from ..utils import (
    find_xpath_attr,
    determine_ext,
 )
 class VideofyMeIE(InfoExtractor):
    """Information extractor for videos hosted on videofy.me.

    Accepts ``www.videofy.me/<anything>/<id>`` and ``p.videofy.me/v/<id>``
    URLs, downloads the XML configuration for the numeric video id from
    ``sunshine.videofy.me`` and builds the info dictionary from it.
    """

    # The dots of the host names are escaped so that they only match a
    # literal '.'; unescaped they would match any character and accept
    # look-alike hosts such as 'wwwXvideofy.me'.
    _VALID_URL = r'https?://(www\.videofy\.me/.+?|p\.videofy\.me/v)/(?P<id>\d+)(&|#|$)'
    IE_NAME = u'videofy.me'
    _TEST = {
        u'url': u'http://www.videofy.me/thisisvideofyme/1100701',
        u'file':  u'1100701.mp4',
        u'md5': u'2046dd5758541d630bfa93e741e2fd79',
        u'info_dict': {
            u'title': u'This is VideofyMe',
            u'description': None,
            u'uploader': u'VideofyMe',
            u'uploader_id': u'thisisvideofyme',
        },
    }

    def _real_extract(self, url):
        """Return the info dictionary for the video addressed by *url*.

        The required fields (id, title, url, ext, uploader, uploader_id)
        are read directly from the configuration XML.  The optional
        fields (thumbnail, description, view_count) are set to None when
        the corresponding element is absent or unusable instead of
        aborting the extraction.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        config_xml = self._download_webpage(
            'http://sunshine.videofy.me/?videoId=%s' % video_id, video_id)
        # The downloaded document is re-encoded so that the parser is
        # handed UTF-8 bytes.
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))
        video = config.find('video')
        sources = video.find('sources')
        # Look for the source with id "HQ on" first and fall back to the
        # one with id "HQ off".
        url_node = find_xpath_attr(sources, 'source', 'id', 'HQ on')
        if url_node is None:
            url_node = find_xpath_attr(sources, 'source', 'id', 'HQ off')
        video_url = url_node.find('url').text

        def optional_text(path):
            # Text of the child element at *path*, or None if it is missing.
            node = video.find(path)
            return None if node is None else node.text

        # The views element is only used for its first run of digits; the
        # value is kept as a string, exactly as matched.
        views_text = optional_text('views')
        views_match = re.search(r'\d+', views_text) if views_text else None
        view_count = views_match.group() if views_match else None

        return {
            'id': video_id,
            'title': video.find('title').text,
            'url': video_url,
            'ext': determine_ext(video_url),
            'thumbnail': optional_text('thumb'),
            'description': optional_text('description'),
            'uploader': config.find('blog/name').text,
            'uploader_id': video.find('identifier').text,
            'view_count': view_count,
        }
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@ -207,7 +207,7 @@ if sys.version_info >= (2,7):
    def find_xpath_attr(node, xpath, key, val):
        """ Find the xpath xpath[@key=val] """
        assert re.match(r'^[a-zA-Z]+$', key)
-        assert re.match(r'^[a-zA-Z@]*$', val)
+        assert re.match(r'^[a-zA-Z@\s]*$', val)
        expr = xpath + u"[@%s='%s']" % (key, val)
        return node.find(expr)
 else: