svtplay-dl/lib/svtplay_dl/subtitle/__init__.py

import xml.etree.ElementTree as ET
import json
import re
from svtplay_dl.log import log
from svtplay_dl.utils import is_py2, get_http_data

class subtitle:
    """Base class for subtitle sources: remembers where to fetch from.

    Subclasses provide ``download(options)``, which fills ``self.subtitle``
    with the fetched data before converting it.
    """

    def __init__(self, url):
        # Nothing has been fetched yet, so the payload starts out unset.
        self.subtitle = None
        self.url = url

class subtitle_tt(subtitle):
    """XML subtitle made of ``p`` cues (with ``br`` line breaks), saved as SRT."""

    @staticmethod
    def _end_time(begin, duration):
        """Return ``begin + duration`` as an ``HH:MM:SS.mmm`` clock string.

        Both arguments are ``HH:MM:SS[.fff]`` clock strings. The sum is done
        in whole milliseconds so seconds carry into minutes and hours
        instead of producing values such as ``00:00:75.000``.
        """
        def to_msec(clock):
            parts = clock.split(":")
            seconds = int(parts[0]) * 3600 + int(parts[1]) * 60 + float(parts[2])
            return int(round(seconds * 1000))

        total = to_msec(begin) + to_msec(duration)
        seconds, msec = divmod(total, 1000)
        minutes, seconds = divmod(seconds, 60)
        hours, minutes = divmod(minutes, 60)
        return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, msec)

    def download(self, options):
        """Fetch ``self.url``, convert every ``p`` element to an SRT cue and save it.

        ``options`` is handed to ``save()`` unchanged. Raises ``KeyError``
        when a cue has no ``begin``, or has neither ``end`` nor
        ``dur``/``duration``.
        """
        self.subtitle = get_http_data(self.url)
        i = 1
        chunks = []
        # True while the previous cue has not yet been closed by the blank
        # line that the tail of a <br> emits.
        skip = False
        # Parse the downloaded document (not the ``subtitle`` class object).
        tree = ET.ElementTree(ET.fromstring(self.subtitle))
        for node in tree.iter():
            tag = norm(node.tag)
            if tag == "p":
                if skip:
                    chunks.append("\n")
                begin = node.attrib["begin"]
                if "end" in node.attrib:
                    end = node.attrib["end"]
                else:
                    # A duration is only needed when no explicit end exists.
                    if "dur" in node.attrib:
                        duration = node.attrib["dur"]
                    else:
                        duration = node.attrib["duration"]
                    end = self._end_time(begin, duration)
                chunks.append('%s\n%s --> %s\n' % (i, begin.replace(".", ","), end.replace(".", ",")))
                # node.text is None when the element starts with a child tag.
                chunks.append('%s\n' % (node.text or "").strip(' \t\n\r'))
                skip = True
                i += 1
            if tag == "br":
                if node.tail:
                    chunks.append('%s\n\n' % node.tail.strip(' \t\n\r'))
                    skip = False

        data = "".join(chunks)
        if is_py2:
            data = data.encode('utf8')
        save(options, data)

class subtitle_json(subtitle):
    """JSON list of cues (``startMillis``, ``endMillis``, ``text``), saved as SRT."""

    def download(self, options):
        """Fetch ``self.url``, convert each JSON cue to an SRT cue and save it.

        ``options`` is handed to ``save()`` unchanged.
        """
        self.subtitle = get_http_data(self.url)
        cues = json.loads(self.subtitle)
        parts = []
        for number, cue in enumerate(cues, 1):
            parts.append("%s\n%s --> %s\n" % (number, timestr(int(cue["startMillis"])), timestr(int(cue["endMillis"]))))
            # Keep the text as text here: encoding it inside the format
            # expression produces "b'...'" output on Python 3.
            parts.append("%s\n\n" % cue["text"])

        subs = "".join(parts)
        # Encode once at the end and only on Python 2, matching the other
        # subtitle classes in this module.
        if is_py2:
            subs = subs.encode('utf8')
        save(options, subs)

class subtitle_sami(subtitle):
    """XML subtitle with ``Subtitle`` elements under ``Font``, saved as SRT."""

    def download(self, options):
        """Fetch ``self.url``, convert its ``Subtitle`` elements to SRT and save it.

        Each ``Subtitle`` element supplies ``SpotNumber``, ``TimeIn`` and
        ``TimeOut``; the text of the elements that follow it becomes the cue
        text. ``options`` is handed to ``save()`` unchanged.
        """
        self.subtitle = get_http_data(self.url)
        tree = ET.XML(self.subtitle)
        subt = tree.find("Font")
        parts = []
        n = 0
        # Element.getiterator() was removed in Python 3.9; iter() is the
        # equivalent and also yields the element itself first.
        for i in subt.iter():
            if i.tag == "Subtitle":
                n = i.attrib["SpotNumber"]
                timing = "%s\n%s --> %s\n" % (n, i.attrib["TimeIn"], i.attrib["TimeOut"])
                # Every cue except the first is preceded by a blank line.
                if n != "1":
                    timing = "\n" + timing
                parts.append(timing)
            elif int(n) > 0 and i.text is not None:
                # Elements seen before the first cue are ignored, as are
                # elements without text (otherwise written as "None").
                parts.append("%s\n" % i.text)

        subs = "".join(parts)
        if is_py2:
            subs = subs.encode('utf8')
        save(options, subs)

class subtitle_smi(subtitle):
    """Markup subtitle made of ``<SYNC Start=...>`` blocks, saved as SRT."""

    def download(self, options):
        """Fetch ``self.url``, turn each pair of SYNC blocks into a cue and save it.

        The first SYNC of a match gives the start time and the text, the
        second one gives the end time. ``options`` is handed to ``save()``
        unchanged.
        """
        self.subtitle = get_http_data(self.url)
        pattern = re.compile(r'<SYNC Start=(\d+)>\s+<P Class=\w+>(.*)<br>\s+<SYNC Start=(\d+)>\s+<P Class=\w+>', re.M|re.I|re.U)
        pieces = []
        for number, match in enumerate(pattern.finditer(str(self.subtitle)), 1):
            start = timestr(match.group(1))
            stop = timestr(match.group(3))
            pieces.append("%s\n%s --> %s\n" % (number, start, stop))
            # Remaining <br> tags inside the text are line breaks.
            body = "%s\n\n" % match.group(2)
            pieces.append(body.replace("<br>", "\n"))

        subs = "".join(pieces)
        save(options, subs)

class subtitle_wsrt(subtitle):
    """CRLF-separated, SRT-like subtitle with inline tags, saved as plain SRT."""

    def download(self, options):
        """Fetch ``self.url``, normalise each cue to SRT and save it.

        Timing lines get ``,`` instead of ``.``, and everything that looks
        like a ``<...>`` tag is removed. ``options`` is handed to ``save()``
        unchanged.
        """
        self.subtitle = get_http_data(self.url)
        recomp = re.compile(r"(\d+)\r\n([\d:\.]+ --> [\d:\.]+)?([^\r\n]+)?\r\n([^\r\n]+)\r\n(([^\r\n]*)\r\n)?")
        # Compiled once instead of on every cue.
        tags = re.compile('<[^>]*>')
        parts = []
        for i in recomp.finditer(self.subtitle):
            timing = i.group(2)
            if timing is None:
                # The timing group is optional in the pattern; a match
                # without it cannot be written as a cue, so skip it rather
                # than crash on None.
                continue
            sub = "%s\n%s\n%s\n" % (i.group(1), timing.replace(".", ","), i.group(4))
            # Group 6 is None when the optional second text line is absent.
            if i.group(6):
                sub += "%s\n" % i.group(6)
            sub += "\n"
            parts.append(tags.sub('', sub))

        srt = "".join(parts)
        save(options, srt)

def save(options, data):
    """Write ``data`` to the output file, using an ``.srt`` extension.

    A trailing two- or three-character lowercase/digit extension on
    ``options.output`` is replaced by ``.srt``; otherwise ``.srt`` is
    appended. Note that ``options.output`` itself is updated to the new name.
    """
    filename = re.search(r"(.*)\.[a-z0-9]{2,3}$", options.output)
    if filename:
        options.output = "%s.srt" % filename.group(1)
    else:
        options.output = "%s.srt" % options.output

    log.info("Subtitle: %s", options.output)
    # The context manager closes the file even when write() raises.
    with open(options.output, "w") as fd:
        fd.write(data)

def timestr(msec):
    """
    Convert a millisecond value to a string of the following
    format:

        HH:MM:SS,SS

    with 10 millisecond precision. Note the , separator in
    the seconds.

    msec may be a number or a numeric string and is expected to be
    non-negative.
    """
    # Round the whole value to hundredths of a second first, so a result
    # such as 59.999 s carries into the minutes instead of being printed
    # as "00:00:60,00".
    whole, _, hundredths = ("%.2f" % (float(msec) / 1000)).partition(".")

    minutes, seconds = divmod(int(whole), 60)
    hours, minutes = divmod(minutes, 60)

    return "%02d:%02d:%02d,%s" % (hours, minutes, seconds, hundredths)

def norm(name):
    """Return ``name`` without a leading ``{namespace}`` prefix, if it has one."""
    if name[0] != "{":
        return name
    # "{uri}local" -> "uri", "local"
    namespace, local = name[1:].split("}")
    return local
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`import xml.etree.ElementTree as ET`
			`import json`
			`import re`
			`from svtplay_dl.log import log`
			`from svtplay_dl.utils import is_py2, get_http_data`

			`class subtitle():`
			`def __init__(self, url):`
			`self.url = url`
			`self.subtitle = None`

			`class subtitle_tt(subtitle):`
subtitle_tt cosmetics. 2014-04-21 20:04:41 +02:00			`def download(self, options):`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`self.subtitle = get_http_data(self.url)`
			`i = 1`
			`data = ""`
			`skip = False`
			`tree = ET.ElementTree(ET.fromstring(subtitle))`
			`for node in tree.iter():`
			`tag = norm(node.tag)`
			`if tag == "p":`
			`if skip:`
			`data = data + "\n"`
			`begin = node.attrib["begin"]`
			`if not ("dur" in node.attrib):`
			`duration = node.attrib["duration"]`
			`else:`
			`duration = node.attrib["dur"]`
			`if not ("end" in node.attrib):`
			`begin2 = begin.split(":")`
			`duration2 = duration.split(":")`
			`sec = float(begin2[2]) + float(duration2[2])`
			`end = "%02d:%02d:%06.3f" % (int(begin[0]), int(begin[1]), sec)`
			`else:`
			`end = node.attrib["end"]`
			`data += '%s\n%s --> %s\n' % (i, begin.replace(".",","), end.replace(".",","))`
			`data += '%s\n' % node.text.strip(' \t\n\r')`
			`skip = True`
			`i += 1`
			`if tag == "br":`
			`if node.tail:`
			`data += '%s\n\n' % node.tail.strip(' \t\n\r')`
			`skip = False`
subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`if is_py2:`
			`data = data.encode('utf8')`
subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00			`save(options, data)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
			`class subtitle_json(subtitle):`
			`def download(self, options):`
			`self.subtitle = get_http_data(self.url)`
			`data = json.loads(self.subtitle)`
			`number = 1`
			`subs = ""`
			`for i in data:`
			`subs += "%s\n%s --> %s\n" % (number, timestr(int(i["startMillis"])), timestr(int(i["endMillis"])))`
			`subs += "%s\n\n" % i["text"].encode("utf-8")`
			`number += 1`

subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00			`save(options, subs)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
			`class subtitle_sami(subtitle):`
			`def download(self, options):`
			`self.subtitle = get_http_data(self.url)`
			`tree = ET.XML(self.subtitle)`
			`subt = tree.find("Font")`
			`subs = ""`
			`n = 0`
			`for i in subt.getiterator():`
			`if i.tag == "Subtitle":`
			`n = i.attrib["SpotNumber"]`
			`if i.attrib["SpotNumber"] == "1":`
			`subs += "%s\n%s --> %s\n" % (i.attrib["SpotNumber"], i.attrib["TimeIn"], i.attrib["TimeOut"])`
			`else:`
			`subs += "\n%s\n%s --> %s\n" % (i.attrib["SpotNumber"], i.attrib["TimeIn"], i.attrib["TimeOut"])`
			`else:`
			`if int(n) > 0:`
			`subs += "%s\n" % i.text`

			`if is_py2:`
			`subs = subs.encode('utf8')`
subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00			`save(options, subs)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
			`class subtitle_smi(subtitle):`
			`def download(self, options):`
			`self.subtitle = get_http_data(self.url)`
			`recomp = re.compile(r'<SYNC Start=(\d+)>\s+<P Class=\w+>(.*)<br>\s+<SYNC Start=(\d+)>\s+<P Class=\w+>', re.M\|re.I\|re.U)`
			`number = 1`
			`subs = ""`
			`for i in recomp.finditer(str(self.subtitle)):`
			`subs += "%s\n%s --> %s\n" % (number, timestr(i.group(1)), timestr(i.group(3)))`
			`text = "%s\n\n" % i.group(2)`
			`subs += text.replace("<br>", "\n")`
			`number += 1`

subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00			`save(options, subs)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
			`class subtitle_wsrt(subtitle):`
			`def download(self, options):`
			`self.subtitle = get_http_data(self.url)`
			`recomp = re.compile(r"(\d+)\r\n([\d:\.]+ --> [\d:\.]+)?([^\r\n]+)?\r\n([^\r\n]+)\r\n(([^\r\n]*)\r\n)?")`
			`srt = ""`
			`for i in recomp.finditer(self.subtitle):`
			`sub = "%s\n%s\n%s\n" % (i.group(1), i.group(2).replace(".", ","), i.group(4))`
			`if len(i.group(6)) > 0:`
			`sub += "%s\n" % i.group(6)`
			`sub += "\n"`
			`sub = re.sub('<[^>]*>', '', sub)`
			`srt += sub`
subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00
			`save(options, srt)`

			`def save(options, data):`
			`filename = re.search(r"(.*)\.[a-z0-9]{2,3}$", options.output)`
			`if filename:`
			`options.output = "%s.srt" % filename.group(1)`
			`else:`
			`options.output = "%s.srt" % options.output`

			`log.info("Subtitle: %s", options.output)`
			`fd = open(options.output, "w")`
			`fd.write(data)`
			`fd.close()`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
			`def timestr(msec):`
			`"""`
			`Convert a millisecond value to a string of the following`
			`format:`

			`HH:MM:SS,SS`

			`with 10 millisecond precision. Note the , seperator in`
			`the seconds.`
			`"""`
			`sec = float(msec) / 1000`

			`hours = int(sec / 3600)`
			`sec -= hours * 3600`

			`minutes = int(sec / 60)`
			`sec -= minutes * 60`

			`output = "%02d:%02d:%05.2f" % (hours, minutes, sec)`
			`return output.replace(".", ",")`

			`def norm(name):`
			`if name[0] == "{":`
			`_, tag = name[1:].split("}")`
			`return tag`
			`else:`
			`return name`