svtplay-dl/lib/svtplay_dl/subtitle/__init__.py

import xml.etree.ElementTree as ET
import json
import re
from io import StringIO

from svtplay_dl.log import log
from svtplay_dl.utils.text import decode_html_entities
from svtplay_dl.utils.http import HTTP
from svtplay_dl.utils.output import output

from requests import __build__ as requests_version
import platform


class subtitle(object):
    """Download a single subtitle track and save it as SubRip (``.srt``).

    ``subtype`` selects the converter used by :meth:`download`; the
    recognised values are ``"tt"``, ``"json"``, ``"sami"``, ``"smi"``,
    ``"wrst"`` and ``"raw"``.  Every converter takes the HTTP response
    object and returns the subtitle text as a string.
    """

    def __init__(self, config, subtype, url, subfix=None, **kwargs):
        """Store the settings for one subtitle download.

        config  -- configuration object; it is handed to ``HTTP`` and
                   ``output`` and queried with ``.get()``.
        subtype -- name of the source format (see the class docstring).
        url     -- address of the subtitle file.
        subfix  -- optional suffix appended to the episode name when all
                   subtitles are downloaded.
        output  -- (keyword) mapping passed on to ``output()``; its
                   ``"episodename"`` entry may be rewritten by download().
        """
        self.url = url
        self.subtitle = None
        self.config = config
        self.subtype = subtype
        self.http = HTTP(config)
        self.subfix = subfix
        # Set when the payload starts with a UTF-8 byte order mark.
        self.bom = False
        self.output = kwargs.pop("output", None)

    def __repr__(self):
        return "<Subtitle(type={}, url={})>".format(self.subtype, self.url)

    def download(self):
        """Fetch the subtitle, convert it and write the result to disk.

        Logs a warning and returns without writing anything when the HTTP
        status is not 200.  When the ``get_raw_subtitles`` option is set
        the unconverted text is saved as well, using the subtype as file
        extension.  An unknown subtype is reported with a warning instead
        of attempting to write ``None`` to the output file.
        """
        subdata = self.http.request("get", self.url)
        if subdata.status_code != 200:
            log.warning("Can't download subtitle file")
            return

        utf8_bom = b"\xef\xbb\xbf"
        if "mtgx" in self.url and subdata.content[:3] == utf8_bom:
            subdata.encoding = "utf-8"
            self.bom = True

        if self.subtype == "wrst":
            # Encoding tweaks that only apply to WebVTT/WSRT sources.
            if "tv4play" in self.url and subdata.content[:3] == utf8_bom:
                subdata.encoding = "utf-8"
                self.bom = True
            if "dplay" in self.url:
                subdata.encoding = "utf-8"

        converters = {
            "tt": self.tt,
            "json": self.json,
            "sami": self.sami,
            "smi": self.smi,
            "wrst": self.wrst,
            "raw": self.raw,
        }
        converter = converters.get(self.subtype)
        data = None
        if converter is not None:
            data = converter(subdata)

        if self.subfix:
            if self.config.get("get_all_subtitles"):
                if self.output["episodename"]:
                    self.output["episodename"] = "{}-{}".format(self.output["episodename"], self.subfix)
                else:
                    self.output["episodename"] = self.subfix

        if self.config.get("get_raw_subtitles"):
            self.save_file(self.raw(subdata), self.subtype)

        if data is None:
            # No converter matched; there is nothing to write as .srt.
            log.warning("Unsupported subtitle type: %s", self.subtype)
            return

        self.save_file(data, "srt")

    def save_file(self, data, subtype):
        """Write ``data`` to the file that ``output()`` opens for ``subtype``.

        Nothing is written when ``output()`` does not return a file-like
        object (one with a ``read`` attribute).
        """
        if platform.system() == "Windows":
            # On Windows the encoding is requested explicitly.
            file_d = output(self.output, self.config, subtype, mode="wt", encoding="utf-8")
        else:
            file_d = output(self.output, self.config, subtype, mode="wt")
        if not hasattr(file_d, "read"):
            return
        try:
            file_d.write(data)
        finally:
            # Close the handle even if the write fails.
            file_d.close()

    def raw(self, subdata):
        """Return the response body as text, without any conversion."""
        return subdata.text

    @staticmethod
    def _tt_end_time(begin, duration):
        """Return ``begin + duration`` as an ``HH:MM:SS.mmm`` string.

        Both arguments are ``HH:MM:SS[.fraction]`` clock strings.  The sum
        is computed in whole milliseconds so seconds and minutes carry
        over correctly.  When a field cannot be parsed as a number the end
        time falls back to ``begin`` with the seconds set to zero, which is
        what the previous implementation produced for unparsable seconds.
        """
        def to_ms(clock):
            hours, minutes, seconds = clock.split(":")[:3]
            total = int(hours) * 3600 + int(minutes) * 60 + float(seconds)
            return int(round(total * 1000))

        try:
            total_ms = to_ms(begin) + to_ms(duration)
        except (ValueError, OverflowError):
            hours, minutes = begin.split(":")[:2]
            return "%02d:%02d:%06.3f" % (int(hours), int(minutes), 0.0)

        total_ms = max(total_ms, 0)
        hours, rest = divmod(total_ms, 3600000)
        minutes, rest = divmod(rest, 60000)
        seconds, millis = divmod(rest, 1000)
        return "%02d:%02d:%02d.%03d" % (hours, minutes, seconds, millis)

    def tt(self, subdata):
        """Convert a TTML-style XML document to SRT text.

        Only ``<p>`` elements directly below ``body/div`` are used.  Each
        needs a ``begin`` attribute and either ``end`` or a duration
        (``dur`` or ``duration``).
        """
        # Drop the first default namespace so plain tag names can be used.
        xmldata = re.sub(' xmlns="[^"]+"', '', subdata.text, count=1)
        tree = ET.XML(xmldata)
        div = tree.find("body").find("div")

        data = ""
        index = 1
        for node in div.findall("p"):
            tag = norm(node.tag)
            if tag == "p" or tag == "span":
                begin = node.attrib["begin"]
                if "end" in node.attrib:
                    end = node.attrib["end"]
                else:
                    # A duration is only needed when no end time is given.
                    if "dur" in node.attrib:
                        duration = node.attrib["dur"]
                    else:
                        duration = node.attrib["duration"]
                    end = self._tt_end_time(begin, duration)
                data += '%s\n%s --> %s\n' % (index, begin.replace(".", ","), end.replace(".", ","))
                data = tt_text(node, data)
                data += "\n"
                index += 1

        return data

    def json(self, subdata):
        """Convert a JSON list of cues to SRT text.

        Every entry must provide ``startMillis``, ``endMillis`` and
        ``text``.
        """
        entries = json.loads(subdata.text)
        parts = []
        for number, entry in enumerate(entries, 1):
            parts.append("%s\n%s --> %s\n" % (number, timestr(int(entry["startMillis"])), timestr(int(entry["endMillis"]))))
            parts.append("%s\n\n" % entry["text"])

        return "".join(parts)

    def sami(self, subdata):
        """Convert an XML document with ``Font``/``Subtitle`` elements to SRT.

        ``Subtitle`` elements supply the cue number (``SpotNumber``) and
        the ``TimeIn``/``TimeOut`` stamps; the text of every other element
        below ``Font`` becomes a cue line.
        """
        text = subdata.text
        # Escape every ampersand so the XML parser does not choke on them.
        text = re.sub(r'&', '&amp;', text)
        tree = ET.fromstring(text)
        subt = tree.find("Font")
        subs = ""
        n = 0
        # Element.getiterator() was removed in Python 3.9; iter() is the
        # documented equivalent.
        for i in subt.iter():
            if i.tag == "Subtitle":
                n = i.attrib["SpotNumber"]

                if i.attrib["SpotNumber"] == "1":
                    subs += "%s\n%s --> %s\n" % (i.attrib["SpotNumber"], timecolon(i.attrib["TimeIn"]), timecolon(i.attrib["TimeOut"]))
                else:
                    # A blank line separates this cue from the previous one.
                    subs += "\n%s\n%s --> %s\n" % (i.attrib["SpotNumber"], timecolon(i.attrib["TimeIn"]), timecolon(i.attrib["TimeOut"]))
            else:
                # Text is only emitted once the first cue header was seen.
                if int(n) > 0 and i.text:
                    subs += "%s\n" % decode_html_entities(i.text)

        subs = re.sub('&amp;', r'&', subs)
        return subs

    def smi(self, subdata):
        """Convert a SAMI (``<SYNC Start=...>``) document to SRT text.

        The payload is always decoded as ISO-8859-1.
        """
        if requests_version < 0x20300:
            # Old requests releases are bypassed: decode the bytes here.
            subdata = subdata.content.decode("latin")
        else:
            subdata.encoding = "ISO-8859-1"
            subdata = subdata.text
        ssubdata = StringIO(subdata)
        timea = 0
        number = 1
        data = None
        subs = ""
        # Matches every tag except those whose name starts with "i".
        TAG_RE = re.compile(r'<(?!\/?i).*?>')
        bad_char = re.compile(r'\x96')
        for i in ssubdata.readlines():
            i = i.rstrip()
            sync = re.search(r"<SYNC Start=(\d+)>", i)
            if sync:
                # A new SYNC ends the cue that started at ``timea``.
                if int(sync.group(1)) != int(timea):
                    if data and data != "&nbsp;":
                        subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(sync.group(1)))
                        text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))
                        text = decode_html_entities(text)
                        # Make sure the cue ends with an empty line.
                        if text[len(text) - 2] != "\n":
                            text += "\n"
                        subs += text
                        number += 1
                timea = sync.group(1)
            text = re.search("<P Class=SVCC>(.*)", i)
            if text:
                data = text.group(1)
        recomp = re.compile(r'\r')
        # Strip carriage returns and replace the U+0096 character by "-".
        text = bad_char.sub('-', recomp.sub('', subs))
        return text

    def wrst(self, subdata):
        """Convert WebVTT/WSRT text to SRT text.

        Header lines (``WEBVTT``, ``X-TIMESTAMP``) are dropped, timestamps
        get a comma as decimal separator and cues are numbered when the
        source does not number them.  Markup is removed unless the
        ``convert_subtitle_colors`` option is set, in which case the
        numeric colour tags ``<30>``..``<37>`` become ``<font color>``
        tags.
        """
        ssubdata = StringIO(subdata.text)
        srt = ""
        subtract = False
        number_b = 1  # number given to the next cue that has none
        number = 0  # last cue number read from the source
        block = 0  # 1 while inside the text part of a cue
        subnr = False  # True when the source supplied the cue number
        colors = {'30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00',
                  '34': '#0000ff', '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff'}
        if self.bom:
            # Skip the decoded byte order mark.
            ssubdata.read(1)
        for i in ssubdata.readlines():
            match = re.search(r"^[\r\n]+", i)
            match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)
            match3 = re.search(r"^(\d+)\s", i)
            if i[:6] == "WEBVTT":
                continue
            elif "X-TIMESTAMP" in i:
                continue
            elif match and number_b == 1 and self.bom:
                continue
            elif match and number_b > 1:
                # An empty line ends the current cue.
                block = 0
                srt += "\n"
            elif match2:
                if not subnr:
                    srt += "%s\n" % number_b
                matchx = re.search(r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                if matchx:
                    hour1 = int(matchx.group("h1"))
                    hour2 = int(matchx.group("h2"))
                    # When cue number 1 starts after 9 hours, every
                    # timestamp is shifted back by ten hours.
                    if int(number) == 1:
                        if hour1 > 9:
                            subtract = True
                    if subtract:
                        hour1 -= 10
                        hour2 -= 10
                else:
                    # Timestamps without an hour field (MM:SS.mmm).
                    matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', i)
                    hour1 = 0
                    hour2 = 0
                time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),
                                                                      hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))
                srt += time
                block = 1
                subnr = False
                number_b += 1

            elif match3 and block == 0:
                number = match3.group(1)
                srt += "%s\n" % number
                subnr = True
            else:
                if self.config.get("convert_subtitle_colors"):
                    sub = i
                    for tag, color in colors.items():
                        regex1 = '<' + tag + '>'
                        replace = '<font color="' + color + '">'
                        sub = re.sub(regex1, replace, sub)

                    # Non-greedy on purpose: each closing tag is replaced
                    # on its own, so text between two closing tags on the
                    # same line is kept.
                    sub = re.sub('</[^>]+>', '</font>', sub)
                else:
                    sub = re.sub('<[^>]*>', '', i)
                srt += sub.strip()
                srt += "\n"
        srt = decode_html_entities(srt)
        return srt


def timestr(msec):
    """
    Convert a millisecond value to a string of the following
    format:

        HH:MM:SS,mmm

    with millisecond precision. Note the , separator in
    the seconds.

    msec may be a number or a numeric string; fractional values are
    rounded to the nearest millisecond. The conversion uses integer
    arithmetic so a value just below a full minute rolls over to the
    next minute instead of being printed as 60 seconds.
    """
    total = int(round(float(msec)))
    sign = "-" if total < 0 else ""

    hours, rest = divmod(abs(total), 3600000)
    minutes, rest = divmod(rest, 60000)
    seconds, millis = divmod(rest, 1000)

    return "%s%02d:%02d:%02d,%03d" % (sign, hours, minutes, seconds, millis)


def timecolon(data):
    """
    Rewrite the first ``H:M:S:F`` timestamp found in data as
    ``H:M:S,F``, i.e. with a comma in front of the last field.
    """
    found = re.search(r"(\d+:\d+:\d+):(\d+)", data)
    clock, fraction = found.group(1, 2)
    return ",".join((clock, fraction))


def norm(name):
    """
    Strip a leading ``{namespace}`` prefix from a tag name; names
    without such a prefix are returned unchanged.
    """
    if name[0] != "{":
        return name
    pieces = name[1:].split("}")
    _, local = pieces
    return local


def tt_text(node, data):
    """
    Append the text of node and of its direct children to data, one
    line per text fragment, and return the extended string.

    Fragments are taken in document order: the text of node itself,
    then for every child its text followed by its tail. Surrounding
    whitespace is stripped and fragments that are empty afterwards are
    skipped, so indentation inside the element never produces a blank
    line (a blank line would end the SRT cue early).
    """
    fragments = [node.text]
    for child in node:
        fragments.append(child.text)
        fragments.append(child.tail)

    for fragment in fragments:
        if not fragment:
            continue
        line = fragment.strip(' \t\n\r')
        if line:
            data += "%s\n" % line
    return data
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`import xml.etree.ElementTree as ET`
			`import json`
			`import re`
No need for utils.io 2018-01-30 22:09:31 +01:00			`from io import StringIO`

Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`from svtplay_dl.log import log`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`from svtplay_dl.utils.text import decode_html_entities`
			`from svtplay_dl.utils.http import HTTP`
Options to config 2018-05-13 13:06:45 +02:00			`from svtplay_dl.utils.output import output`

subtitle: workaround a bug in requests in ubuntu 14.04 LTS 2.2.1 cant convert string from bytes to text right. fixes #259 2015-09-20 15:15:50 +02:00			`from requests import __build__ as requests_version`
subtitle: we need to tell the file is utf-8 on windows and py3 2015-10-25 17:19:16 +01:00			`import platform`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
unused imports 2015-01-25 14:41:29 +01:00
subtitle: inherit from object 2014-07-28 15:53:23 +02:00			`class subtitle(object):`
more options to config replaces 2018-05-08 22:46:11 +02:00			`def __init__(self, config, subtype, url, subfix=None, **kwargs):`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`self.url = url`
			`self.subtitle = None`
more options to config replaces 2018-05-08 22:46:11 +02:00			`self.config = config`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`self.subtype = subtype`
more options to config replaces 2018-05-08 22:46:11 +02:00			`self.http = HTTP(config)`
Added support for subfix in filename of subtitles, when several languages are available Added command to download all available subtitles for a video (--all-subtitles) Added support to print all the subtitle urls when the get url parameter is used Fixed so subtitle url and stream url get printed if -S and -g but not --force-subtitles parameter is used Added support for downloading all subtitles and auto subfix them with language name for Urplay and Urskola (even when just one subtitle is downloaded) 2016-04-27 10:37:47 +02:00			`self.subfix = subfix`
subtitle: dont double encode utf8 fixes: #507 fixes: #501 2016-12-05 20:45:14 +01:00			`self.bom = False`
Options to config 2018-05-13 13:06:45 +02:00			`self.output = kwargs.pop("output", None)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
subtitle: better represent line 2018-05-12 15:12:37 +02:00			`def __repr__(self):`
			`return "<Subtitle(type={}, url={}>".format(self.subtype, self.url)`

subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`def download(self):`
subtitle: remove unused cookies variable 2018-05-13 01:46:51 +02:00			`subdata = self.http.request("get", self.url)`
subtitle: don’t crash if we cant download subtitle show a warning that we cant download it. 2016-11-14 21:51:39 +01:00			`if subdata.status_code != 200:`
			`log.warning("Can't download subtitle file")`
			`return`

subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`data = None`
subtitle: dont double encode utf8 fixes: #507 fixes: #501 2016-12-05 20:45:14 +01:00			`if "mtgx" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":`
			`subdata.encoding = "utf-8"`
			`self.bom = True`

subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`if self.subtype == "tt":`
			`data = self.tt(subdata)`
			`if self.subtype == "json":`
			`data = self.json(subdata)`
			`if self.subtype == "sami":`
			`data = self.sami(subdata)`
			`if self.subtype == "smi":`
			`data = self.smi(subdata)`
			`if self.subtype == "wrst":`
subtitle: tv4 subs have a bom headers. 2017-09-17 11:21:04 +02:00			`if "tv4play" in self.url and subdata.content[:3] == b"\xef\xbb\xbf":`
			`subdata.encoding = "utf-8"`
			`self.bom = True`
subtitle: we need to tell requests to use utf8 for encoding 2018-02-22 03:29:21 +01:00			`if "dplay" in self.url:`
			`subdata.encoding = "utf-8"`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`data = self.wrst(subdata)`
subtitle: raw type 2015-10-30 00:51:35 +01:00			`if self.subtype == "raw":`
subtitle: added command --raw-subtitles to download raw subtitle file also 2016-04-27 13:12:30 +02:00			`data = self.raw(subdata)`
Flake8 fixes 2018-01-30 20:11:37 +01:00
Fixed missing space error 2016-04-27 19:41:23 +02:00			`if self.subfix:`
subtitle: Add subfix after episodename if we want to download all subtitles. 2018-05-27 15:55:25 +02:00			`if self.config.get("get_all_subtitles"):`
			`if self.output["episodename"]:`
			`self.output["episodename"] = "{}-{}".format(self.output["episodename"], self.subfix)`
			`else:`
			`self.output["episodename"] = self.subfix`
Flake8 fixes 2018-01-30 20:11:37 +01:00
subtitle: use config instead of options 2018-05-13 01:46:17 +02:00			`if self.config.get("get_raw_subtitles"):`
subtitle: added command --raw-subtitles to download raw subtitle file also 2016-04-27 13:12:30 +02:00			`subdata = self.raw(subdata)`
			`self.save_file(subdata, self.subtype)`
Flake8 fixes 2018-01-30 20:11:37 +01:00
subtitle: added command --raw-subtitles to download raw subtitle file also 2016-04-27 13:12:30 +02:00			`self.save_file(data, "srt")`
Flake8 fixes 2018-01-30 20:11:37 +01:00
subtitle: added command --raw-subtitles to download raw subtitle file also 2016-04-27 13:12:30 +02:00			`def save_file(self, data, subtype):`
remove a bunch of is_py. time for py3. 2018-01-13 20:27:40 +01:00			`if platform.system() == "Windows":`
subtitle: use config instead of options 2018-05-13 01:46:17 +02:00			`file_d = output(self.output, self.config, subtype, mode="wt", encoding="utf-8")`
subtitle: we need to tell the file is utf-8 on windows and py3 2015-10-25 17:19:16 +01:00			`else:`
subtitle: use config instead of options 2018-05-13 01:46:17 +02:00			`file_d = output(self.output, self.config, subtype, mode="wt")`
subtitle: use output function instead of the special one for this class. 2014-12-30 21:20:03 +01:00			`if hasattr(file_d, "read") is False:`
			`return`
			`file_d.write(data)`
			`file_d.close()`
Flake8 fixes 2018-01-30 20:11:37 +01:00
			`def raw(self, subdata):`
remove a bunch of is_py. time for py3. 2018-01-13 20:27:40 +01:00			`return subdata.text`
Flake8 fixes 2018-01-30 20:11:37 +01:00
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`def tt(self, subdata):`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`i = 1`
			`data = ""`
remove a bunch of is_py. time for py3. 2018-01-13 20:27:40 +01:00			`subs = subdata.text`
swap place on py2 and py3 checks 2016-01-27 19:49:38 +01:00
subtitle_tt: remove namespace info 2015-10-10 16:31:42 +02:00			`subdata = re.sub(' xmlns="[^"]+"', '', subs, count=1)`
			`tree = ET.XML(subdata)`
			`xml = tree.find("body").find("div")`
			`plist = list(xml.findall("p"))`
subtitle_tt: rewrote the function. this fixes #111 2014-07-09 18:39:18 +02:00			`for node in plist:`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`tag = norm(node.tag)`
subtitle_tt: rewrote the function. this fixes #111 2014-07-09 18:39:18 +02:00			`if tag == "p" or tag == "span":`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`begin = node.attrib["begin"]`
			`if not ("dur" in node.attrib):`
			`duration = node.attrib["duration"]`
			`else:`
			`duration = node.attrib["dur"]`
			`if not ("end" in node.attrib):`
			`begin2 = begin.split(":")`
			`duration2 = duration.split(":")`
subtitle.tt: dont crash on negative milliseconds. 2016-02-08 21:28:39 +01:00			`try:`
			`sec = float(begin2[2]) + float(duration2[2])`
			`except ValueError:`
			`sec = 0.000`
subtitle.tt: fix error when calculating end time. 2016-10-24 21:24:36 +02:00			`end = "%02d:%02d:%06.3f" % (int(begin2[0]), int(begin2[1]), sec)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`else:`
			`end = node.attrib["end"]`
pylint fixes 2014-12-26 02:04:29 +01:00			`data += '%s\n%s --> %s\n' % (i, begin.replace(".", ","), end.replace(".", ","))`
subtitle_tt: rewrote the function. this fixes #111 2014-07-09 18:39:18 +02:00			`data = tt_text(node, data)`
			`data += "\n"`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`i += 1`
remove a bunch of is_py. time for py3. 2018-01-13 20:27:40 +01:00
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`return data`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`def json(self, subdata):`
subtitle: workaround a bug in requests in ubuntu 14.04 LTS 2.2.1 cant convert string from bytes to text right. fixes #259 2015-09-20 15:15:50 +02:00			`data = json.loads(subdata.text)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`number = 1`
			`subs = ""`
			`for i in data:`
			`subs += "%s\n%s --> %s\n" % (number, timestr(int(i["startMillis"])), timestr(int(i["endMillis"])))`
remove a bunch of is_py. time for py3. 2018-01-13 20:27:40 +01:00			`subs += "%s\n\n" % i["text"]`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`number += 1`

subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`return subs`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`def sami(self, subdata):`
sami: dont crash on & this fixes #396 2016-06-01 22:43:39 +02:00			`text = subdata.text`
			`text = re.sub(r'&', '&', text)`
			`tree = ET.fromstring(text)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`subt = tree.find("Font")`
			`subs = ""`
			`n = 0`
			`for i in subt.getiterator():`
			`if i.tag == "Subtitle":`
			`n = i.attrib["SpotNumber"]`
sami: they started to use : instead of , 2014-12-15 22:19:58 +01:00
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`if i.attrib["SpotNumber"] == "1":`
sami: they started to use : instead of , 2014-12-15 22:19:58 +01:00			`subs += "%s\n%s --> %s\n" % (i.attrib["SpotNumber"], timecolon(i.attrib["TimeIn"]), timecolon(i.attrib["TimeOut"]))`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`else:`
sami: they started to use : instead of , 2014-12-15 22:19:58 +01:00			`subs += "\n%s\n%s --> %s\n" % (i.attrib["SpotNumber"], timecolon(i.attrib["TimeIn"]), timecolon(i.attrib["TimeOut"]))`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`else:`
subtitle.sami: don’t try to decode None fixes: #477 2016-10-17 21:40:20 +02:00			`if int(n) > 0 and i.text:`
subtitle: decode some htmlentities in sami fixes #421 2016-08-01 21:02:38 +02:00			`subs += "%s\n" % decode_html_entities(i.text)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
sami: dont crash on & this fixes #396 2016-06-01 22:43:39 +02:00			`subs = re.sub('&', r'&', subs)`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`return subs`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`def smi(self, subdata):`
subtitle: workaround a bug in requests in ubuntu 14.04 LTS 2.2.1 cant convert string from bytes to text right. fixes #259 2015-09-20 15:15:50 +02:00			`if requests_version < 0x20300:`
remove a bunch of is_py. time for py3. 2018-01-13 20:27:40 +01:00			`subdata = subdata.content.decode("latin")`
subtitle: workaround a bug in requests in ubuntu 14.04 LTS 2.2.1 cant convert string from bytes to text right. fixes #259 2015-09-20 15:15:50 +02:00			`else:`
subtitle_smi: encode it as iso-8859-1 this fixes #282 2015-10-19 23:35:57 +02:00			`subdata.encoding = "ISO-8859-1"`
subtitle: workaround a bug in requests in ubuntu 14.04 LTS 2.2.1 cant convert string from bytes to text right. fixes #259 2015-09-20 15:15:50 +02:00			`subdata = subdata.text`
smi: Handle subtitle that starts direct after the previous one. this fixes #225 2015-04-28 23:00:24 +02:00			`ssubdata = StringIO(subdata)`
			`timea = 0`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`number = 1`
smi: Handle subtitle that starts direct after the previous one. this fixes #225 2015-04-28 23:00:24 +02:00			`data = None`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`subs = ""`
Subtitles: Smi-subtitles updated regex to also remove <div tags> 2017-03-21 09:49:07 +01:00			`TAG_RE = re.compile(r'<(?!\/?i).*?>')`
subtitle_smi: Empty subtitles. this fixes #180 2014-11-23 13:02:14 +01:00			`bad_char = re.compile(r'\x96')`
smi: Handle subtitle that starts direct after the previous one. this fixes #225 2015-04-28 23:00:24 +02:00			`for i in ssubdata.readlines():`
smi: check if data is empty 2015-05-01 22:34:02 +02:00			`i = i.rstrip()`
subtitle: missing r-prefix 2015-04-28 23:16:44 +02:00			`sync = re.search(r"<SYNC Start=(\d+)>", i)`
smi: Handle subtitle that starts direct after the previous one. this fixes #225 2015-04-28 23:00:24 +02:00			`if sync:`
			`if int(sync.group(1)) != int(timea):`
smi: check if data is empty 2015-05-01 22:34:02 +02:00			`if data and data != " ":`
smi: Handle subtitle that starts direct after the previous one. this fixes #225 2015-04-28 23:00:24 +02:00			`subs += "%s\n%s --> %s\n" % (number, timestr(timea), timestr(sync.group(1)))`
subtitle: use decode_html_entities function use decode_html_entities to decode html entities 2017-03-22 23:22:57 +01:00			`text = "%s\n" % TAG_RE.sub('', data.replace("<br>", "\n"))`
			`text = decode_html_entities(text)`
Flake8 fixes 2018-01-30 20:11:37 +01:00			`if text[len(text) - 2] != "\n":`
smi: check if data is empty 2015-05-01 22:34:02 +02:00			`text += "\n"`
smi: Handle subtitle that starts direct after the previous one. this fixes #225 2015-04-28 23:00:24 +02:00			`subs += text`
			`number += 1`
			`timea = sync.group(1)`
			`text = re.search("<P Class=SVCC>(.*)", i)`
			`if text:`
			`data = text.group(1)`
subtitle_smi: Empty subtitles. this fixes #180 2014-11-23 13:02:14 +01:00			`recomp = re.compile(r'\r')`
subtitle: use decode_html_entities function use decode_html_entities to decode html entities 2017-03-22 23:22:57 +01:00			`text = bad_char.sub('-', recomp.sub('', subs))`
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`return text`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00
subtitle: refactor so we can reuse the try-except-thing 2014-08-31 01:20:36 +02:00			`def wrst(self, subdata):`
subtitle: workaround a bug in requests in ubuntu 14.04 LTS 2.2.1 cant convert string from bytes to text right. fixes #259 2015-09-20 15:15:50 +02:00			`ssubdata = StringIO(subdata.text)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`srt = ""`
subtitle_wsrt: subtract 10 hours when it begins at >9 hours. This fixes #101 2014-06-07 18:50:51 +02:00			`subtract = False`
subtitle: rewrite wrst function 2015-03-01 21:44:55 +01:00			`number_b = 1`
			`number = 0`
			`block = 0`
			`subnr = False`
subtitle: dont double encode utf8 fixes: #507 fixes: #501 2016-12-05 20:45:14 +01:00			`if self.bom:`
			`ssubdata.read(1)`
subtitle: rewrite wrst function 2015-03-01 21:44:55 +01:00			`for i in ssubdata.readlines():`
subtitle: missing r-prefix 2015-04-28 23:16:44 +02:00			`match = re.search(r"^[\r\n]+", i)`
			`match2 = re.search(r"([\d:\.]+ --> [\d:\.]+)", i)`
			`match3 = re.search(r"^(\d+)\s", i)`
subtitle: rewrite wrst function 2015-03-01 21:44:55 +01:00			`if i[:6] == "WEBVTT":`
subtitle: dont double encode utf8 fixes: #507 fixes: #501 2016-12-05 20:45:14 +01:00			`continue`
			`elif "X-TIMESTAMP" in i:`
			`continue`
			`elif match and number_b == 1 and self.bom:`
			`continue`
subtitle: rewrite wrst function 2015-03-01 21:44:55 +01:00			`elif match and number_b > 1:`
			`block = 0`
			`srt += "\n"`
			`elif match2:`
			`if not subnr:`
			`srt += "%s\n" % number_b`
subtitle.wsrt: urplay only had minutes and seconds fixes #639 2017-06-05 17:31:47 +02:00			`matchx = re.search(r'(?P<h1>\d+):(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<h2>\d+):(?P<m2>\d+):(?P<s2>[\d\.]+)', i)`
			`if matchx:`
			`hour1 = int(matchx.group("h1"))`
			`hour2 = int(matchx.group("h2"))`
			`if int(number) == 1:`
			`if hour1 > 9:`
			`subtract = True`
			`if subtract:`
			`hour1 -= 10`
			`hour2 -= 10`
			`else:`
			`matchx = re.search(r'(?P<m1>\d+):(?P<s1>[\d\.]+) --> (?P<m2>\d+):(?P<s2>[\d\.]+)', i)`
			`hour1 = 0`
			`hour2 = 0`
Flake8 fixes 2018-01-30 20:11:37 +01:00			`time = "{0:02d}:{1}:{2} --> {3:02d}:{4}:{5}\n".format(hour1, matchx.group("m1"), matchx.group("s1").replace(".", ","),`
			`hour2, matchx.group("m2"), matchx.group("s2").replace(".", ","))`
subtitle: rewrite wrst function 2015-03-01 21:44:55 +01:00			`srt += time`
			`block = 1`
			`subnr = False`
			`number_b += 1`

			`elif match3 and block == 0:`
			`number = match3.group(1)`
			`srt += "%s\n" % number`
			`subnr = True`
			`else:`
subtitle: use config instead of options 2018-05-13 01:46:17 +02:00			`if self.config.get("convert_subtitle_colors"):`
Flake8 fixes 2018-01-30 20:11:37 +01:00			`colors = {'30': '#000000', '31': '#ff0000', '32': '#00ff00', '33': '#ffff00',`
			`'34': '#0000ff', '35': '#ff00ff', '36': '#00ffff', '37': '#ffffff'}`
subtitles: --convert-subtitle-colors command added to convert color information in wsrt subtitle files to <font color=""> tags. 2016-05-09 15:10:58 +02:00			`sub = i`
			`for tag, color in colors.items():`
			`regex1 = '<' + tag + '>'`
			`replace = '<font color="' + color + '">'`
			`sub = re.sub(regex1, replace, sub)`
Flake8 fixes 2018-01-30 20:11:37 +01:00
			`sub = re.sub('</.+>', '</font>', sub)`
subtitles: --convert-subtitle-colors command added to convert color information in wsrt subtitle files to <font color=""> tags. 2016-05-09 15:10:58 +02:00			`else:`
			`sub = re.sub('<[^>]*>', '', i)`
subtitle: strip line endings 2016-04-20 18:42:46 +02:00			`srt += sub.strip()`
Flake8 fixes 2018-01-30 20:11:37 +01:00			`srt += "\n"`
subtitle_wsrt: decode html entities fixes #255 2015-09-01 22:54:16 +02:00			`srt = decode_html_entities(srt)`
wrst: don’t encode the data to utf8 on python3 2015-04-28 22:59:07 +02:00			`return srt`
subtitle: we only need one save function. 2014-04-27 15:33:05 +02:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`def timestr(msec):`
			`"""`
			`Convert a millisecond value to a string of the following`
			`format:`

			`HH:MM:SS,SS`

			`with 10 millisecond precision. Note the , seperator in`
			`the seconds.`
			`"""`
			`sec = float(msec) / 1000`

			`hours = int(sec / 3600)`
			`sec -= hours * 3600`

			`minutes = int(sec / 60)`
			`sec -= minutes * 60`

timestr: leading zero in single digit 2016-06-03 00:09:11 +02:00			`output = "%02d:%02d:%06.3f" % (hours, minutes, sec)`
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`return output.replace(".", ",")`

pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
sami: they started to use : instead of , 2014-12-15 22:19:58 +01:00			`def timecolon(data):`
missing r prefix in regex matches. 2014-12-22 10:20:37 +01:00			`match = re.search(r"(\d+:\d+:\d+):(\d+)", data)`
sami: they started to use : instead of , 2014-12-15 22:19:58 +01:00			`return "%s,%s" % (match.group(1), match.group(2))`

pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
Move subtitles into its own file 2014-04-21 19:52:09 +02:00			`def norm(name):`
			`if name[0] == "{":`
			`_, tag = name[1:].split("}")`
			`return tag`
			`else:`
			`return name`
subtitle_tt: rewrote the function. this fixes #111 2014-07-09 18:39:18 +02:00
pip8. expected 2 lines found 1 2015-09-15 20:10:32 +02:00
subtitle_tt: rewrote the function. this fixes #111 2014-07-09 18:39:18 +02:00			`def tt_text(node, data):`
			`if node.text:`
			`data += "%s\n" % node.text.strip(' \t\n\r')`
			`for i in node:`
			`if i.text:`
			`data += "%s\n" % i.text.strip(' \t\n\r')`
			`if i.tail:`
			`text = i.tail.strip(' \t\n\r')`
			`if text:`
			`data += "%s\n" % text`
pylint fixes 2015-01-05 21:52:34 +01:00			`return data`