1
0
mirror of https://github.com/spaam/svtplay-dl.git synced 2024-11-27 21:54:17 +01:00
svtplay-dl/lib/svtplay_dl/postprocess/__init__.py

257 lines
10 KiB
Python
Raw Normal View History

2019-08-25 00:40:39 +02:00
import logging
import os
import platform
import re
2019-08-25 00:40:39 +02:00
from json import dumps
from random import sample
from re import match
2019-08-25 00:40:39 +02:00
from shutil import which
2019-08-25 00:40:39 +02:00
from requests import codes
from requests import post
from requests import Timeout
2018-05-13 01:44:21 +02:00
from svtplay_dl.utils.output import formatname
from svtplay_dl.utils.proc import run_program
2019-08-25 00:33:51 +02:00
class postprocess:
    """Remux or merge a finished download with ffmpeg/avconv.

    ``stream`` must expose ``finished``, ``output`` and ``output_extention``;
    ``config`` must expose ``get(key)``. ``subfixes`` is an optional list of
    filename suffixes, one per downloaded subtitle file.
    """

    def __init__(self, stream, config, subfixes=None):
        """Store the inputs and locate the muxer binary.

        ``self.detect`` is the path returned by ``shutil.which`` for the first
        of ``ffmpeg`` / ``avconv`` that is found, or ``None`` if neither is.
        """
        self.stream = stream
        self.config = config
        self.subfixes = subfixes
        self.detect = None
        for i in ["ffmpeg", "avconv"]:
            self.detect = which(i)
            if self.detect:
                break

    def _subtitle_files(self, name):
        """Return the .srt paths that belong to the media file stem *name*.

        With two or more subfixes there is one file per subfix
        (``name + subfix + ".srt"``); otherwise a single ``name + ".srt"``.
        """
        if self.subfixes and len(self.subfixes) >= 2:
            return ["{}.srt".format(name + subfix) for subfix in self.subfixes]
        return ["{}.srt".format(name)]

    def sublanguage(self):
        """Return a list of language codes, one per subtitle file.

        Subfixes that start with one of the hard-coded language names are
        mapped locally; every other subtitle is sent to a remote language
        detection service. ``"und"`` is used when the service gives no usable
        answer. An empty list is returned when ``get_all_subtitles`` is set
        but there are no subfixes.
        """

        # parse() function partly borrowed from a guy on github. /thanks!
        # https://github.com/riobard/srt.py/blob/master/srt.py
        def parse(path):
            def parse_block(block):
                # The first two lines of a block are dropped; the rest is the text.
                lines = block.strip("-").split("\n")
                return "\r\n".join(lines[2:])

            # Windows is forced to utf8; other platforms keep the default encoding.
            encoding = "utf8" if platform.system() == "Windows" else None
            # Context manager so the file handle is always closed.
            with open(path, encoding=encoding) as fd:
                content = fd.read()
            return list(map(parse_block, content.strip().replace("\r", "").split("\n\n")))

        def query(path):
            sentences = parse(path)
            # Send at most 8 randomly chosen subtitle blocks.
            random_sentences = " ".join(sample(sentences, min(len(sentences), 8))).replace("\r\n", "")
            url = "https://whatlanguage.herokuapp.com"
            payload = {"query": random_sentences}
            # Note: requests handles json from version 2.4.2 and onwards so i use json.dumps for now.
            headers = {"content-type": "application/json"}
            try:
                # Note: reasonable timeout i guess? svtplay-dl is mainly used while multitasking i presume,
                # and it is heroku after all (fast enough)
                r = post(url, data=dumps(payload), headers=headers, timeout=30)
            except Timeout:
                logging.error("30 seconds server timeout reached. Setting language as undetermined.")
                return "und"
            if r.status_code != codes.ok:
                logging.error("Server error appeared. Setting language as undetermined.")
                return "und"
            try:
                return r.json()["language"]
            except (TypeError, ValueError, KeyError):
                # Body is not JSON, is not a mapping, or lacks "language".
                return "und"

        langs = []
        exceptions = {"lulesamiska": "smj", "meankieli": "fit", "jiddisch": "yid"}
        if self.subfixes and len(self.subfixes) >= 2:
            logging.info("Determining the languages of the subtitles.")
        else:
            logging.info("Determining the language of the subtitle.")

        if self.config.get("get_all_subtitles"):
            for subfix in self.subfixes or []:
                cleaned = subfix.strip("-")
                # Use the key that matched instead of indexing with the raw
                # subfix, which raised KeyError for e.g. "jiddisch.text".
                known = next((code for key, code in exceptions.items() if match(key, cleaned)), None)
                if known is not None:
                    langs.append(known)
                    continue
                subfile = "{}.srt".format(os.path.splitext(formatname(self.stream.output, self.config, self.stream.output_extention))[0] + subfix)
                langs.append(query(subfile))
        else:
            subfile = "{}.srt".format(os.path.splitext(formatname(self.stream.output, self.config, self.stream.output_extention))[0])
            langs.append(query(subfile))

        if len(langs) >= 2:
            logging.info("Language codes: " + ", ".join(langs))
        elif langs:
            logging.info("Language code: " + langs[0])
        return langs

    def remux(self):
        """Remux the downloaded file into an .mp4 container.

        Does nothing when no muxer was found, the stream is not finished, the
        muxer exits non-zero, or the file already ends with ``.mp4``. On
        success the original file is removed and the temp file is renamed to
        ``<name>.mp4``. With ``merge_subtitle`` the subtitle files are muxed in
        and removed afterwards unless ``subtitle`` is also set.
        """
        if self.detect is None:
            logging.error("Cant detect ffmpeg or avconv. Cant mux files without it.")
            return
        if self.stream.finished is False:
            return

        orig_filename = formatname(self.stream.output, self.config, self.stream.output_extention)
        if orig_filename.endswith(".mp4"):
            return

        name, ext = os.path.splitext(orig_filename)
        new_name = "{}.mp4".format(name)

        # Probe run: stream info is parsed from stderr.
        cmd = [self.detect, "-i", orig_filename]
        _, stdout, stderr = run_program(cmd, False)  # return 1 is good here.
        streams = _streams(stderr)
        videotrack, audiotrack = _checktracks(streams)

        if self.config.get("merge_subtitle"):
            logging.info("Muxing {} and merging its subtitle into {}".format(orig_filename, new_name))
        else:
            logging.info("Muxing {} into {}".format(orig_filename, new_name))

        tempfile = "{}.temp".format(orig_filename)
        arguments = []
        if videotrack:
            arguments += ["-map", "{}".format(videotrack)]
        if audiotrack:
            arguments += ["-map", "{}".format(audiotrack)]
        arguments += ["-c", "copy", "-f", "mp4"]
        # Guard on audiotrack: _getcodec returns None when no track matches,
        # and `"aac" in None` would raise TypeError.
        if ext == ".ts" and audiotrack and "aac" in _getcodec(streams, audiotrack):
            arguments += ["-bsf:a", "aac_adtstoasc"]

        if self.config.get("merge_subtitle"):
            langs = self.sublanguage()
            # Input 0 is the media file, so subtitle inputs start at index 1.
            for stream_num, language in enumerate(langs):
                arguments += [
                    "-map",
                    str(stream_num + 1),
                    "-c:s:" + str(stream_num),
                    "mov_text",
                    "-metadata:s:s:" + str(stream_num),
                    "language=" + language,
                ]
            # Each subtitle file is added as an input exactly once.
            for subfile in self._subtitle_files(name):
                cmd += ["-i", subfile]

        arguments += ["-y", tempfile]
        cmd += arguments
        returncode, stdout, stderr = run_program(cmd)
        if returncode != 0:
            return

        if self.config.get("merge_subtitle") and not self.config.get("subtitle"):
            logging.info("Muxing done, removing the old files.")
            for subfile in self._subtitle_files(name):
                os.remove(subfile)
        else:
            logging.info("Muxing done, removing the old file.")
        os.remove(orig_filename)
        os.rename(tempfile, new_name)

    def merge(self):
        """Merge the separately downloaded audio file into the video file.

        The audio file is ``<name>.audio.ts`` when the output extension is
        ``.ts`` and ``<name>.m4a`` otherwise. Does nothing when no muxer was
        found, the stream is not finished, or the muxer exits non-zero. On
        success the original video and audio files are removed and the temp
        file takes the original filename. With ``merge_subtitle`` the subtitle
        files are muxed in and removed afterwards unless ``subtitle`` is set.
        """
        if self.detect is None:
            logging.error("Cant detect ffmpeg or avconv. Cant mux files without it.")
            return
        if self.stream.finished is False:
            return

        orig_filename = formatname(self.stream.output, self.config, self.stream.output_extention)
        name, ext = os.path.splitext(orig_filename)
        audio_filename = "{}.audio.ts".format(name)
        # Probe run: stream info is parsed from stderr.
        # NOTE(review): the probe always uses the .audio.ts name, even when the
        # .m4a name is selected below - confirm this is intended.
        cmd = [self.detect, "-i", orig_filename, "-i", audio_filename]
        _, stdout, stderr = run_program(cmd, False)  # return 1 is good here.
        streams = _streams(stderr)
        videotrack, audiotrack = _checktracks(streams)

        if self.config.get("merge_subtitle"):
            logging.info("Merge audio, video and subtitle into {}".format(orig_filename))
        else:
            logging.info("Merge audio and video into {}".format(orig_filename))

        tempfile = "{}.temp".format(orig_filename)
        arguments = ["-c:v", "copy", "-c:a", "copy", "-f", "mp4"]
        if ext == ".ts":
            if audiotrack and "aac" in _getcodec(streams, audiotrack):
                arguments += ["-bsf:a", "aac_adtstoasc"]
        else:
            audio_filename = "{}.m4a".format(name)
        # Rebuild the command so it carries the final audio filename.
        cmd = [self.detect, "-i", orig_filename, "-i", audio_filename]
        if videotrack:
            arguments += ["-map", "{}".format(videotrack)]
        if audiotrack:
            arguments += ["-map", "{}".format(audiotrack)]

        if self.config.get("merge_subtitle"):
            langs = self.sublanguage()
            tracks = [x for x in [videotrack, audiotrack] if x]
            # NOTE(review): numbering starts at len(tracks) but the subtitle
            # index subtracts a fixed 2, so this assumes both a video and an
            # audio track were detected - confirm before changing.
            for stream_num, language in enumerate(langs, start=len(tracks)):
                arguments += [
                    "-map",
                    str(stream_num),
                    "-c:s:" + str(stream_num - 2),
                    "mov_text",
                    "-metadata:s:s:" + str(stream_num - 2),
                    "language=" + language,
                ]
            # Each subtitle file is added as an input exactly once.
            for subfile in self._subtitle_files(name):
                cmd += ["-i", subfile]

        arguments += ["-y", tempfile]
        cmd += arguments
        returncode, stdout, stderr = run_program(cmd)
        if returncode != 0:
            return

        logging.info("Merging done, removing old files.")
        os.remove(orig_filename)
        os.remove(audio_filename)
        if self.config.get("merge_subtitle") and not self.config.get("subtitle"):
            for subfile in self._subtitle_files(name):
                os.remove(subfile)
        os.rename(tempfile, orig_filename)
def _streams(output):
return re.findall(r"Stream \#(\d:\d)([\[\(][^\[]+[\]\)])?([\(\)\w]+)?: (Video|Audio): (.*)", output)
def _getcodec(streams, number):
for stream in streams:
if stream[0] == number:
return stream[4]
return None
def _checktracks(streams):
videotrack = None
audiotrack = None
for stream in streams:
if stream[3] == "Video":
videotrack = stream[0]
if stream[3] == "Audio":
if stream[4] == "mp3, 0 channels":
continue
audiotrack = stream[0]
return videotrack, audiotrack