svtplay-dl/lib/svtplay_dl/utils/http.py

import http.client
import logging
import re
from html import unescape
from urllib.parse import urljoin

from requests import Session
from requests.adapters import HTTPAdapter
from requests.adapters import Retry
from svtplay_dl.utils.output import formatname
from svtplay_dl.utils.parser import Options

# Raise http.client's cap on how many response headers it will parse.
# Per the commit history, Akamai-served responses carry a large number of
# headers, which exceeded the library default.
http.client._MAXHEADERS = 200

# User-Agent string installed on every HTTP session (see HTTP.__init__).
# NOTE: the name is historical -- the value is a Chrome-on-Windows UA string.
FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"

# Retry policy shared by the adapters that HTTP mounts for http:// and https://:
# at most 5 retries (total / read / connect), backoff factor 0.3, and a retry
# is also triggered by 500, 502 and 504 responses.
retry = Retry(total=5, read=5, connect=5, backoff_factor=0.3, status_forcelist=(500, 502, 504))


class HTTP(Session):
    """A ``requests`` session preconfigured from an svtplay-dl config object.

    The session mounts a retrying adapter for both ``http://`` and
    ``https://``, applies the configured TLS verification and proxy settings
    to every request, and installs extra headers / cookies given in the
    config together with a browser-like User-Agent.
    """

    def __init__(self, config=None, *args, **kwargs):
        """Create the session.

        Args:
            config: Object with a ``get(key)`` method (a dict works). The keys
                read are ``ssl_verify``, ``proxy``, ``http_headers`` and
                ``cookies``. ``None`` is treated as an empty config.
            *args, **kwargs: Passed through to ``Session.__init__``.
        """
        Session.__init__(self, *args, **kwargs)
        # Avoid a shared mutable default argument.
        if config is None:
            config = {}
        adapter = HTTPAdapter(max_retries=retry)

        self.mount("http://", adapter)
        self.mount("https://", adapter)
        self.verify = config.get("ssl_verify")
        self.proxy = config.get("proxy")
        if config.get("http_headers"):
            self.headers.update(self.split_header(config.get("http_headers")))
        if config.get("cookies"):
            self.cookies.update(self.split_header(config.get("cookies")))
        # Set last so the spoofed User-Agent always wins over configured headers.
        self.headers.update({"User-Agent": FIREFOX_UA})

    def check_redirect(self, url):
        """Return the final URL reached after following redirects from *url*."""
        # stream=True: only the resolved URL is needed, not the body.
        return self.get(url, stream=True).url

    def request(self, method, url, *args, **kwargs):
        """Send a request using the session's verify/proxy settings.

        Headers passed via ``headers=`` are merged into the *session* headers,
        so they persist for later requests as well. When a request is made
        without ``headers``, a previously stored ``Range`` header is dropped so
        that it does not leak into unrelated requests.

        ``verify`` and ``proxies`` default to the values configured on the
        session but may be overridden per call.
        """
        headers = kwargs.pop("headers", None)
        if headers:
            self.headers.update(headers)
        else:
            # A Range header set by an earlier call would otherwise be sent
            # with every following request.
            self.headers.pop("Range", None)
        logging.debug("HTTP getting %r", url)
        # setdefault instead of hard-coded keywords: a caller that passes
        # verify=/proxies= explicitly no longer triggers a duplicate-keyword
        # TypeError.
        kwargs.setdefault("verify", self.verify)
        kwargs.setdefault("proxies", self.proxy)
        return Session.request(self, method, url, *args, **kwargs)

    def split_header(self, headers):
        """Parse a ``"name=value;name2=value2"`` string into a dict.

        Only the first ``=`` of each entry separates name from value, so
        values may themselves contain ``=`` (e.g. base64 padding). Whitespace
        around names and values is stripped and blank entries are ignored.

        Raises:
            ValueError: If a non-blank entry contains no ``=``.
        """
        parsed = {}
        for entry in headers.split(";"):
            if not entry.strip():
                continue
            name, sep, value = entry.partition("=")
            if not sep:
                raise ValueError(f"expected 'name=value' but got {entry.strip()!r}")
            parsed[name.strip()] = value.strip()
        return parsed


def download_thumbnails(output, config, urls):
    """Download thumbnail images and write them next to the media files.

    Args:
        output: Mapping describing the output name; it is copied per thumbnail
            and the copy gets ``ext`` set to ``"tbn"``.
        config: Configuration object providing ``get(key)``.
        urls: Iterable of ``(show, url)`` pairs. When ``show`` is truthy the
            thumbnail belongs to the whole show and is saved using the
            ``{title}.tvshow.{ext}`` filename template; otherwise the
            filename is derived from *config* as-is.
    """
    # One session for the whole batch: connections are reused and the pool is
    # released on exit instead of leaking a fresh Session per thumbnail.
    with Session() as session:
        for show, url in urls:
            if "&amp;" in url:
                # Some services hand out HTML-escaped thumbnail URLs.
                url = unescape(url)
            data = session.get(url).content

            thumb_output = output.copy()
            thumb_output["ext"] = "tbn"
            if show:
                # Config for downloading show thumbnail
                thumb_config = Options()
                thumb_config.set("output", config.get("output"))
                thumb_config.set("path", config.get("path"))
                thumb_config.set("subfolder", config.get("subfolder"))
                thumb_config.set("filename", "{title}.tvshow.{ext}")
            else:
                thumb_config = config

            filename = formatname(thumb_output, thumb_config)
            logging.info("Thumbnail: %s", filename)

            with open(filename, "wb") as fd:
                fd.write(data)


def get_full_url(url, srcurl):
    """Resolve *url* (as found in a document fetched from *srcurl*) to a full URL.

    Args:
        url: An absolute ``http(s)://`` URL, a host-absolute path starting
            with ``/``, or a path relative to the directory of *srcurl*.
        srcurl: URL of the document in which *url* was found.

    Returns:
        *url* unchanged when it is already absolute, otherwise the URL
        obtained by resolving it against *srcurl*.
    """
    # Require the full scheme separator so that relative names which merely
    # begin with "http" are not mistaken for absolute URLs.
    if url.startswith(("http://", "https://")):
        return url

    # startswith() rather than url[0], so an empty url does not raise.
    if url.startswith("/"):
        # The origin ends at the first "/", "?" or "#". No trailing slash is
        # required, so a bare "https://host" source works too.
        origin = re.search(r"^(https?://[^/?#]+)", srcurl)
        if origin is None:
            # Source is not an http(s) URL; let urljoin do its best.
            return urljoin(srcurl, url)
        return f"{origin.group(1)}{url}"

    # remove everything after last / in the path of the URL
    baseurl = re.sub(r"^([^\?]+)/[^/]*(\?.*)?$", r"\1/", srcurl)
    return urljoin(baseurl, url)
utils.http: increase number of headers akamai seems to add a bunch of headers for some reason 2023-04-19 03:25:31 +02:00			`import http.client`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`import logging`
pre-commit: reorder imports 2019-08-25 00:40:39 +02:00			`import re`
utils.download_thumbnails: unescape url tv4play escape their thumb urls for some reason 2021-07-09 20:30:31 +02:00			`from html import unescape`
utilis.http: move get_full_url from hls to http 2018-07-05 01:24:16 +02:00			`from urllib.parse import urljoin`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00
			`from requests import Session`
			`from requests.adapters import HTTPAdapter`
utils.http: increase number of headers akamai seems to add a bunch of headers for some reason 2023-04-19 03:25:31 +02:00			`from requests.adapters import Retry`
Working thumbnail download. Used by svtplay and barnkanalen, downloads both episode and show thumbnails. 2018-07-22 11:12:22 +02:00			`from svtplay_dl.utils.output import formatname`
			`from svtplay_dl.utils.parser import Options`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00
utils.http: increase number of headers akamai seems to add a bunch of headers for some reason 2023-04-19 03:25:31 +02:00			`http.client._MAXHEADERS = 200`

utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`# Used for UA spoofing in get_http_data()`
Update user-agent to something newer 2023-04-19 03:27:59 +02:00			`FIREFOX_UA = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36"`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`retry = Retry(total=5, read=5, connect=5, backoff_factor=0.3, status_forcelist=(500, 502, 504))`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00

			`class HTTP(Session):`
pylint fixes 2021-12-18 21:37:09 +01:00			`def __init__(self, config={}, *args, **kwargs):`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`Session.__init__(self, *args, **kwargs)`
			`adapter = HTTPAdapter(max_retries=retry)`
Options to config 2018-05-13 13:06:45 +02:00
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`self.mount("http://", adapter)`
			`self.mount("https://", adapter)`
more options to config replaces 2018-05-08 22:46:11 +02:00			`self.verify = config.get("ssl_verify")`
			`self.proxy = config.get("proxy")`
			`if config.get("http_headers"):`
			`self.headers.update(self.split_header(config.get("http_headers")))`
Add support for cookies it works similar to headers 2020-09-15 23:43:20 +02:00			`if config.get("cookies"):`
			`self.cookies.update(self.split_header(config.get("cookies")))`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`self.headers.update({"User-Agent": FIREFOX_UA})`

			`def check_redirect(self, url):`
			`return self.get(url, stream=True).url`

			`def request(self, method, url, *args, **kwargs):`
			`headers = kwargs.pop("headers", None)`
			`if headers:`
			`for i in headers.keys():`
			`self.headers[i] = headers[i]`
http: remove range because we dont need it all the time for some reason once you set it, it will be in every request after it. 2023-11-29 00:01:17 +01:00			`else:`
			`if "Range" in self.headers: # for some reason headers is always there for each request`
			`del self.headers["Range"] # need to remove it because we dont want it`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`logging.debug("HTTP getting %r", url)`
pre-commit: its a good thing to commit config... 2019-09-06 22:49:49 +02:00			`res = Session.request(self, method, url, verify=self.verify, proxies=self.proxy, *args, **kwargs)`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00			`return res`

			`def split_header(self, headers):`
http: check if split_header value is correct 2021-02-23 23:44:15 +01:00			`return dict(x.split("=") for x in headers.split(";") if x)`
utils: move functions out of init to its own files. 2018-03-13 00:33:39 +01:00

Working thumbnail download. Used by svtplay and barnkanalen, downloads both episode and show thumbnails. 2018-07-22 11:12:22 +02:00			`def download_thumbnails(output, config, urls):`
			`for show, url in urls:`
utils.download_thumbnails: unescape url tv4play escape their thumb urls for some reason 2021-07-09 20:30:31 +02:00			`if "&amp;" in url:`
			`url = unescape(url)`
Working thumbnail download. Used by svtplay and barnkanalen, downloads both episode and show thumbnails. 2018-07-22 11:12:22 +02:00			`data = Session().get(url).content`
Rewrite how we detect already downloaded files this will also check if we already downloaded the files if we already have it or not 2021-05-03 01:43:37 +02:00			`loutout = output.copy()`
			`loutout["ext"] = "tbn"`
Working thumbnail download. Used by svtplay and barnkanalen, downloads both episode and show thumbnails. 2018-07-22 11:12:22 +02:00			`if show:`
			`# Config for downloading show thumbnail`
			`cconfig = Options()`
			`cconfig.set("output", config.get("output"))`
			`cconfig.set("path", config.get("path"))`
			`cconfig.set("subfolder", config.get("subfolder"))`
			`cconfig.set("filename", "{title}.tvshow.{ext}")`
			`else:`
			`cconfig = config`

Rewrite how we detect already downloaded files this will also check if we already downloaded the files if we already have it or not 2021-05-03 01:43:37 +02:00			`filename = formatname(loutout, cconfig)`
Working thumbnail download. Used by svtplay and barnkanalen, downloads both episode and show thumbnails. 2018-07-22 11:12:22 +02:00			`logging.info("Thumbnail: %s", filename)`

Rewrite how we detect already downloaded files this will also check if we already downloaded the files if we already have it or not 2021-05-03 01:43:37 +02:00			`with open(filename, "wb") as fd:`
			`fd.write(data)`
Working thumbnail download. Used by svtplay and barnkanalen, downloads both episode and show thumbnails. 2018-07-22 11:12:22 +02:00

utilis.http: move get_full_url from hls to http 2018-07-05 01:24:16 +02:00			`def get_full_url(url, srcurl):`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`if url[:4] == "http":`
utilis.http: move get_full_url from hls to http 2018-07-05 01:24:16 +02:00			`return url`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`if url[0] == "/":`
			`baseurl = re.search(r"^(http[s]{0,1}://[^/]+)/", srcurl)`
pyupgrade fixes 2021-04-27 19:44:09 +02:00			`return f"{baseurl.group(1)}{url}"`
utilis.http: move get_full_url from hls to http 2018-07-05 01:24:16 +02:00
			`# remove everything after last / in the path of the URL`
pre-commit: black fixes 2019-08-25 00:27:31 +02:00			`baseurl = re.sub(r"^([^\?]+)/[^/]*(\?.*)?$", r"\1/", srcurl)`
utilis.http: move get_full_url from hls to http 2018-07-05 01:24:16 +02:00			`returl = urljoin(baseurl, url)`

			`return returl`