From 22f267bdac7df0d767b3c3549a757b5c6af99c32 Mon Sep 17 00:00:00 2001 From: Jack Jackson Date: Sun, 3 Nov 2024 14:02:10 -0800 Subject: [PATCH] First commit --- .drone.yml | 43 +++++++++++++++++++++++++++ Dockerfile | 12 ++++++++ README.md | 17 +++++++++++ handler.py | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ main.py | 18 ++++++++++++ requirements.txt | 1 + 6 files changed, 167 insertions(+) create mode 100644 .drone.yml create mode 100644 Dockerfile create mode 100644 handler.py create mode 100644 main.py create mode 100644 requirements.txt diff --git a/.drone.yml b/.drone.yml new file mode 100644 index 0000000..f0bfc1a --- /dev/null +++ b/.drone.yml @@ -0,0 +1,43 @@ +kind: pipeline +name: build-and-deploy +type: docker + +platform: + os: linux + arch: arm64 + + +steps: + - name: docker-build-and-push + image: thegeeklab/drone-docker-buildx # Absurd that this isn't offered as first-party! + privileged: true + settings: + # Consider testing with this if the cache-to arguments don't help! 
+ # args: + # - BUILDKIT_INLINE_CACHE=1 + cache_from: + - "gitea.scubbo.org/${DRONE_REPO}:${DRONE_BRANCH}" + cache_to: + # https://docs.docker.com/build/cache/backends/registry/ + # `mode=max` necessary in order to cache intermediate stages + # + # https://github.com/go-gitea/gitea/issues/28973#issuecomment-1961859655 + # Some parameters are necessary in order to make the push to gitea succeed + # + # Another reference: + # https://github.com/thegeeklab/drone-docker-buildx/issues/47 + - "type=registry,ref=gitea.scubbo.org/${DRONE_REPO}:${DRONE_BRANCH},mode=max,image-manifest=true,oci-mediatypes=true" + registry: gitea.scubbo.org + username: scubbo + password: + from_secret: gitea_password + repo: gitea.scubbo.org/${DRONE_REPO} + tags: + - ${DRONE_BRANCH} + - ${DRONE_COMMIT_SHA:0:10} + target: prod + platforms: + - linux/arm64 + - linux/amd64 + +# TODO - automated update of Helm Chart/Deployment Repo diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..3a6b3e0 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,12 @@ +FROM python:3.13 + +WORKDIR /usr/src/app + +RUN apt-get update && apt-get install -y ffmpeg + +COPY requirements.txt ./ +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +CMD [ "python", "./main.py" ] \ No newline at end of file diff --git a/README.md b/README.md index e69de29..2d6cc33 100644 --- a/README.md +++ b/README.md @@ -0,0 +1,17 @@ +This package puts [yt-dlp](https://github.com/yt-dlp/yt-dlp) behind an HTTP-callable server. + +**Motivation:** I had a little snippet of bash that I could run on one of my [`*arr` Pods](https://wiki.servarr.com/) to install the `yt-dlp` CLI tool and then use it to download the audio of a video at a given URL. That _worked_, but was awkward - especially having to reinstall the tool any time a Pod was reinitialized. 
# https://github.com/yt-dlp/yt-dlp?tab=readme-ov-file#extract-audio
def download(url):
    """Download the audio of the media at ``url`` as an m4a file.

    Output is written under the directory named by the ``DOWNLOAD_DIR``
    environment variable, or the current directory when it is unset.

    Args:
        url: Address of the page/media to hand to yt-dlp.

    Returns:
        Whatever ``YoutubeDL.download`` returns (yt-dlp's download return
        code), so a caller collecting the pool result can inspect it.
    """
    ydl_opts = {
        'format': 'm4a/bestaudio/best',
        'paths': {
            'home': os.environ.get('DOWNLOAD_DIR', '.'),
        },
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'm4a',
        }],
    }
    with YoutubeDL(ydl_opts) as ydl:
        # ``download`` is declared as taking a *list* of URLs; wrap the single
        # URL explicitly instead of relying on yt-dlp tolerating a bare string.
        return ydl.download([url])
class Handler(http.server.SimpleHTTPRequestHandler):
    """Request handler that queues a yt-dlp download for each valid POST.

    A POST body must be a JSON object with a non-empty string ``url`` member.
    The download itself is submitted to ``pool`` and runs in the background;
    the response only acknowledges that the request was accepted.
    """

    def __init__(self, pool, *args, **kwargs):
        """Store the worker pool, then let the base class serve the request.

        Args:
            pool: Object exposing ``apply_async`` (a ``multiprocessing.Pool``
                in ``main.py``) used to run downloads in the background.
            *args, **kwargs: Forwarded unchanged to the base handler.
        """
        # The pool has to be attached *before* the parent constructor runs,
        # because the parent constructor is what processes the request.
        # https://stackoverflow.com/a/52046062/1040915
        self.pool = pool
        super().__init__(*args, **kwargs)

    def _respond(self, status, message, extra_headers=None):
        """Send a complete plain-text response with the given status."""
        payload = message.encode('utf-8')
        self.send_response(status)
        # The bodies produced here are human-readable text, not JSON.
        self.send_header('Content-Type', 'text/plain; charset=utf-8')
        self.send_header('Content-Length', str(len(payload)))
        for name, value in (extra_headers or {}).items():
            self.send_header(name, value)
        self.end_headers()
        self.wfile.write(payload)

    def _read_url(self):
        """Return the ``url`` member of the JSON request body.

        Raises:
            ValueError: If the body is missing, is not valid JSON, or does not
                contain a non-empty string ``url``. The exception text is
                suitable for returning to the client.
        """
        content_length = self.headers.get('Content-Length')
        if not content_length:
            raise ValueError("Looks like you forgot to send a body")

        try:
            length = int(content_length)
            if length < 0:
                # A negative size would make ``read`` block until EOF.
                raise ValueError('Content-Length must not be negative')
            # JSONDecodeError and UnicodeDecodeError both derive from ValueError.
            body = json.loads(self.rfile.read(length))
        except ValueError as error:
            raise ValueError(
                f'Could not read a JSON request body: {error}') from error

        # Valid JSON is not necessarily an object (it may be a list, number...).
        url = body.get('url') if isinstance(body, dict) else None
        if not url or not isinstance(url, str):
            raise ValueError("Looks like you forgot to send a `url` parameter")
        return url

    @staticmethod
    def _report_download_failure(error):
        """Report an exception raised by a background download."""
        print(f'Download failed: {error}')  # TODO - better logging!

    def do_POST(self):
        """Validate the request, queue the download, and acknowledge it.

        Responds 400 for a malformed request, 500 if the download could not be
        queued, and 202 once the download has been handed to the pool.
        """
        try:
            url = self._read_url()
        except ValueError as error:
            self._respond(HTTPStatus.BAD_REQUEST, str(error))
            return

        # Queue the work *before* writing any response, so that a scheduling
        # failure yields one clean 500 rather than a second status line
        # appended to an already-started 202 response.
        try:
            self.pool.apply_async(
                download, (url,),
                error_callback=self._report_download_failure)
        except Exception as error:
            print(error)  # TODO - better logging!
            self._respond(HTTPStatus.INTERNAL_SERVER_ERROR, str(error))
            return

        self._respond(
            HTTPStatus.ACCEPTED, f'Accepted download request for {url}\n')

    def do_GET(self):
        """Reject GET requests: downloads are requested with POST."""
        self._respond(
            HTTPStatus.METHOD_NOT_ALLOWED,
            'ERROR: Only POST requests are permitted\n',
            extra_headers={'Allow': 'POST'})
# Port the HTTP server listens on; overridable through the PORT env variable.
PORT = int(os.environ.get('PORT', '8000'))


class _ReusableTCPServer(socketserver.TCPServer):
    """TCP server that can rebind its port right after a restart."""

    # Without this, restarting the process shortly after it exits can fail
    # with "Address already in use" while the old socket lingers.
    allow_reuse_address = True


def main():
    """Start the worker pool and serve HTTP requests until interrupted."""
    with Pool(5) as p:
        # Bind the pool as the handler's first constructor argument; the
        # server itself supplies the remaining (request, address, server).
        # https://stackoverflow.com/a/52046062/1040915
        handler = partial(Handler, p)
        with _ReusableTCPServer(("", PORT), handler) as httpd:
            print("serving at port", PORT)
            try:
                httpd.serve_forever()
            except KeyboardInterrupt:
                # Ctrl-C is the normal way to stop; exit without a traceback.
                print("shutting down")


if __name__ == '__main__':
    main()