From 22f267bdac7df0d767b3c3549a757b5c6af99c32 Mon Sep 17 00:00:00 2001
From: Jack Jackson <scubbojj@gmail.com>
Date: Sun, 3 Nov 2024 14:02:10 -0800
Subject: [PATCH] First commit

---
 .drone.yml       | 43 +++++++++++++++++++++++++++
 Dockerfile       | 12 ++++++++
 README.md        | 17 +++++++++++
 handler.py       | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 main.py          | 18 ++++++++++++
 requirements.txt |  1 +
 6 files changed, 167 insertions(+)
 create mode 100644 .drone.yml
 create mode 100644 Dockerfile
 create mode 100644 handler.py
 create mode 100644 main.py
 create mode 100644 requirements.txt

diff --git a/.drone.yml b/.drone.yml
new file mode 100644
index 0000000..f0bfc1a
--- /dev/null
+++ b/.drone.yml
@@ -0,0 +1,43 @@
+kind: pipeline
+name: build-and-deploy
+type: docker
+
+platform:
+  os: linux
+  arch: arm64
+
+
+steps:
+  - name: docker-build-and-push
+    image: thegeeklab/drone-docker-buildx # Absurd that this isn't offered as first-party!
+    privileged: true
+    settings:
+      # Consider testing with this if the cache-to arguments don't help!
+      # args:
+      #   - BUILDKIT_INLINE_CACHE=1
+      cache_from:
+        - "gitea.scubbo.org/${DRONE_REPO}:${DRONE_BRANCH}"
+      cache_to:
+        # https://docs.docker.com/build/cache/backends/registry/
+        # `mode=max` necessary in order to cache intermediate stages
+        #
+        # https://github.com/go-gitea/gitea/issues/28973#issuecomment-1961859655
+        # Some parameters are necessary in order to make the push to gitea succeed
+        #
+        # Another reference:
+        # https://github.com/thegeeklab/drone-docker-buildx/issues/47
+        - "type=registry,ref=gitea.scubbo.org/${DRONE_REPO}:${DRONE_BRANCH},mode=max,image-manifest=true,oci-mediatypes=true"
+      registry: gitea.scubbo.org
+      username: scubbo
+      password:
+        from_secret: gitea_password
+      repo: gitea.scubbo.org/${DRONE_REPO}
+      tags:
+        - ${DRONE_BRANCH}
+        - ${DRONE_COMMIT_SHA:0:10}
+      target: prod
+      platforms:
+        - linux/arm64
+        - linux/amd64
+
+# TODO - automated update of Helm Chart/Deployment Repo
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..3a6b3e0
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,16 @@
+# `prod` is the stage selected by `target: prod` in .drone.yml.
+FROM python:3.13 AS prod
+
+WORKDIR /usr/src/app
+
+# ffmpeg is required by the FFmpegExtractAudio postprocessor used in handler.py.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends ffmpeg \
+    && rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY . .
+
+CMD [ "python", "./main.py" ]
\ No newline at end of file
diff --git a/README.md b/README.md
index e69de29..2d6cc33 100644
--- a/README.md
+++ b/README.md
@@ -0,0 +1,17 @@
+This package puts [yt-dlp](https://github.com/yt-dlp/yt-dlp) behind an HTTP-callable server.
+
+**Motivation:** I had a little snippet of bash that I could run on one of my [`*arr` Pods](https://wiki.servarr.com/) to install the `yt-dlp` CLI tool and then use it to download the audio of a video at a given URL. That _worked_, but was awkward - especially having to reinstall the tool any time a Pod was reinitialized. With this setup, I can deploy a light image alongside the Arr Pods that can be invoked over HTTP to download whatever URL I'm interested in, without having to do a `kubectl exec` to shell into the existing pods.
+
+There are _tons_ of improvements that could be made to this, such as:
+* not hard-coding the audio format (I've picked the one that appears to work best for my [Jellyfin](https://en.wikipedia.org/wiki/Jellyfin) setup), or indeed allowing the passthrough of other customizations.
+* running this process as a Kubernetes job, or some other separated long-running job mechanism
+  * exposing logs more-easily
+* using a more secure and robust server implementation than `http.server`
+* filename customization (using the `--output` flag)
+* Testing :P
+
+# Customization
+
+Environment variables:
+* `DOWNLOAD_DIR` sets the directory into which files are downloaded (defaults to `.`)
+* `PORT` sets the port to listen on (defaults to `8000`)
diff --git a/handler.py b/handler.py
new file mode 100644
index 0000000..31d2865
--- /dev/null
+++ b/handler.py
@@ -0,0 +1,110 @@
+"""HTTP handler that queues yt-dlp audio downloads on a worker pool."""
+
+import os
+import json
+import logging
+from http import HTTPStatus
+import http.server
+
+from yt_dlp import YoutubeDL
+
+logger = logging.getLogger(__name__)
+
+
+# https://github.com/yt-dlp/yt-dlp?tab=readme-ov-file#extract-audio
+def download(url):
+    """Download the audio of ``url`` as m4a into ``DOWNLOAD_DIR``.
+
+    ``DOWNLOAD_DIR`` is read from the environment and defaults to ``.``.
+    """
+    ydl_opts = {
+        'format': 'm4a/bestaudio/best',
+        'paths': {
+            'home': os.environ.get('DOWNLOAD_DIR', '.')
+        },
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'm4a'
+        }]
+    }
+    with YoutubeDL(ydl_opts) as ydl:
+        # yt-dlp documents `download` as taking a list of URLs.
+        ydl.download([url])
+
+
+def _log_download_failure(exc):
+    """Pool error callback: report an exception raised by a download."""
+    logger.error('Download failed: %s', exc)
+
+
+class Handler(http.server.SimpleHTTPRequestHandler):
+    """Queue a download for each ``POST`` whose JSON body carries a ``url``.
+
+    ``pool`` is stored on the instance and must provide ``apply_async``
+    accepting ``error_callback`` (as ``multiprocessing.Pool`` does).
+    """
+
+    def __init__(self, pool, *args, **kwargs):
+        # The base constructor handles the request, so set the pool first:
+        # https://stackoverflow.com/a/52046062/1040915
+        self.pool = pool
+        super().__init__(*args, **kwargs)
+
+    def _respond(self, status, message, extra_headers=()):
+        """Send ``message`` as a complete plain-text response."""
+        content = message.encode('utf-8')
+        self.send_response(status)
+        # The bodies sent here are plain sentences, not JSON documents.
+        self.send_header('Content-Type', 'text/plain; charset=utf-8')
+        self.send_header('Content-Length', str(len(content)))
+        for name, value in extra_headers:
+            self.send_header(name, value)
+        self.end_headers()
+        self.wfile.write(content)
+
+    def do_POST(self):
+        """Validate the request body and queue a download of its ``url``.
+
+        Responds 202 once the job is queued, 400 for a missing or malformed
+        body, and 500 for anything unexpected.
+        """
+        try:
+            content_length = self.headers['Content-Length']
+            if not content_length:
+                self._respond(HTTPStatus.BAD_REQUEST,
+                              "Looks like you forgot to send a body")
+                return
+
+            try:
+                length = int(content_length)
+                if length < 0:
+                    # read(-1) would read to EOF rather than the declared body.
+                    raise ValueError('negative Content-Length')
+                body = json.loads(self.rfile.read(length))
+            except ValueError:
+                # Also covers json.JSONDecodeError, a ValueError subclass.
+                self._respond(HTTPStatus.BAD_REQUEST,
+                              "Request body must be valid JSON")
+                return
+
+            url = body.get('url') if isinstance(body, dict) else None
+            if not url or not isinstance(url, str):
+                self._respond(HTTPStatus.BAD_REQUEST,
+                              "Looks like you forgot to send a `url` parameter")
+                return
+
+            # Queue the job before replying: if this raises, no response has
+            # been started yet, so the handler below can still send a 500.
+            self.pool.apply_async(download, (url,),
+                                  error_callback=_log_download_failure)
+            self._respond(HTTPStatus.ACCEPTED,
+                          f'Accepted download request for {url}\n')
+        except Exception as e:
+            logger.exception('Unhandled error while processing POST')
+            self._respond(HTTPStatus.INTERNAL_SERVER_ERROR, str(e))
+
+    def do_GET(self):
+        """Reject GET requests; downloads are requested via POST."""
+        self._respond(HTTPStatus.METHOD_NOT_ALLOWED,
+                      'ERROR: Only POST requests are permitted\n',
+                      extra_headers=[('Allow', 'POST')])
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..726bb1f
--- /dev/null
+++ b/main.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+"""Serve download requests over HTTP, backed by a pool of worker processes."""
+
+import os
+import socketserver
+
+from functools import partial
+from handler import Handler
+from multiprocessing import Pool
+
+PORT = int(os.environ.get('PORT', '8000'))
+# Size of the worker pool, i.e. how many downloads may run at once.
+POOL_SIZE = 5
+
+
+class ReusableTCPServer(socketserver.TCPServer):
+    """TCPServer that sets SO_REUSEADDR on its listening socket."""
+
+    # Lets a restarted server rebind the port straight away instead of
+    # failing with "Address already in use".
+    allow_reuse_address = True
+
+
+def main():
+    """Start the worker pool and serve HTTP requests forever."""
+    with Pool(POOL_SIZE) as pool:
+        # partial() supplies the pool as Handler's first argument:
+        # https://stackoverflow.com/a/52046062/1040915
+        handler = partial(Handler, pool)
+        with ReusableTCPServer(("", PORT), handler) as httpd:
+            print("serving at port", PORT)
+            httpd.serve_forever()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b1df116
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+yt-dlp