AUDIO_TMP_DIR = "/tmp/apoena-audio"


async def extract_audio(url: str) -> Path:
    """Download the audio track of *url* with yt-dlp and return the temp file path.

    Normally returns ``<AUDIO_TMP_DIR>/<uuid>.mp3``; yt-dlp occasionally keeps
    the source extension despite ``--audio-format mp3``, in which case the file
    actually produced for this job id is returned instead.

    Raises:
        RuntimeError: if yt-dlp exits non-zero (message is its stderr) or
            produced no output file at all.
    """
    # Ensure the scratch dir exists even when this module is used outside the
    # FastAPI lifespan that normally creates it (also makes the previously
    # unused `os` import meaningful).
    os.makedirs(AUDIO_TMP_DIR, exist_ok=True)

    job_id = str(uuid.uuid4())
    outtmpl = f"{AUDIO_TMP_DIR}/{job_id}.%(ext)s"
    expected = Path(f"{AUDIO_TMP_DIR}/{job_id}.mp3")

    cmd = [
        "yt-dlp",
        "--no-warnings",
        "--quiet",
        "--extract-audio",
        "--audio-format", "mp3",
        "--audio-quality", "128K",
        "--format", "bestaudio/best",
        "--output", outtmpl,
        url,
    ]

    proc = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    _, stderr = await proc.communicate()

    if proc.returncode != 0:
        raise RuntimeError(stderr.decode().strip() or "yt-dlp failed with no output")

    if expected.exists():
        return expected

    # yt-dlp sometimes keeps the original extension even with --audio-format mp3;
    # fall back to whatever file was created under this job_id prefix.
    fallback = next(Path(AUDIO_TMP_DIR).glob(f"{job_id}.*"), None)
    if fallback is not None:
        return fallback

    raise RuntimeError("yt-dlp produced no output file")
audio_path = await downloader.extract_audio(body.url) + except RuntimeError as e: + raise HTTPException(status_code=422, detail=str(e)) + + background_tasks.add_task(_delete_file, audio_path) + return FileResponse(audio_path, media_type="audio/mpeg", filename="audio.mp3") + + +def _delete_file(path): + try: + os.unlink(path) + except OSError: + pass + + +@app.get("/") +async def index(): + return FileResponse(STATIC_DIR / "index.html") diff --git a/app/static/index.html b/app/static/index.html new file mode 100644 index 0000000..ffe50fc --- /dev/null +++ b/app/static/index.html @@ -0,0 +1,611 @@ + + + + + + Apoena Transcript + + + +
+
+

apoena transcript

+ Loading... +
+ + +
+ Loading model β€” first visit downloads ~100 MB, then it's cached locally. + +
+ + +
+ + +
+

Upload file

+
+ πŸ“‚ + Drop an audio or video file here, or click to browse + +
+
+ Language: + +
+
+ + +
+

YouTube / TikTok / URL

+
+ + +
+
+ Language: + +
+
+ + +
+
+
Transcribing…
+
+
+ + +
+
+

Transcript

+
+ + +
+
+
+
+
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3/dist/transformers.min.js';

// Always fetch model weights from the HuggingFace Hub; skip local-file probing.
env.allowLocalModels = false;

let transcriber = null;

// Instantiate the ASR pipeline, streaming download progress back to the page.
async function loadModel(modelId) {
  transcriber = await pipeline('automatic-speech-recognition', modelId, {
    device: 'webgpu',
    dtype: 'q4',
    progress_callback: (progress) => self.postMessage({ type: 'model-progress', progress }),
  });
  self.postMessage({ type: 'model-ready' });
}

// audioData: Float32Array of 16kHz mono PCM, decoded on the main thread
// (Web Workers lack AudioContext).
async function transcribe(audioData, language) {
  if (!transcriber) {
    self.postMessage({ type: 'error', message: 'Model not loaded' });
    return;
  }

  try {
    const output = await transcriber(audioData, {
      return_timestamps: true,
      chunk_length_s: 30,
      stride_length_s: 5,
      language: language || null,
      task: 'transcribe',
    });
    self.postMessage({ type: 'result', text: output.text, chunks: output.chunks || [] });
  } catch (err) {
    self.postMessage({ type: 'error', message: err.message });
  }
}

self.onmessage = async ({ data }) => {
  switch (data.type) {
    case 'load':
      try {
        await loadModel(data.modelId || 'Xenova/whisper-small');
      } catch (err) {
        self.postMessage({ type: 'error', message: `Failed to load model: ${err.message}` });
      }
      break;
    case 'transcribe':
      await transcribe(data.audioData, data.language);
      break;
  }
};
+ ports: + - "8000:8000" + environment: + - MAX_UPLOAD_SIZE_MB=500 + volumes: + - /tmp/apoena-audio:/tmp/apoena-audio diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..9deb25b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,7 @@ +fastapi>=0.111 +uvicorn[standard]>=0.29 +python-multipart>=0.0.9 +yt-dlp>=2024.1 +pydantic>=2.0 +pydantic-settings>=2.0 +aiofiles>=23.0