From c49ecab33f2ec5b7faf7e2155578527197d2d002 Mon Sep 17 00:00:00 2001 From: Julien Calixte Date: Mon, 23 Mar 2026 19:28:17 +0100 Subject: [PATCH] feat: add images --- app/static/index.html | 75 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 73 insertions(+), 2 deletions(-) diff --git a/app/static/index.html b/app/static/index.html index 365b141..437ba01 100644 --- a/app/static/index.html +++ b/app/static/index.html @@ -4,6 +4,7 @@ Apoena Transcript + @@ -294,6 +295,31 @@ + +
+

Image (book page / photo)

+
+ 🖼 + Drop an image here, or click to browse +
+
+ Language: + +
+
+

YouTube / TikTok / URL

@@ -365,6 +391,9 @@ const resultText = document.getElementById('result-text');
 const copyBtn = document.getElementById('copy-btn');
 const srtBtn = document.getElementById('srt-btn');
 const errorPanel = document.getElementById('error-panel');
+const imageDropZone = document.getElementById('image-drop-zone');
+const imageInput = document.getElementById('image-input');
+const langImage = document.getElementById('lang-image');
 
 // ── WebGPU detection ───────────────────────────────────────────────────────
 async function detectDevice() {
@@ -408,7 +437,7 @@ worker.onmessage = (e) => {
 
   if (type === 'result') {
     lastChunks = chunks;
-    showResult(text, chunks);
+    showResult(text, chunks, 'audio');
     setBusy(false);
   }
 
@@ -496,6 +525,60 @@ async function handleFile(file) {
   }
 }
 
+// ── Image OCR ──────────────────────────────────────────────────────────────
+imageDropZone.addEventListener('click', () => imageInput.click());
+imageDropZone.addEventListener('dragover', (e) => { e.preventDefault(); imageDropZone.classList.add('drag-over'); });
+imageDropZone.addEventListener('dragleave', () => imageDropZone.classList.remove('drag-over'));
+imageDropZone.addEventListener('drop', (e) => {
+  e.preventDefault();
+  imageDropZone.classList.remove('drag-over');
+  const file = e.dataTransfer.files[0];
+  if (file) handleImage(file);
+});
+imageInput.addEventListener('change', () => {
+  if (imageInput.files[0]) handleImage(imageInput.files[0]);
+});
+
+/**
+ * Runs Tesseract OCR on an image file and shows the recognized text.
+ * Does nothing when another job is already running (`busy`).
+ *
+ * @param {File} file - Image picked or dropped by the user.
+ */
+async function handleImage(file) {
+  if (busy) return;
+  setBusy(true);
+  transcribingL.textContent = 'Loading OCR engine…';
+  // Named ocrWorker so it does not shadow the `worker` whose onmessage handler lives elsewhere in this file.
+  let ocrWorker;
+  try {
+    ocrWorker = await Tesseract.createWorker(langImage.value, 1, {
+      workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
+      langPath: 'https://tessdata.projectnaptha.com/4.0.0',
+      corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core-simd-lstm.wasm.js',
+      logger: (m) => {
+        if (m.status === 'recognizing text') {
+          transcribingL.textContent = `Recognizing… ${Math.round(m.progress * 100)}%`;
+        } else if (m.status) {
+          transcribingL.textContent = m.status.charAt(0).toUpperCase() + m.status.slice(1) + '…';
+        }
+      },
+    });
+    const { data: { text } } = await ocrWorker.recognize(file);
+    showResult(text, [], 'image');
+  } catch (err) {
+    // The rejection value may not be an Error, so fall back to its string form.
+    const reason = err instanceof Error ? err.message : String(err);
+    showError('OCR failed: ' + reason);
+  } finally {
+    // Release the worker on every path; a failed recognize() used to leave it running.
+    if (ocrWorker) {
+      ocrWorker.terminate().catch((err) => console.warn('OCR worker cleanup failed:', err));
+    }
+    setBusy(false);
+  }
+}
+
 // ── URL input ──────────────────────────────────────────────────────────────
 urlInput.addEventListener('keydown', (e) => {
   if (e.key === 'Enter' && !urlBtn.disabled) urlBtn.click();
@@ -557,8 +640,9 @@ function setBusy(state) {
   }
 }
 
-function showResult(text, chunks) {
+function showResult(text, chunks, type = 'audio') {
   resultText.textContent = text.trim();
+  srtBtn.style.display = type === 'audio' ? '' : 'none';
   resultPanel.className = 'result-panel active';
 }
 