feat: add images

This commit is contained in:
Julien Calixte
2026-03-23 19:28:17 +01:00
parent da1f3566c2
commit c49ecab33f

View File

@@ -4,6 +4,7 @@
<meta charset="UTF-8" /> <meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Apoena Transcript</title> <title>Apoena Transcript</title>
<script src="https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js"></script>
<link rel="manifest" href="/manifest.json" /> <link rel="manifest" href="/manifest.json" />
<link rel="icon" href="/icon.svg" type="image/svg+xml" /> <link rel="icon" href="/icon.svg" type="image/svg+xml" />
<meta name="theme-color" content="#7c6af7" /> <meta name="theme-color" content="#7c6af7" />
@@ -294,6 +295,31 @@
</div> </div>
</div> </div>
<!-- Image OCR card: lets the user drop or browse for an image (the file input
     only offers image/* files) and choose the recognition language. The value
     of the selected <option> in #lang-image is passed as the language argument
     to Tesseract.createWorker in handleImage. -->
<div class="card">
<h2>Image (book page / photo)</h2>
<div class="drop-zone" id="image-drop-zone">
<span class="icon">🖼</span>
Drop an image here, or click to browse
<input type="file" id="image-input" accept="image/*" />
</div>
<div class="option-row">
Language:
<select id="lang-image">
<option value="eng">English</option>
<option value="fra">French</option>
<option value="spa">Spanish</option>
<option value="deu">German</option>
<option value="ita">Italian</option>
<option value="por">Portuguese</option>
<option value="jpn">Japanese</option>
<option value="chi_sim">Chinese (Simplified)</option>
<option value="ara">Arabic</option>
<option value="kor">Korean</option>
</select>
</div>
</div>
<!-- URL card --> <!-- URL card -->
<div class="card"> <div class="card">
<h2>YouTube / TikTok / URL</h2> <h2>YouTube / TikTok / URL</h2>
@@ -365,6 +391,9 @@ const resultText = document.getElementById('result-text');
const copyBtn = document.getElementById('copy-btn'); const copyBtn = document.getElementById('copy-btn');
const srtBtn = document.getElementById('srt-btn'); const srtBtn = document.getElementById('srt-btn');
const errorPanel = document.getElementById('error-panel'); const errorPanel = document.getElementById('error-panel');
// Image OCR card elements: drop zone, file input and language select.
const [imageDropZone, imageInput, langImage] = [
  'image-drop-zone',
  'image-input',
  'lang-image',
].map((id) => document.getElementById(id));
// ── WebGPU detection ─────────────────────────────────────────────────────── // ── WebGPU detection ───────────────────────────────────────────────────────
async function detectDevice() { async function detectDevice() {
@@ -408,7 +437,7 @@ worker.onmessage = (e) => {
if (type === 'result') { if (type === 'result') {
lastChunks = chunks; lastChunks = chunks;
showResult(text, chunks); showResult(text, chunks, 'audio');
setBusy(false); setBusy(false);
} }
@@ -496,6 +525,47 @@ async function handleFile(file) {
} }
} }
// ── Image OCR ──────────────────────────────────────────────────────────────
// A click on the drop zone is forwarded to the file input to open the picker.
imageDropZone.addEventListener('click', () => imageInput.click());

// preventDefault on dragover is required for the element to accept a drop;
// the 'drag-over' class is toggled while a drag hovers over the zone.
imageDropZone.addEventListener('dragover', (e) => {
  e.preventDefault();
  imageDropZone.classList.add('drag-over');
});
imageDropZone.addEventListener('dragleave', () => imageDropZone.classList.remove('drag-over'));

// Only the first dropped file is processed.
imageDropZone.addEventListener('drop', (e) => {
  e.preventDefault();
  imageDropZone.classList.remove('drag-over');
  const file = e.dataTransfer.files[0];
  if (file) handleImage(file);
});

imageInput.addEventListener('change', () => {
  const file = imageInput.files[0];
  // Clear the selection so that choosing the same file again still fires
  // 'change'; the File object captured above stays usable after the reset.
  imageInput.value = '';
  if (file) handleImage(file);
});
/**
 * Runs Tesseract OCR on an image and shows the recognized text.
 *
 * Does nothing when another job is already running (`busy`). Otherwise marks
 * the UI busy, creates a Tesseract worker for the language selected in
 * `langImage`, mirrors the worker's progress into `transcribingL`, and passes
 * the recognized text to `showResult` with type 'image'. Failures are reported
 * through `showError`; the function itself does not reject for OCR errors.
 *
 * @param {File|Blob} file - Image to recognize, handed to `worker.recognize`.
 * @returns {Promise<void>} Settles once the worker is terminated and the busy
 *   state has been cleared.
 */
async function handleImage(file) {
  if (busy) return;
  setBusy(true);
  transcribingL.textContent = 'Loading OCR engine…';

  // Named `ocrWorker` so it does not shadow the file-level `worker`.
  let ocrWorker = null;
  try {
    ocrWorker = await Tesseract.createWorker(langImage.value, 1, {
      workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
      langPath: 'https://tessdata.projectnaptha.com/4.0.0',
      corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core-simd-lstm.wasm.js',
      logger: (m) => {
        if (m.status === 'recognizing text') {
          transcribingL.textContent = `Recognizing… ${Math.round(m.progress * 100)}%`;
        } else if (m.status) {
          // Any other status is shown capitalized, e.g. "Loading language…".
          transcribingL.textContent = m.status.charAt(0).toUpperCase() + m.status.slice(1) + '…';
        }
      },
    });
    const { data: { text } } = await ocrWorker.recognize(file);
    showResult(text, [], 'image');
  } catch (err) {
    // The rejection value is not guaranteed to be an Error instance; fall
    // back to its string form instead of printing "undefined".
    const reason = err instanceof Error ? err.message : String(err);
    showError('OCR failed: ' + reason);
  } finally {
    // Always release the worker, including when recognition failed.
    if (ocrWorker) {
      try {
        await ocrWorker.terminate();
      } catch (terminateErr) {
        // A cleanup failure must not replace the result or error already shown.
        console.warn('Failed to terminate OCR worker', terminateErr);
      }
    }
    setBusy(false);
  }
}
// ── URL input ────────────────────────────────────────────────────────────── // ── URL input ──────────────────────────────────────────────────────────────
urlInput.addEventListener('keydown', (e) => { urlInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !urlBtn.disabled) urlBtn.click(); if (e.key === 'Enter' && !urlBtn.disabled) urlBtn.click();
@@ -557,8 +627,9 @@ function setBusy(state) {
} }
} }
function showResult(text, chunks) { function showResult(text, chunks, type = 'audio') {
resultText.textContent = text.trim(); resultText.textContent = text.trim();
srtBtn.style.display = type === 'audio' ? '' : 'none';
resultPanel.className = 'result-panel active'; resultPanel.className = 'result-panel active';
} }