feat: add images
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
<meta charset="UTF-8" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>Apoena Transcript</title>
|
||||
<script src="https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/tesseract.min.js"></script>
|
||||
<link rel="manifest" href="/manifest.json" />
|
||||
<link rel="icon" href="/icon.svg" type="image/svg+xml" />
|
||||
<meta name="theme-color" content="#7c6af7" />
|
||||
@@ -294,6 +295,31 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<!-- Image OCR card -->
|
||||
<div class="card">
  <h2>Image (book page / photo)</h2>
  <!-- Click / drag-and-drop target; the nested file input is opened by the page script. -->
  <div class="drop-zone" id="image-drop-zone">
    <span class="icon">🖼</span>
    Drop an image here, or click to browse
    <input type="file" id="image-input" accept="image/*" />
  </div>
  <div class="option-row">
    <!-- Explicit label so the select has an accessible name for assistive technology. -->
    <label for="lang-image">Language:</label>
    <!-- Option values are the language codes handed to Tesseract.createWorker. -->
    <select id="lang-image">
      <option value="eng">English</option>
      <option value="fra">French</option>
      <option value="spa">Spanish</option>
      <option value="deu">German</option>
      <option value="ita">Italian</option>
      <option value="por">Portuguese</option>
      <option value="jpn">Japanese</option>
      <option value="chi_sim">Chinese (Simplified)</option>
      <option value="ara">Arabic</option>
      <option value="kor">Korean</option>
    </select>
  </div>
</div>
|
||||
|
||||
<!-- URL card -->
|
||||
<div class="card">
|
||||
<h2>YouTube / TikTok / URL</h2>
|
||||
@@ -365,6 +391,9 @@ const resultText = document.getElementById('result-text');
|
||||
const copyBtn = document.getElementById('copy-btn');
|
||||
const srtBtn = document.getElementById('srt-btn');
|
||||
const errorPanel = document.getElementById('error-panel');
|
||||
const imageDropZone = document.getElementById('image-drop-zone');
|
||||
const imageInput = document.getElementById('image-input');
|
||||
const langImage = document.getElementById('lang-image');
|
||||
|
||||
// ── WebGPU detection ───────────────────────────────────────────────────────
|
||||
async function detectDevice() {
|
||||
@@ -408,7 +437,7 @@ worker.onmessage = (e) => {
|
||||
|
||||
if (type === 'result') {
|
||||
lastChunks = chunks;
|
||||
showResult(text, chunks);
|
||||
showResult(text, chunks, 'audio');
|
||||
setBusy(false);
|
||||
}
|
||||
|
||||
@@ -496,6 +525,47 @@ async function handleFile(file) {
|
||||
}
|
||||
}
|
||||
|
||||
// ── Image OCR ──────────────────────────────────────────────────────────────
|
||||
// Clicking anywhere on the drop zone opens the native file picker.
imageDropZone.addEventListener('click', () => imageInput.click());

// preventDefault on dragover is required for the element to accept a drop.
imageDropZone.addEventListener('dragover', (e) => {
  e.preventDefault();
  imageDropZone.classList.add('drag-over');
});

imageDropZone.addEventListener('dragleave', () => {
  imageDropZone.classList.remove('drag-over');
});

// Only the first dropped file is processed.
imageDropZone.addEventListener('drop', (e) => {
  e.preventDefault();
  imageDropZone.classList.remove('drag-over');
  const file = e.dataTransfer.files[0];
  if (file) handleImage(file);
});

imageInput.addEventListener('change', () => {
  const file = imageInput.files[0];
  // Clear the selection so picking the same file again still fires `change`;
  // the File object captured above stays usable after the reset.
  imageInput.value = '';
  if (file) handleImage(file);
});
|
||||
|
||||
/**
 * Run OCR on an image with Tesseract.js and show the recognized text.
 *
 * Returns immediately when another job is in progress (`busy`). Otherwise it
 * marks the UI busy, creates a Tesseract worker for the language selected in
 * `langImage`, recognizes the image and passes the text to `showResult` with
 * type 'image'. Failures are reported through `showError`. The worker is
 * always terminated and the busy state always cleared, on success and on
 * failure alike.
 *
 * @param {File|Blob} file - Image chosen or dropped by the user.
 * @returns {Promise<void>} Resolves once the result or the error has been shown.
 */
async function handleImage(file) {
  if (busy) return;
  setBusy(true);
  transcribingL.textContent = 'Loading OCR engine…';

  let ocrWorker = null;
  try {
    // Second argument is the OCR engine mode passed through to Tesseract.js.
    ocrWorker = await Tesseract.createWorker(langImage.value, 1, {
      workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
      langPath: 'https://tessdata.projectnaptha.com/4.0.0',
      // Point at the package directory rather than one build file so the
      // library can choose a SIMD or non-SIMD core depending on the browser.
      corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5',
      // Mirror the worker's progress messages into the status label.
      logger: (m) => {
        if (m.status === 'recognizing text') {
          transcribingL.textContent = `Recognizing… ${Math.round(m.progress * 100)}%`;
        } else if (m.status) {
          transcribingL.textContent = m.status.charAt(0).toUpperCase() + m.status.slice(1) + '…';
        }
      },
    });

    const { data: { text } } = await ocrWorker.recognize(file);
    showResult(text, [], 'image');
  } catch (err) {
    // Rejections are not guaranteed to be Error objects; fall back to the
    // stringified value instead of printing "undefined".
    const reason = err && err.message ? err.message : String(err);
    showError('OCR failed: ' + reason);
  } finally {
    // Release the worker even when recognition failed, otherwise it leaks.
    if (ocrWorker) {
      try {
        await ocrWorker.terminate();
      } catch (terminateErr) {
        console.warn('Failed to terminate OCR worker', terminateErr);
      }
    }
    setBusy(false);
  }
}
|
||||
|
||||
// ── URL input ──────────────────────────────────────────────────────────────
|
||||
urlInput.addEventListener('keydown', (e) => {
|
||||
if (e.key === 'Enter' && !urlBtn.disabled) urlBtn.click();
|
||||
@@ -557,8 +627,9 @@ function setBusy(state) {
|
||||
}
|
||||
}
|
||||
|
||||
function showResult(text, chunks) {
|
||||
/**
 * Display a finished transcription or OCR result in the result panel.
 *
 * @param {string} text - Recognized text; surrounding whitespace is trimmed.
 *   A null/undefined value is shown as an empty result instead of throwing.
 * @param {Array} chunks - Not read by this function; kept so existing callers
 *   can keep passing it.
 * @param {string} [type='audio'] - Result source. The SRT button is shown only
 *   for 'audio' and hidden for any other value.
 */
function showResult(text, chunks, type = 'audio') {
  const displayText = text == null ? '' : String(text);
  resultText.textContent = displayText.trim();
  // An empty string restores the stylesheet's display value for the button.
  srtBtn.style.display = type === 'audio' ? '' : 'none';
  resultPanel.className = 'result-panel active';
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user