diff --git a/app/static/index.html b/app/static/index.html
index 365b141..437ba01 100644
--- a/app/static/index.html
+++ b/app/static/index.html
@@ -4,6 +4,7 @@
Apoena Transcript
+
@@ -294,6 +295,31 @@
+
+
+
Image (book page / photo)
+
+ 🖼
+ Drop an image here, or click to browse
+
+
+
+ Language:
+
+
+
+
YouTube / TikTok / URL
@@ -365,6 +391,9 @@ const resultText = document.getElementById('result-text');
const copyBtn = document.getElementById('copy-btn');
const srtBtn = document.getElementById('srt-btn');
const errorPanel = document.getElementById('error-panel');
+// Elements used by the image OCR flow, looked up by id: the drop target, the
+// file input it forwards clicks to, and the control whose value is passed to
+// Tesseract as the recognition language.
+const imageDropZone = document.getElementById('image-drop-zone');
+const imageInput = document.getElementById('image-input');
+const langImage = document.getElementById('lang-image');
// ── WebGPU detection ───────────────────────────────────────────────────────
async function detectDevice() {
@@ -408,7 +437,7 @@ worker.onmessage = (e) => {
if (type === 'result') {
lastChunks = chunks;
- showResult(text, chunks);
+ showResult(text, chunks, 'audio');
setBusy(false);
}
@@ -496,6 +525,47 @@ async function handleFile(file) {
}
}
+// ── Image OCR ──────────────────────────────────────────────────────────────
+imageDropZone.addEventListener('click', () => imageInput.click());
+imageDropZone.addEventListener('dragover', (e) => { e.preventDefault(); imageDropZone.classList.add('drag-over'); });
+imageDropZone.addEventListener('dragleave', () => imageDropZone.classList.remove('drag-over'));
+imageDropZone.addEventListener('drop', (e) => {
+ e.preventDefault();
+ imageDropZone.classList.remove('drag-over');
+ const file = e.dataTransfer.files[0];
+ if (file) handleImage(file);
+});
+imageInput.addEventListener('change', () => {
+ if (imageInput.files[0]) handleImage(imageInput.files[0]);
+});
+
+async function handleImage(file) {
+ if (busy) return;
+ setBusy(true);
+ transcribingL.textContent = 'Loading OCR engineβ¦';
+ try {
+ const worker = await Tesseract.createWorker(langImage.value, 1, {
+ workerPath: 'https://cdn.jsdelivr.net/npm/tesseract.js@5/dist/worker.min.js',
+ langPath: 'https://tessdata.projectnaptha.com/4.0.0',
+ corePath: 'https://cdn.jsdelivr.net/npm/tesseract.js-core@5/tesseract-core-simd-lstm.wasm.js',
+ logger: (m) => {
+ if (m.status === 'recognizing text') {
+ transcribingL.textContent = `Recognizing⦠${Math.round(m.progress * 100)}%`;
+ } else if (m.status) {
+ transcribingL.textContent = m.status.charAt(0).toUpperCase() + m.status.slice(1) + 'β¦';
+ }
+ },
+ });
+ const { data: { text } } = await worker.recognize(file);
+ await worker.terminate();
+ showResult(text, [], 'image');
+ setBusy(false);
+ } catch (err) {
+ showError('OCR failed: ' + err.message);
+ setBusy(false);
+ }
+}
+
// ── URL input ──────────────────────────────────────────────────────────────
urlInput.addEventListener('keydown', (e) => {
if (e.key === 'Enter' && !urlBtn.disabled) urlBtn.click();
@@ -557,8 +627,9 @@ function setBusy(state) {
}
}
-function showResult(text, chunks) {
+// Put the trimmed `text` into the result area and mark the result panel active.
+// `type` defaults to 'audio'; for any other value (the OCR path passes 'image')
+// the SRT button is hidden, and it is restored to its default display for 'audio'.
+// `chunks` is accepted for callers but is not read inside this function.
+function showResult(text, chunks, type = 'audio') {
resultText.textContent = text.trim();
+ srtBtn.style.display = type === 'audio' ? '' : 'none';
resultPanel.className = 'result-panel active';
}