// Web Worker: automatic speech recognition (Whisper) via transformers.js.
import { pipeline, env } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3/dist/transformers.min.js';
// Disable local model file check — always fetch from HuggingFace Hub.
// `env` is the transformers.js global configuration object.
env.allowLocalModels = false;

// Lazily-initialised ASR pipeline: null until loadModel() succeeds,
// then the callable pipeline instance used by transcribe().
let transcriber = null;
/**
 * Create the ASR pipeline for the given model id and store it in the
 * module-level `transcriber`, streaming load progress to the main thread.
 *
 * Posts 'model-progress' messages while downloading/compiling and a final
 * 'model-ready' message on success. Errors propagate to the caller
 * (handled by the onmessage 'load' branch).
 *
 * @param {string} modelId - HuggingFace Hub model id, e.g. 'Xenova/whisper-small'.
 */
async function loadModel(modelId) {
  // Prefer WebGPU when the environment exposes it, otherwise fall back to
  // the WASM backend so the worker still works in browsers without WebGPU
  // (the previous hard-coded 'webgpu' failed outright there).
  const device =
    typeof navigator !== 'undefined' && 'gpu' in navigator ? 'webgpu' : 'wasm';

  transcriber = await pipeline('automatic-speech-recognition', modelId, {
    device,
    dtype: 'q4', // 4-bit quantized weights keep download size and memory low
    progress_callback: (progress) => {
      // Forward download/compile progress to the UI thread.
      self.postMessage({ type: 'model-progress', progress });
    },
  });

  self.postMessage({ type: 'model-ready' });
}
// Run Whisper inference on pre-decoded PCM audio and post the outcome.
//
// audioData is a Float32Array of 16kHz mono PCM, decoded in the main
// thread to avoid the missing AudioContext issue in Web Workers.
// language is an optional language hint; falsy values mean auto-detect.
async function transcribe(audioData, language) {
  // Guard: the 'load' message must have completed first.
  if (!transcriber) {
    self.postMessage({ type: 'error', message: 'Model not loaded' });
    return;
  }

  // Chunked long-form decoding: 30 s windows with 5 s overlap.
  const options = {
    return_timestamps: true,
    chunk_length_s: 30,
    stride_length_s: 5,
    language: language || null,
    task: 'transcribe',
  };

  try {
    const output = await transcriber(audioData, options);
    self.postMessage({
      type: 'result',
      text: output.text,
      chunks: output.chunks || [],
    });
  } catch (error) {
    self.postMessage({ type: 'error', message: error.message });
  }
}
// Message router for the worker:
//   'load'       → initialise the ASR pipeline (default: Xenova/whisper-small)
//   'transcribe' → run inference on the supplied audio buffer
// Any other message type is ignored, matching the original if/else chain.
self.onmessage = async (event) => {
  const { type, modelId, audioData, language } = event.data;

  switch (type) {
    case 'load':
      try {
        await loadModel(modelId || 'Xenova/whisper-small');
      } catch (err) {
        self.postMessage({ type: 'error', message: `Failed to load model: ${err.message}` });
      }
      break;
    case 'transcribe':
      await transcribe(audioData, language);
      break;
    default:
      break;
  }
};