Getting Started
Please make sure you have completed the Getting Started section before reading this guide.
Features
- 🤖 Transcribe audio to text
- 🚀 Fast and easy
- 🧩 Shadcn-vue inspired
- ⚡ Copy-paste the code: composable & worker
- 📚 Usage examples
Usage
TranscriptionView.vue
<script lang="ts" setup>
const { getModels, modelsOptions, selectedModel, progress, init, status, run, result } = useTranscriber()

await getModels()

const handleSelectedFile = (event: Event) => {
  const input = event.target as HTMLInputElement
  if (!input.files || !input.files[0]) return

  const language = 'en'
  run(input.files[0], language)
}
</script>
<template>
  <div>
    <section>
      <h1>Options</h1>
      <div>
        <select v-model="selectedModel">
          <option
            disabled
            value=""
          >
            Select a model
          </option>
          <option
            v-for="model in modelsOptions"
            :key="model.id"
            :value="model.id"
          >
            {{ model.name }}
          </option>
        </select>
      </div>
    </section>

    <section>
      <h2>Load model</h2>
      <button
        :disabled="!selectedModel"
        @click="init"
      >
        Load selected model
      </button>
    </section>

    <section>
      <h2>Loading indicator</h2>
      <div>
        {{ status === "loading" ? "loading ..." : "" }} {{ progress }}
      </div>
    </section>

    <section>
      <h2>Transcribe</h2>
      <div>
        <input
          type="file"
          accept="audio/*"
          @change="handleSelectedFile"
        >
      </div>
    </section>

    <section>
      <h2>Result</h2>
      <div>
        {{ result }}
      </div>
    </section>
  </div>
</template>

<style scoped></style>
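Note that getModels() is awaited at the top level of script setup, which makes this an async component: render it inside a Suspense boundary (Nuxt pages already provide one). A minimal parent sketch, assuming the component above is registered as TranscriptionView:

<template>
  <Suspense>
    <TranscriptionView />
  </Suspense>
</template>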
Code
It is recommended to create the Web Worker in the assets/workers directory.
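With that layout, Vite (which Nuxt uses under the hood) can bundle the worker through its ?worker import suffix, as the composable below does. A minimal sketch, assuming the default Nuxt aliases:

// Assumed layout:
//   assets/workers/transcription.worker.ts  -> the worker script below
//   composables/useTranscriber.ts           -> the composable below

// Vite's `?worker` suffix turns the import into a Worker constructor:
import TranscriptionWorker from '~~/assets/workers/transcription.worker?worker'

const worker = new TranscriptionWorker() // runs off the main thread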
Create composable
useTranscriber.ts
import { ref, computed } from 'vue'
import TranscriptionWorker from '~~/assets/workers/transcription.worker?worker'

type WorkerStatus = 'idle' | 'loading' | 'loaded' | 'transcribing' | 'done' | 'models' | 'error'

type ModelsOption = {
  id: string
  name: string
  description: string
}

// Decode the audio file at 16 kHz, the sample rate Whisper models expect.
const getAudioData = async (audioFile: File): Promise<AudioBuffer> => {
  const arrayBuffer = await audioFile.arrayBuffer()
  const audioCTX = new AudioContext({
    sampleRate: 16000,
  })
  const audioBuffer = await audioCTX.decodeAudioData(arrayBuffer)
  return audioBuffer
}

// Module-scope refs: every component calling useTranscriber() shares this state.
const status = ref<WorkerStatus>('idle')
const worker = ref<Worker | null>(null)
const selectedModel = ref('Xenova/whisper-tiny.en')
const modelsOptions = ref<ModelsOption[]>([])
const progress = ref<number>(0)
const error = ref<string | null>(null)
const result = ref<any>(null)
export const useTranscriber = () => {
  const initWorker = async () => {
    if (!worker.value) {
      worker.value = new TranscriptionWorker()

      // Route every worker message into the shared reactive state.
      worker.value.onmessage = (event) => {
        const { type, status: workerStatus, progress: workerProgress, error: workerError, result: workerResult, modelsOptions: availableModels } = event.data

        switch (type) {
          case 'models':
            modelsOptions.value = availableModels
            break
          case 'status':
            status.value = workerStatus
            if (workerProgress !== undefined) {
              progress.value = workerProgress
            }
            break
          case 'error':
            status.value = 'error'
            error.value = workerError
            break
          case 'result':
            result.value = workerResult
            break
        }
      }
    }
  }

  const init = async () => {
    await initWorker()
    worker.value?.postMessage({ type: 'loadModel', payload: { model: selectedModel.value } })
  }

  const run = async (audioFile: File, language: string) => {
    if (status.value === 'idle') {
      await init()
    }

    try {
      const audioBuffer = await getAudioData(audioFile)
      // Convert AudioBuffer to Float32Array for the worker
      const audioData = audioBuffer.getChannelData(0)

      worker.value?.postMessage({
        type: 'transcribe',
        payload: {
          audio: audioData,
          language,
          model: selectedModel.value,
        },
      })
    }
    catch (err) {
      error.value = (err as Error).message
      status.value = 'error'
    }
  }

  const getModels = async () => {
    await initWorker()
    worker.value?.postMessage({ type: 'models' })
    status.value = 'models'
  }

  return {
    getModels,
    init,
    run,
    isLoaded: computed(() => status.value === 'loaded' || status.value === 'done'),
    isRunning: computed(() => status.value === 'transcribing'),
    modelsOptions,
    selectedModel,
    status,
    progress,
    error,
    result,
  }
}
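Because the refs above live at module scope, every component that calls useTranscriber() reads and writes the same state, so a hypothetical consumer elsewhere in the app (not part of this guide) stays in sync automatically:

// Any other component: same status/result refs as TranscriptionView.vue.
const { status, isRunning, result } = useTranscriber()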
Create worker
transcription.worker.ts
import { type AutomaticSpeechRecognitionPipeline, pipeline } from '@huggingface/transformers'

const BASE_MODEL = 'Xenova/whisper-tiny.en'

let transcriber: AutomaticSpeechRecognitionPipeline | null = null
// English-only checkpoints (*.en) reject the `language` option, so track which kind is loaded.
let isEnglishModel = true
const modelsOptions = [
  {
    id: 'Xenova/whisper-tiny.en',
    name: 'whisper-tiny.en',
    description: 'whisper-tiny.en (English)',
  },
  {
    id: 'Xenova/whisper-tiny',
    name: 'whisper-tiny',
    description: 'whisper-tiny (Multilingual)',
  },
  {
    id: 'Xenova/whisper-small',
    name: 'whisper-small',
    description: 'whisper-small (Multilingual)',
  },
  {
    id: 'Xenova/whisper-small.en',
    name: 'whisper-small.en',
    description: 'whisper-small.en (English)',
  },
  {
    id: 'Xenova/whisper-base',
    name: 'whisper-base',
    description: 'whisper-base (Multilingual)',
  },
  {
    id: 'Xenova/whisper-medium.en',
    name: 'whisper-medium.en',
    description: 'whisper-medium.en (English)',
  },
  {
    id: 'Xenova/whisper-medium',
    name: 'whisper-medium',
    description: 'whisper-medium (Multilingual)',
  },
  {
    id: 'Xenova/whisper-large',
    name: 'whisper-large',
    description: 'whisper-large (Multilingual)',
  },
  {
    id: 'Xenova/whisper-large-v2',
    name: 'whisper-large-v2',
    description: 'whisper-large-v2 (Multilingual)',
  },
  {
    id: 'Xenova/whisper-large-v3',
    name: 'whisper-large-v3',
    description: 'whisper-large-v3 (Multilingual)',
  },
  {
    id: 'onnx-community/whisper-tiny_timestamped',
    name: 'whisper-tiny_timestamped',
    description: 'whisper-tiny_timestamped (Multilingual)',
  },
  {
    id: 'onnx-community/whisper-small_timestamped',
    name: 'whisper-small_timestamped',
    description: 'whisper-small_timestamped (Multilingual)',
  },
  {
    id: 'onnx-community/whisper-base_timestamped',
    name: 'whisper-base_timestamped',
    description: 'whisper-base_timestamped (Multilingual)',
  },
  {
    id: 'onnx-community/whisper-large-v3-turbo_timestamped',
    name: 'whisper-large-v3-turbo_timestamped',
    description: 'whisper-large-v3-turbo_timestamped (Multilingual)',
  },
]
globalThis.onmessage = async (event) => {
  const { type, payload } = event.data

  switch (type) {
    case 'models':
      globalThis.postMessage({ type: 'models', modelsOptions })
      break
    case 'loadModel':
      try {
        globalThis.postMessage({ type: 'status', status: 'loading', progress: 0 })

        // Resolve the fallback before checking the `.en` suffix,
        // so a missing payload.model cannot crash the worker.
        const model = payload.model || BASE_MODEL
        isEnglishModel = model.includes('.en')

        transcriber = await pipeline('automatic-speech-recognition', model)
        globalThis.postMessage({ type: 'status', status: 'loaded', progress: 100 })
      }
      catch (error: any) {
        globalThis.postMessage({ type: 'error', error: error.message })
      }
      break
    case 'transcribe':
      if (!transcriber) {
        globalThis.postMessage({ type: 'error', error: 'Model not loaded' })
        return
      }

      try {
        globalThis.postMessage({ type: 'status', status: 'transcribing', progress: 0 })

        const { audio, language, model } = payload
        console.log('Transcribing audio:', audio, 'with language:', language, 'and model:', model)

        // English-only checkpoints reject the `language` option,
        // so only pass settings to multilingual models.
        const settings = {
          language,
          return_timestamps: true,
        }
        const result = await transcriber(audio, !isEnglishModel ? settings : {})

        globalThis.postMessage({ type: 'status', status: 'done', progress: 100 })
        globalThis.postMessage({ type: 'result', result })
      }
      catch (error: any) {
        globalThis.postMessage({ type: 'error', error: error.message })
      }
      break
    case 'unloadModel':
      transcriber = null
      globalThis.postMessage({ type: 'status', status: 'unloaded', progress: 100 })
      break
    default:
      globalThis.postMessage({ type: 'error', error: 'Unknown message type' })
      break
  }
}
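For reference, the messages exchanged between the composable and the worker form two small discriminated unions. These types are not part of the code above, only a summary of its protocol:

// Requests the composable posts to the worker.
type WorkerRequest =
  | { type: 'models' }
  | { type: 'loadModel', payload: { model: string } }
  | { type: 'transcribe', payload: { audio: Float32Array, language: string, model: string } }
  | { type: 'unloadModel' }

// Responses the worker posts back to the composable.
type WorkerResponse =
  | { type: 'models', modelsOptions: { id: string, name: string, description: string }[] }
  | { type: 'status', status: string, progress?: number }
  | { type: 'error', error: string }
  | { type: 'result', result: unknown }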
Other models that can be used for transcription
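Any Whisper checkpoint with ONNX weights on the Hugging Face Hub should work: add an entry to modelsOptions in the worker and it appears in the select. For example, whisper-base.en (not in the list above) follows the same pattern:

{
  id: 'Xenova/whisper-base.en',
  name: 'whisper-base.en',
  description: 'whisper-base.en (English)',
},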
Examples
- Audio Notes: Privacy-friendly audio-to-text notes.
- whisper-web