// Resolve the Web Speech API constructor, falling back to the WebKit-prefixed
// name. The lookup goes through `globalThis` rather than `window` so that merely
// importing this module does not throw a ReferenceError where `window` is not
// defined (server-side rendering, Node-based test runners, workers); in those
// environments the value is simply `undefined`.
const SpeechRecognition = globalThis.SpeechRecognition || globalThis.webkitSpeechRecognition

/**
 * Singleton wrapper around the browser speech recognition API.
 *
 * At most one recognition session is tracked at a time. Starting a new session
 * while another one is still running aborts the running one first.
 */
export class SpeechToTextProvider {
    private static instance: SpeechToTextProvider

    /** The session currently owned by this provider, or `undefined` when idle. */
    private recognition: SpeechRecognition | undefined

    private constructor() {}

    /** Returns the shared provider, creating it on first use. */
    static getInstance(): SpeechToTextProvider {
        if (!SpeechToTextProvider.instance) {
            SpeechToTextProvider.instance = new SpeechToTextProvider()
        }
        return SpeechToTextProvider.instance
    }

    /** Reports whether a speech recognition constructor was found in this environment. */
    public isAvailable(): boolean {
        return !!SpeechRecognition
    }

    /**
     * Starts a continuous recognition session with interim results enabled.
     *
     * @param lang Value assigned to the session's `lang` property.
     * @param callback Invoked on every result event with the concatenated
     * transcript of the first alternative of each result received so far.
     * @param onEndCallback Invoked when this session's `end` event fires.
     * @throws TypeError when no recognition constructor is available; check
     * {@link isAvailable} before calling.
     */
    public start(lang: string, callback: (result: string) => void, onEndCallback: () => void): void {
        // Replacing the reference without ending the previous session would leave
        // it running with nothing able to stop it, so discard it explicitly.
        this.recognition?.abort()

        // Handlers close over this local instead of `this.recognition`, so a late
        // event from an old session can never act on a newer one.
        const recognition = new SpeechRecognition()
        recognition.continuous = true
        recognition.interimResults = true
        recognition.lang = lang

        recognition.onend = () => {
            // Release the session before notifying, and only if it is still the
            // current one: the callback may call start() again, and a stale
            // session must not clear its successor.
            if (this.recognition === recognition) {
                this.recognition = undefined
            }
            onEndCallback()
        }

        recognition.onresult = (event: SpeechRecognitionEvent) => {
            const results = Array.from(event.results)
            const transcript = results.map((result) => result[0].transcript).join("")
            callback(transcript)
            // Once the most recent result is final, end this session.
            if (results[results.length - 1]?.isFinal) {
                recognition.stop()
            }
        }

        this.recognition = recognition
        recognition.start()
    }

    /** Asks the current session, if any, to stop. Does nothing when idle. */
    public stop(): void {
        this.recognition?.stop()
    }
}
