import { useCallback, useEffect, useRef, useState } from 'react';
import * as SpeechSDK from 'microsoft-cognitiveservices-speech-sdk';
import { useSttDictionaries } from './useSttDictionaries';
import { ILanguages } from '../util/language';
import { LocalStorage } from '../store/LocalStorage';
import MediaStreamRecorder from 'msr';

/**
 * Arguments accepted by the `useAzureSpeech` hook.
 */
type UseAzureSpeechArgs = {
  /** Identifier handed to `useSttDictionaries` to obtain the text-replacement function. */
  roomOwnerId: string;
  /** When truthy, interim (non-final) recognition text is also forwarded to `onMessage`. */
  needInterim?: boolean;
  /**
   * Receives recognized text.
   * `isFinal` is `false` for interim text and `true` for a completed result;
   * `language` is the language value reported on the recognition result.
   */
  onMessage: (result: string, isFinal?: boolean, language?: string) => void;
  /** Optional sink for error messages reported by the hook. */
  onError?: (message: string) => void;
};

/**
 * Number of leading bytes of every recorder chunk that this hook treats as the
 * WAV header: the audio format is read from it once, and it is stripped before
 * the remaining PCM payload is pushed to the recognizer.
 */
const WAV_HEADER_BYTES = 44;

/**
 * React hook that records a `MediaStream` as WAV chunks and feeds them to an
 * Azure Speech recognizer through a push stream.
 *
 * Recognition is single-shot (`recognizeOnceAsync`): after each result the
 * recognizer and push stream are released, and the next audio chunk lazily
 * creates a fresh pair, so consecutive utterances are recognized one by one.
 *
 * @param roomOwnerId  Forwarded to `useSttDictionaries` to obtain the text-replacement function.
 * @param onMessage    Called with recognized text (`isFinal` false for interim, true for final results).
 * @param onError      Optional; called with a message when recognition or cleanup fails.
 * @param needInterim  When truthy, interim results are forwarded to `onMessage`.
 * @returns `initialized` (true once the hook has mounted), `start(stream)` and `stop()`.
 */
export const useAzureSpeech = ({
  roomOwnerId,
  onMessage,
  onError,
  needInterim,
}: UseAzureSpeechArgs) => {
  // Keep the latest handlers in a ref so the memoised callbacks below never
  // invoke a stale closure and do not need the handlers as dependencies.
  const handlersRef = useRef({
    onMessage,
    onError
  });

  useEffect(() => {
    handlersRef.current = {onMessage, onError};
  }, [onMessage, onError]);

  const {replaceTextByDictionaries} = useSttDictionaries(roomOwnerId);

  const [initialized, setInitialized] = useState(false);

  const speechRecognizerRef = useRef<SpeechSDK.SpeechRecognizer>()
  const pushStreamRef = useRef<SpeechSDK.PushAudioInputStream>()
  const mediaRecorderRef = useRef<MediaStreamRecorder>();
  // False once the component using this hook has unmounted; checked by async
  // chunk handlers so they do not build new SDK objects after teardown.
  const mountedRef = useRef(true);

  useEffect(() => {
    setInitialized(true);
  }, [])

  /** Forwards an error to the optional `onError` handler as a string message. */
  const reportError = useCallback((error: unknown) => {
    const {onError: handler} = handlersRef.current;
    if (handler) {
      handler(error instanceof Error ? error.message : String(error));
    }
  }, []);

  /**
   * Collects the candidate languages for automatic language detection: the
   * stored primary language(s) plus the translation language, de-duplicated,
   * falling back to Japanese when nothing is stored.
   */
  const getLanguageCodes = useCallback(() => {
    // NOTE(review): Azure language identification limits the number of
    // candidate languages per recognizer — confirm the stored list stays
    // within the documented limit.
    const languages: ILanguages[] = [];
    if (LocalStorage.language && LocalStorage.language.length !== 0) {
      languages.push(...LocalStorage.language);
    }
    if (LocalStorage.translationLanguage && !languages.includes(LocalStorage.translationLanguage)) {
      languages.push(LocalStorage.translationLanguage);
    }
    if (languages.length === 0) {
      languages.push(ILanguages["ja-JP"]);
    }
    return languages;
  }, []);

  /** Releases the current recognizer and push stream, if any. Safe to call repeatedly. */
  const cleanUp = useCallback(() => {
    const recognizer = speechRecognizerRef.current;
    if (recognizer) {
      // Drop the reference first so the next audio chunk creates a new recognizer.
      speechRecognizerRef.current = undefined;

      // Detach listeners so nothing is delivered while the recognizer closes.
      recognizer.recognizing = () => {};
      recognizer.recognized = () => {};

      // Release SDK resources; a failure to close is reported, not thrown.
      recognizer.close(() => {
      }, (e) => {
        reportError(e);
      });
    }

    const pushStream = pushStreamRef.current;
    if (pushStream) {
      pushStreamRef.current = undefined;
      pushStream.close();
    }
  }, [reportError]);

  /** Forwards non-empty interim text to `onMessage` with `isFinal = false`. */
  const recognizing = useCallback((event: SpeechSDK.SpeechRecognitionEventArgs) => {
    if (event.result && event.result.text) {
      handlersRef.current.onMessage(event.result.text, false, event.result.language);
    }
  }, []);

  /**
   * Applies the dictionary replacements to a final result and forwards it to
   * `onMessage` with `isFinal = true`. Never rejects: failures are routed to
   * `onError` so the caller can always release the recognizer afterwards.
   */
  const recognized = useCallback(async (result: SpeechSDK.SpeechRecognitionResult) => {
    try {
      if (result.text) {
        const replaced = await replaceTextByDictionaries(result.text);
        handlersRef.current.onMessage(replaced, true, result.language);
      }
    } catch (e) {
      reportError(e);
    }
  }, [replaceTextByDictionaries, reportError]);

  /** Stops the active recorder (if any), detaches its chunk handler and forgets it. */
  const recorderStop = useCallback(() => {
    const recorder = mediaRecorderRef.current;
    if (recorder) {
      // Order matters: stop first, then detach, exactly as before, so any
      // chunk the recorder hands over during stop() still reaches the handler.
      recorder.stop();
      recorder.ondataavailable = (blob: Blob) => {};
      mediaRecorderRef.current = undefined;
    }
  }, []);

  /**
   * Starts recording `stream` in 500 ms WAV chunks and pushes each chunk's PCM
   * payload to the recognizer, creating the push stream and recognizer lazily.
   */
  const recorderStart = useCallback((stream: MediaStream) => {
    // Tear down any previous session before starting a new one.
    recorderStop();
    cleanUp();

    const recorder = new MediaStreamRecorder(stream);
    recorder.stream = stream;
    recorder.recorderType = MediaStreamRecorder.StereoAudioRecorder;
    recorder.mimeType = 'audio/wav';

    recorder.ondataavailable = async (blob: Blob) => {
      try {
        const arrayBuffer = await blob.arrayBuffer();
        // The await yields, so the hook may have unmounted in the meantime.
        // Chunks shorter than the header carry no audio and would make the
        // header reads below throw.
        if (!mountedRef.current || arrayBuffer.byteLength < WAV_HEADER_BYTES) {
          return;
        }

        let pushStream = pushStreamRef.current;
        if (!pushStream) {
          // Read the PCM format from the header fields of the first chunk
          // (little-endian): channels @22, sample rate @24, bits per sample @34.
          const header = new DataView(arrayBuffer, 0, WAV_HEADER_BYTES);
          const channels = header.getUint16(22, true);
          const sampleRate = header.getUint32(24, true);
          const bitsPerSample = header.getUint16(34, true);

          pushStream = SpeechSDK.AudioInputStream.createPushStream(
            SpeechSDK.AudioStreamFormat.getWaveFormatPCM(sampleRate, bitsPerSample, channels)
          );
          pushStreamRef.current = pushStream;
        }

        if (!speechRecognizerRef.current) {
          // NOTE(review): a REACT_APP_* variable is presumably inlined into the
          // browser bundle, which would expose the subscription key to every
          // client. Consider issuing short-lived authorization tokens from a
          // backend instead — confirm with the deployment setup.
          const subscriptionKey = process.env.REACT_APP_AZURE_SUBSCRIPTION_KEY;
          if (!subscriptionKey) {
            throw new Error('REACT_APP_AZURE_SUBSCRIPTION_KEY is not set');
          }

          const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(subscriptionKey, 'japaneast');
          const audioConfig = SpeechSDK.AudioConfig.fromStreamInput(pushStream);
          const autoDetectSourceLanguageConfig =
            SpeechSDK.AutoDetectSourceLanguageConfig.fromLanguages(getLanguageCodes());
          const recognizer = SpeechSDK.SpeechRecognizer.FromConfig(
            speechConfig,
            autoDetectSourceLanguageConfig,
            audioConfig
          );
          speechRecognizerRef.current = recognizer;

          // Only release what this recognition owns: if a restart replaced the
          // recognizer while the result was being processed, leave the new one alone.
          const release = () => {
            if (speechRecognizerRef.current === recognizer) {
              cleanUp();
            }
          };

          if (needInterim) {
            // Fired by the SDK while an utterance is still being recognized.
            recognizer.recognizing = (_sender, event: SpeechSDK.SpeechRecognitionEventArgs) => {
              recognizing(event);
            };
          }

          recognizer.recognizeOnceAsync(
            async (result: SpeechSDK.SpeechRecognitionResult) => {
              try {
                await recognized(result);
              } finally {
                release();
              }
            },
            (error: string) => {
              // Without this the recognizer would stay registered forever and
              // no further recognition would ever start.
              reportError(error);
              release();
            }
          );
        }

        // Push only the PCM payload; the format was declared when the stream was created.
        pushStream.write(arrayBuffer.slice(WAV_HEADER_BYTES));
      } catch (e) {
        reportError(e);
      }
    };

    mediaRecorderRef.current = recorder;
    recorder.start(500);
  }, [cleanUp, getLanguageCodes, needInterim, recognized, recognizing, recorderStop, reportError]);

  // Release the recorder and SDK resources when the owning component unmounts.
  useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
      recorderStop();
      cleanUp();
    };
  }, [cleanUp, recorderStop]);

  /** Receives an audio stream and starts recognition on it. */
  const start = useCallback((stream: MediaStream) => {
    console.log('startAzureCloudStream ==')
    recorderStart(stream);
  }, [recorderStart]);

  /**
   * Stops recording. Only the recorder is stopped here; the recognizer and
   * push stream are released when the pending recognition completes, on the
   * next `start`, or on unmount.
   */
  const stop = useCallback(() => {
    console.log('stopAzureCloudStream')
    recorderStop();
  }, [recorderStop])

  return {
    initialized,
    start,
    stop,
  }
}
