import { useCallback, useEffect, useRef, useState } from 'react';
import * as SpeechSDK from 'microsoft-cognitiveservices-speech-sdk';
import { PropertyId } from 'microsoft-cognitiveservices-speech-sdk';
import { useSttDictionaries } from './useSttDictionaries';
import { getLanguageCodes } from '../util/language';
import { UseSpeechArgs, UseSpeechHandles } from '../types/speech';
import { sentryLog } from '../util/sentry';

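/**
 * React hook that records microphone audio and transcribes it with the
 * Azure Speech SDK (ConversationTranscriber).
 *
 * @param {Object} UseSpeechArgs - Parameters configuring the hook.
 * @param {string} UseSpeechArgs.roomOwnerId - ID of the room owner; passed to useSttDictionaries.
 * @param {boolean} UseSpeechArgs.needInterim - Whether interim (in-progress) results are needed.
 * @param {function} UseSpeechArgs.onMessage - Handler invoked with each transcription message.
 * @param {function} UseSpeechArgs.onDataAvailable - Handler invoked when recorded audio data becomes available (only called outside production builds).
 * @param {function} UseSpeechArgs.onError - Handler invoked when an error occurs.
 *
 * @returns {Object} UseSpeechHandles - Object holding the callbacks and state for the recording process.
 * @returns {boolean} UseSpeechHandles.initialized - Ready flag; becomes true once setUseStart(true) has been called.
 * @returns {Object} UseSpeechHandles.initializedRef - Ref object mirroring `initialized`.
 * @returns {function} UseSpeechHandles.start - Starts speech-to-text processing for the given MediaStream.
 * @returns {function} UseSpeechHandles.stop - Stops speech-to-text processing (stops forwarding recorded audio).
 * @returns {function} UseSpeechHandles.setUseStart - Sets whether this hook is to be used.
 */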
export const useAzureSpeech = ({
   roomOwnerId,
   needInterim,
   onMessage,
   onDataAvailable,
   onError,
}: UseSpeechArgs): UseSpeechHandles => {
  // Event handlers supplied by the caller, kept in a ref so that callbacks
  // memoized with an empty dependency list can still invoke the latest ones.
  const handlersRef = useRef({
    onMessage,
    onError
  });
  // Re-sync the ref whenever the caller passes different handler functions.
  useEffect(() => {
    handlersRef.current = { onMessage, onError };
  }, [onMessage, onError]);

  // Whether this hook should be used; the setter is exposed to the caller as setUseStart.
  const [useStart, setUseStart] = useState<boolean>(false);

  // Ready flag, mirrored into a ref so the current value can be read from
  // callbacks without depending on a re-render.
  const [initialized, setInitialized] = useState(false);
  const initializedRef = useRef(initialized);
  useEffect(() => {
    initializedRef.current = initialized;
  }, [initialized]);

  // Recorder: Web Audio objects plus recording state.
  const audioContextRef = useRef<AudioContext>();
  const mediaStreamAudioSourceNodeRef = useRef<MediaStreamAudioSourceNode>();
  const audioWorkletNodeRef = useRef<AudioWorkletNode>();
  // While false, messages coming from the audio worklet are ignored.
  const isRecordingRef = useRef<boolean>(false);
  // The MediaStream most recently passed to recorderStart.
  const streamRef = useRef<MediaStream>();
  // Language codes the current transcription session was configured with.
  const languagesRef = useRef<string[]>([]);

  // Azure Speech SDK transcriber instance.
  const speechRecognizerRef = useRef<SpeechSDK.ConversationTranscriber>();
  // Push stream that carries the recorded audio to Azure speech-to-text.
  const pushStreamRef = useRef<SpeechSDK.PushAudioInputStream>();

  // Function that applies the registered dictionaries to recognized text.
  const { replaceTextByDictionaries } = useSttDictionaries(roomOwnerId);

  // Counter that is incremented each time the transcription session is interrupted.
  const guestCounterRef = useRef<number>(0);
  // Largest N seen so far among 'Guest-N' speaker labels (starts at 1).
  const maxGuestNumberRef = useRef<number>(1);
  /**
   * Extracts the numeric N from a speaker label that contains 'Guest-N'.
   *
   * @param guestLabel - Speaker label taken from a transcription result.
   * @returns N parsed as a base-10 integer, or 0 when the label is missing
   *          or does not contain 'Guest-N'.
   */
  const extractGuestNumber = useCallback((guestLabel: string) => {
    // The label originates from an SDK result object at runtime, so it can be
    // null/undefined despite the declared type; treat that like "no guest
    // number" instead of throwing from inside an event handler.
    if (typeof guestLabel !== 'string') {
      return 0;
    }
    const match = /Guest-(\d+)/.exec(guestLabel);
    return match ? parseInt(match[1], 10) : 0;
  }, []);
  /**
   * Reads the N of a 'Guest-N' label and stores it as the new maximum when it
   * exceeds the largest value recorded so far.
   */
  const updateMaxGuest = useCallback((guestLabel: string) => {
    const candidate = extractGuestNumber(guestLabel);
    const isNewMaximum = candidate > maxGuestNumberRef.current;
    if (!isNewMaximum) {
      return;
    }
    maxGuestNumberRef.current = candidate;
  }, [extractGuestNumber]);
  /**
   * Produces the number used in the speaker label: the largest guest number
   * seen so far plus the session-interruption counter.
   */
  const generateSpeakerIdNum = useCallback((): number => {
    const highestGuest = maxGuestNumberRef.current;
    const interruptions = guestCounterRef.current;
    return highestGuest + interruptions;
  }, []);
  /** Adds one to the session-interruption counter. */
  const incrementGuestCounter = useCallback((): void => {
    guestCounterRef.current = guestCounterRef.current + 1;
  }, []);

  /**
   * Interim-result handler: forwards non-empty in-progress text to onMessage
   * with the "final" flag set to false.
   */
  const recognizing = useCallback(async (event: SpeechSDK.ConversationTranscriptionEventArgs) => {
    const interim = event.result;
    // Nothing to report for a missing result or empty text.
    if (!interim || !interim.text) {
      return;
    }
    handlersRef.current.onMessage(interim.text, false, interim.language);
  }, []);

  /**
   * Final-result handler: applies the dictionaries to the recognized text and
   * forwards it to onMessage (final flag true) together with a speaker label.
   *
   * The label is 'user?' when the result's speakerId carries no 'Guest-N'
   * number, otherwise 'user' followed by generateSpeakerIdNum().
   *
   * The returned promise does not reject when the dictionary replacement
   * fails: the failure is logged and the unreplaced text is delivered, so a
   * dictionary problem neither drops the transcript nor surfaces as an
   * unhandled rejection (the SDK event handler does not await this function).
   */
  const recognized = useCallback(async (event: SpeechSDK.ConversationTranscriptionEventArgs) => {
    const result = event.result;
    if (!result || !result.text) {
      return;
    }

    // Replace words according to the registered dictionaries.
    let replaced = result.text;
    try {
      replaced = await replaceTextByDictionaries(result.text);
    } catch (e: any) {
      console.error(e);
      sentryLog(e);
    }

    // The SDK may leave speakerId unset at runtime; treat that as "unknown speaker".
    const speakerLabel = result.speakerId ?? '';
    const idNum = extractGuestNumber(speakerLabel);
    // 0 means the label carried no guest number.
    if (idNum === 0) {
      handlersRef.current.onMessage(replaced, true, result.language, undefined, 'user?');
      return;
    }

    // Track the largest guest number seen so far.
    updateMaxGuest(speakerLabel);
    // NOTE(review): the label number is derived from the largest guest number
    // seen (plus the interruption counter), not from this result's own idNum,
    // so distinct 'Guest-N' speakers can receive the same label — confirm this
    // is intended.
    const speakerIdNum = generateSpeakerIdNum();
    handlersRef.current.onMessage(replaced, true, result.language, undefined, 'user' + speakerIdNum);
  }, [extractGuestNumber, generateSpeakerIdNum, replaceTextByDictionaries, updateMaxGuest]);

  /**
   * Tears down the Web Audio graph and, when requested, the transcription
   * session as well.
   *
   * @param sessionClear - When true and a recognizer exists, transcription is
   *   stopped, the interruption counter is incremented and the recognizer is
   *   closed and dropped. When false the recognizer and push stream are kept.
   * @returns A promise that always resolves (never rejects through the SDK
   *   callbacks); SDK failures are reported through onError and both the
   *   recognizer and push stream refs are cleared.
   */
  const recorderCleanUp = useCallback((sessionClear: boolean = false) => {
    return new Promise<void>(resolve => {
      const sourceNode = mediaStreamAudioSourceNodeRef.current;
      if (sourceNode) {
        sourceNode.disconnect();
        mediaStreamAudioSourceNodeRef.current = undefined;
      }

      const workletNode = audioWorkletNodeRef.current;
      if (workletNode) {
        // Stop receiving recorded chunks before detaching the node.
        workletNode.port.onmessage = null;
        workletNode.disconnect();
        audioWorkletNodeRef.current = undefined;
      }

      const audioContext = audioContextRef.current;
      if (audioContext) {
        audioContextRef.current = undefined;
        void audioContext.close();
      }

      // Work on a captured instance: re-reading the ref inside the SDK
      // callbacks could find it already cleared and leave this promise
      // pending forever.
      const recognizer = speechRecognizerRef.current;
      if (!sessionClear || !recognizer) {
        resolve();
        return;
      }

      // Shared failure path for both SDK calls.
      const releaseAfterFailure = (e: string) => {
        speechRecognizerRef.current = undefined;
        pushStreamRef.current = undefined;
        handlersRef.current.onError && handlersRef.current.onError(e);
        resolve();
      };

      recognizer.stopTranscribingAsync(() => {
        incrementGuestCounter();
        // Detach the listeners so the closing recognizer can no longer emit messages.
        recognizer.transcribing = () => {};
        recognizer.transcribed = () => {};
        recognizer.close(() => {
          // Drop the reference unless it has already been replaced.
          if (speechRecognizerRef.current === recognizer) {
            speechRecognizerRef.current = undefined;
          }
          resolve();
        }, releaseAfterFailure);
      }, releaseAfterFailure);
    });
  }, [incrementGuestCounter]);

  /**
   * Creates whatever is missing of the recording pipeline: AudioContext with
   * the recorder worklet module, media-stream source node, worklet node, the
   * SDK push stream and the ConversationTranscriber. Existing objects are reused.
   *
   * @param stream - Microphone stream to record from.
   * @throws Rethrows the last addModule error when the worklet module cannot
   *   be loaded after MAX_RETRIES attempts (onError is called first and the
   *   newly created AudioContext is released).
   */
  const recorderSetUp = useCallback(async (stream: MediaStream) => {
    const MAX_RETRIES = 3; // maximum number of addModule attempts

    // Loads the recorder worklet module, waiting a little longer after each failed attempt.
    const loadRecorderWorklet = async (context: AudioContext): Promise<void> => {
      const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
      for (let attempt = 1; attempt <= MAX_RETRIES; attempt++) {
        try {
          await context.audioWorklet.addModule('/worklet/recorder-processor.js');
          console.log('AudioWorklet initialized successfully');
          return;
        } catch (error) {
          console.error(`AudioWorklet initialization attempt ${attempt} failed:`, error);
          if (attempt === MAX_RETRIES) {
            throw error;
          }
          await sleep(attempt * 1000);
        }
      }
    };

    let audioContext = audioContextRef.current;
    if (!audioContext) {
      const createdContext = new AudioContext();
      audioContextRef.current = createdContext;
      try {
        await loadRecorderWorklet(createdContext);
      } catch (error) {
        // Do not keep (or leak) a context that has no worklet module loaded.
        // Skip the close when a concurrent clean-up already took the context over.
        if (audioContextRef.current === createdContext) {
          audioContextRef.current = undefined;
          void createdContext.close();
        }
        const reason = error instanceof Error ? error.message : String(error);
        handlersRef.current.onError && handlersRef.current.onError(
          `Failed to initialize AudioWorklet after ${MAX_RETRIES} attempts: ${reason}`
        );
        throw error;
      }
      audioContext = createdContext;
    }

    if (!mediaStreamAudioSourceNodeRef.current) {
      mediaStreamAudioSourceNodeRef.current = audioContext.createMediaStreamSource(stream);
    }

    if (!audioWorkletNodeRef.current) {
      const workletNode = new AudioWorkletNode(audioContext, 'recorder-processor', {
        processorOptions: {
          sampleRate: audioContext.sampleRate,
        },
      });
      audioWorkletNodeRef.current = workletNode;
      mediaStreamAudioSourceNodeRef.current.connect(workletNode);

      // Handle the chunks posted by the AudioWorkletProcessor.
      workletNode.port.onmessage = async (event) => {
        if (!isRecordingRef.current) {
          return;
        }

        const arrayBuffer = event.data; // ArrayBuffer holding WAV data

        try {
          // Skip the first 44 bytes (WAV header) and push the remaining PCM to Azure.
          const rawPCM = arrayBuffer.slice(44);
          pushStreamRef.current && pushStreamRef.current.write(rawPCM);

          // The WAV blob is only handed to the caller outside production builds.
          if (process.env.REACT_APP_ENVIRONMENT !== 'production') {
            const blob = new Blob([arrayBuffer], { type: 'audio/wav' });
            await onDataAvailable(blob);
          }
        } catch (e: any) {
          console.error(e);
          sentryLog(e);
        }
      };
    }

    if (!pushStreamRef.current) {
      // NOTE(review): the stream is declared as 16 kHz / 16-bit / mono while the
      // AudioContext runs at its default rate; presumably the worklet converts
      // using processorOptions.sampleRate — confirm against recorder-processor.js.
      pushStreamRef.current = SpeechSDK.AudioInputStream.createPushStream(
        SpeechSDK.AudioStreamFormat.getWaveFormatPCM(16000, 16, 1),
      );
    }

    if (!speechRecognizerRef.current) {
      const speechConfig = SpeechSDK.SpeechConfig.fromSubscription(process.env.REACT_APP_AZURE_SUBSCRIPTION_KEY!, 'japaneast');
      speechConfig.setProfanity(SpeechSDK.ProfanityOption.Removed);
      speechConfig.setProperty(PropertyId.Speech_SegmentationSilenceTimeoutMs, '250');
      speechConfig.setProperty(PropertyId.SpeechServiceConnection_LanguageIdMode, 'Continuous');
      const audioConfig = SpeechSDK.AudioConfig.fromStreamInput(pushStreamRef.current);
      const autoDetectSourceLanguageConfig = SpeechSDK.AutoDetectSourceLanguageConfig.fromLanguages(languagesRef.current);
      const recognizer = SpeechSDK.ConversationTranscriber.FromConfig(speechConfig, autoDetectSourceLanguageConfig, audioConfig);
      speechRecognizerRef.current = recognizer;

      // The SDK does not await the handlers, so failures are logged here
      // instead of becoming unhandled promise rejections.
      const logHandlerFailure = (e: any) => {
        console.error(e);
        sentryLog(e);
      };

      if (needInterim) {
        // Interim listener: called while recognition is still in progress.
        recognizer.transcribing = (sender, event: SpeechSDK.ConversationTranscriptionEventArgs) => {
          void recognizing(event).catch(logHandlerFailure);
        };
      }
      // Final listener: called once a phrase has been recognized.
      recognizer.transcribed = (sender, event: SpeechSDK.ConversationTranscriptionEventArgs) => {
        void recognized(event).catch(logHandlerFailure);
      };
    }
  }, [needInterim, onDataAvailable, recognized, recognizing]);

  // Raise the ready flag whenever useStart is true; this effect never lowers it again.
  useEffect(() => {
    if (!useStart) {
      return;
    }
    setInitialized(true);
  }, [useStart]);

  /** Clears the recording flag so that incoming worklet messages are ignored. */
  const recorderStop = useCallback((): void => {
    isRecordingRef.current = false;
  }, []);

  /**
   * Starts recording from the given stream, rebuilding the transcription
   * session when the language list or the audio input device has changed.
   *
   * A recognizer that was created during this call is always started, also
   * when neither language nor device changed (for example after an earlier
   * failure dropped the previous recognizer). A recognizer that survived the
   * clean-up is left running untouched.
   *
   * @param stream - Microphone stream to record from.
   * @returns A promise that resolves once the recognizer has been started (or
   *   did not need to be). A start failure is reported through onError and
   *   still resolves. Errors thrown by recorderSetUp propagate to the caller
   *   after the recording flag has been cleared.
   */
  const recorderStart = useCallback(async (stream: MediaStream) => {
    isRecordingRef.current = true;

    // Tolerates streams without an audio track instead of throwing.
    const audioDeviceIdOf = (target: MediaStream) => target.getAudioTracks()[0]?.getSettings().deviceId;

    let sessionClear = false;
    const languageCodes = getLanguageCodes();
    if (languagesRef.current.join(',') !== languageCodes.join(',')) {
      // The languages differ from the previous run.
      languagesRef.current = languageCodes;
      sessionClear = true;
    } else if (streamRef.current && audioDeviceIdOf(streamRef.current) !== audioDeviceIdOf(stream)) {
      // The input device changed: release the tracks of the previous stream.
      streamRef.current.getTracks().forEach(track => track.stop());
      sessionClear = true;
    }
    streamRef.current = stream;

    await recorderCleanUp(sessionClear);
    // Whatever recognizer is still present here was kept by the clean-up.
    const survivingRecognizer = speechRecognizerRef.current;
    try {
      await recorderSetUp(stream);
    } catch (e) {
      // Nothing is being recorded when the pipeline could not be built.
      isRecordingRef.current = false;
      throw e;
    }

    const recognizer = speechRecognizerRef.current;
    if (!recognizer || recognizer === survivingRecognizer) {
      return;
    }

    await new Promise<void>(resolve => {
      recognizer.startTranscribingAsync(() => {
        resolve();
      }, (e) => {
        speechRecognizerRef.current = undefined;
        pushStreamRef.current = undefined;
        handlersRef.current.onError && handlersRef.current.onError(e);
        resolve();
      });
    });
  }, [recorderCleanUp, recorderSetUp]);

  /**
   * Public entry point: takes the audio stream and begins recognition by
   * delegating to recorderStart.
   */
  const start = useCallback(
    async (stream: MediaStream): Promise<void> => {
      console.log('startAzureCloudStream ==');
      await recorderStart(stream);
    },
    [recorderStart],
  );

  /** Public entry point: stops recognition by delegating to recorderStop. */
  const stop = useCallback(
    (): void => {
      console.log('stopAzureCloudStream');
      recorderStop();
    },
    [recorderStop],
  );

  // Handles exposed to the caller of the hook.
  return {
    // Ready flag (state) and its ref mirror.
    initialized,
    initializedRef,
    // Begin / stop recognition.
    start,
    stop,
    // Setter for the useStart flag.
    setUseStart,
  };
};
