'use client';
import { useState, useCallback, useMemo, useRef } from 'react';
import { useToast } from '@fastgpt/web/hooks/useToast';
import { getErrText } from '@fastgpt/global/common/error/utils';
import type { AppTTSConfigType } from '@fastgpt/global/core/app/type.d';
import { TTSTypeEnum } from '@/web/core/app/constants';
import { useTranslation } from 'next-i18next';
import type { OutLinkChatAuthProps } from '@fastgpt/global/support/permission/chat.d';
import { useMount } from 'ahooks';
import { getWebReqUrl } from '@fastgpt/web/common/system/utils';

const contentType = 'audio/mpeg';
const splitMarker = 'SPLIT_MARKER';

// Feature detection: MediaSource is unavailable in some browsers (e.g. iOS Safari)
const isMediaSourceSupported = () =>
  typeof MediaSource !== 'undefined' && MediaSource.isTypeSupported?.(contentType);

export const useAudioPlay = (props?: OutLinkChatAuthProps & { ttsConfig?: AppTTSConfigType }) => {
  const { t } = useTranslation();
  const { ttsConfig, shareId, outLinkUid, teamId, teamToken } = props || {};
  const { toast } = useToast();
  const audioRef = useRef<HTMLAudioElement>();
  const [audioLoading, setAudioLoading] = useState(false);
  const [audioPlaying, setAudioPlaying] = useState(false);
  const audioController = useRef(new AbortController());

  // Check whether voice playback is supported
  const hasAudio = (() => {
    if (typeof window === 'undefined') return false;
    if (ttsConfig?.type === TTSTypeEnum.none) return false;
    if (ttsConfig?.type === TTSTypeEnum.model) return true;
    const voices = window?.speechSynthesis?.getVoices?.() || []; // Get the available voices
    const voice = voices.find((item) => {
      return item.lang === 'zh-CN' || item.lang === 'zh';
    });
    return !!voice;
  })();

  const getAudioStream = useCallback(
    async (input: string) => {
      if (!input) return Promise.reject('Text is empty');

      setAudioLoading(true);
      audioController.current = new AbortController();

      const response = await fetch(getWebReqUrl('/api/core/chat/item/getSpeech'), {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        signal: audioController.current.signal,
        body: JSON.stringify({
          ttsConfig,
          input: input.trim(),
          shareId,
          outLinkUid,
          teamId,
          teamToken
        })
      }).finally(() => {
        setAudioLoading(false);
      });

      if (!response.body || !response.ok) {
        const data = await response.json();
        toast({
          status: 'error',
          title: getErrText(data, t('common:core.chat.Audio Speech Error'))
        });
        return Promise.reject(data);
      }

      return response.body;
    },
    [outLinkUid, shareId, t, teamId, teamToken, toast, ttsConfig]
  );

  const playWebAudio = useCallback((text: string) => {
    // Browser speech synthesis
    window?.speechSynthesis?.cancel();
    const msg = new SpeechSynthesisUtterance(text);
    const voices = window?.speechSynthesis?.getVoices?.() || []; // Get the available voices
    // Match the same languages as the hasAudio check above
    const voice = voices.find((item) => {
      return item.lang === 'zh-CN' || item.lang === 'zh';
    });
    if (voice) {
      msg.onstart = () => {
        setAudioPlaying(true);
      };
      msg.onend = () => {
        setAudioPlaying(false);
        msg.onstart = null;
        msg.onend = null;
      };
      msg.voice = voice;
      window.speechSynthesis?.speak(msg);
    }
  }, []);

  const cancelAudio = useCallback(() => {
    try {
      window.speechSynthesis?.cancel();
      audioController.current.abort('');
    } catch (error) {}
    if (audioRef.current) {
      audioRef.current.pause();
      audioRef.current.src = '';
    }
    setAudioPlaying(false);
  }, []);

  /* Perform a one-shot voice playback */
  const playAudioByText = useCallback(
    async ({ text, buffer }: { text: string; buffer?: Uint8Array }) => {
      const playAudioBuffer = (buffer: Uint8Array) => {
        if (!audioRef.current) return;
        const audioUrl = URL.createObjectURL(new Blob([buffer], { type: contentType }));
        audioRef.current.src = audioUrl;
        audioRef.current.play();
      };
      const readAudioStream = (stream: ReadableStream<Uint8Array>) => {
        if (!audioRef.current) return;

        if (!isMediaSourceSupported()) {
          // Without MediaSource, read the whole stream first, then play it in one go
          return new Promise<Uint8Array>(async (resolve) => {
            const reader = stream.getReader();
            let chunks: Uint8Array[] = [];
            while (true) {
              const { done, value } = await reader.read();
              if (done) break;
              chunks.push(value);
            }
            // Concatenate all chunks into one buffer
            const fullBuffer = new Uint8Array(chunks.reduce((acc, chunk) => acc + chunk.length, 0));
            let offset = 0;
            for (const chunk of chunks) {
              fullBuffer.set(chunk, offset);
              offset += chunk.length;
            }
            playAudioBuffer(fullBuffer);
            resolve(fullBuffer);
          });
        }

        // MediaSource path: append chunks while they stream in
        const ms = new MediaSource();
        const url = URL.createObjectURL(ms);
        audioRef.current.src = url;
        audioRef.current.play();

        let u8Arr: Uint8Array = new Uint8Array();
        return new Promise<Uint8Array>(async (resolve, reject) => {
          // Wait until the MediaSource is ready to accept data
          await new Promise((resolve) => {
            ms.onsourceopen = resolve;
          });

          const sourceBuffer = ms.addSourceBuffer(contentType);
          const reader = stream.getReader();

          // Read the stream
          try {
            while (true) {
              const { done, value } = await reader.read();
              if (done || audioRef.current?.paused) {
                resolve(u8Arr);
                if (sourceBuffer.updating) {
                  await new Promise((resolve) => (sourceBuffer.onupdateend = resolve));
                }
                ms.endOfStream();
                return;
              }
              u8Arr = new Uint8Array([...u8Arr, ...value]);
              await new Promise((resolve) => {
                sourceBuffer.onupdateend = resolve;
                sourceBuffer.appendBuffer(value.buffer);
              });
            }
          } catch (error) {
            reject(error);
          }
        });
      };

      return new Promise<{ buffer?: Uint8Array }>(async (resolve, reject) => {
        text = text.replace(/\\n/g, '\n');

        try {
          // Stop any audio that is still playing
          cancelAudio();

          // TTS playback
          if (audioRef.current && ttsConfig?.type === TTSTypeEnum.model) {
            /* Play a cached buffer directly */
            if (buffer) {
              playAudioBuffer(buffer);
              return resolve({ buffer });
            }
            /* Request TTS and play while streaming */
            const audioBuffer = await readAudioStream(await getAudioStream(text));
            resolve({ buffer: audioBuffer });
          } else {
            // Browser speech synthesis
            playWebAudio(text);
            resolve({});
          }
        } catch (error) {
          toast({
            status: 'error',
            title: getErrText(error, t('common:core.chat.Audio Speech Error'))
          });
          reject(error);
        }
      });
    },
    [cancelAudio, getAudioStream, playWebAudio, t, toast, ttsConfig?.type]
  );
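  /*
   * Illustrative usage (a sketch, not part of this module): the buffer
   * returned by the first playback can be cached by the caller and passed
   * back later to replay the same text without a second TTS request.
   * `item` is a hypothetical chat record:
   *
   *   const { buffer } = await playAudioByText({ text: item.content });
   *   // later, replay from the cached buffer:
   *   await playAudioByText({ text: item.content, buffer });
   */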
  // Segmented playback state
  const segmentedMediaSource = useRef<MediaSource>();
  const segmentedSourceBuffer = useRef<SourceBuffer>();
  const segmentedTextList = useRef<string[]>([]);
  const appendAudioPromise = useRef<Promise<any>>(Promise.resolve());

  /* Segmented voice playback */
  const startSegmentedAudio = useCallback(async () => {
    if (!audioRef.current) return;

    if (!isMediaSourceSupported()) {
      // Without MediaSource, fall back to simple one-shot playback
      cancelAudio();
      segmentedTextList.current = [];
      return;
    }

    cancelAudio();

    /* Reset the previous source */
    const buffer = segmentedSourceBuffer.current;
    if (buffer) {
      buffer.updating && (await new Promise((resolve) => (buffer.onupdateend = resolve)));
      segmentedSourceBuffer.current = undefined;
    }
    if (segmentedMediaSource.current) {
      if (segmentedMediaSource.current?.readyState === 'open') {
        segmentedMediaSource.current.endOfStream();
      }
      segmentedMediaSource.current = undefined;
    }

    /* Reset segment state */
    segmentedTextList.current = [];
    appendAudioPromise.current = Promise.resolve();

    /* Start the MediaSource and create the source buffer */
    const ms = new MediaSource();
    segmentedMediaSource.current = ms;
    const url = URL.createObjectURL(ms);
    audioRef.current.src = url;
    audioRef.current.play();
    await new Promise((resolve) => {
      ms.onsourceopen = resolve;
    });
    const sourceBuffer = ms.addSourceBuffer(contentType);
    segmentedSourceBuffer.current = sourceBuffer;
  }, [cancelAudio]);
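  /*
   * Design note: a SourceBuffer throws an InvalidStateError if appendBuffer()
   * is called while a previous append is still updating, so all segment
   * appends below are serialized through appendAudioPromise:
   *
   *   appendAudioPromise.current = appendAudioPromise.current.then(() =>
   *     appendAudioStream(nextSentence)
   *   );
   */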
  const finishSegmentedAudio = useCallback(() => {
    if (!isMediaSourceSupported()) {
      // Nothing to finalize without MediaSource
      return;
    }
    appendAudioPromise.current = appendAudioPromise.current.finally(() => {
      if (segmentedMediaSource.current?.readyState === 'open') {
        segmentedMediaSource.current.endOfStream();
      }
    });
  }, []);

  const appendAudioStream = useCallback(
    (input: string) => {
      const buffer = segmentedSourceBuffer.current;
      if (!buffer) return;

      let u8Arr: Uint8Array = new Uint8Array();
      return new Promise<Uint8Array>(async (resolve, reject) => {
        // Read the TTS stream and append each chunk to the shared source buffer
        try {
          const stream = await getAudioStream(input);
          const reader = stream.getReader();
          while (true) {
            const { done, value } = await reader.read();
            if (done || !audioRef.current?.played) {
              buffer.updating && (await new Promise((resolve) => (buffer.onupdateend = resolve)));
              return resolve(u8Arr);
            }
            u8Arr = new Uint8Array([...u8Arr, ...value]);
            await new Promise((resolve) => {
              buffer.onupdateend = resolve;
              buffer.appendBuffer(value.buffer);
            });
          }
        } catch (error) {
          reject(error);
        }
      });
    },
    [getAudioStream, segmentedSourceBuffer]
  );

  /* Split the streamed text into sentences and fetch TTS for each */
  const splitText2Audio = useCallback(
    async (text: string, done?: boolean) => {
      if (ttsConfig?.type === TTSTypeEnum.model && ttsConfig?.model) {
        if (!isMediaSourceSupported()) {
          // Without MediaSource, wait for the full text, then play it in one request
          if (done) {
            try {
              const stream = await getAudioStream(text);
              const reader = stream.getReader();
              let chunks: Uint8Array[] = [];
              while (true) {
                const { done, value } = await reader.read();
                if (done) break;
                chunks.push(value);
              }
              const fullBuffer = new Uint8Array(
                chunks.reduce((acc, chunk) => acc + chunk.length, 0)
              );
              let offset = 0;
              for (const chunk of chunks) {
                fullBuffer.set(chunk, offset);
                offset += chunk.length;
              }
              if (audioRef.current) {
                const audioUrl = URL.createObjectURL(
                  new Blob([fullBuffer], { type: contentType })
                );
                audioRef.current.src = audioUrl;
                audioRef.current.play();
              }
            } catch (error) {
              console.error('Play audio error:', error);
            }
          }
          return;
        }

        // MediaSource path: split the new text on sentence boundaries
        const splitReg = /([。！？]|[.!?]\s)/g;
        const storeText = segmentedTextList.current.join('');
        const newText = text.slice(storeText.length);

        const splitTexts = newText
          .replace(splitReg, `$1${splitMarker}`)
          .split(splitMarker)
          .filter((part) => part.trim());

        if (splitTexts.length > 1 || done) {
          let splitList = splitTexts.slice();

          // While streaming, hold back the trailing (possibly incomplete) sentence
          // and merge the complete ones into a single segment
          if (!done) {
            splitList = splitTexts.slice(0, -1);
            splitList = [splitList.join('')];
          }

          segmentedTextList.current = segmentedTextList.current.concat(splitList);

          for (const item of splitList) {
            appendAudioPromise.current = appendAudioPromise.current.then(() =>
              appendAudioStream(item)
            );
          }
        }
      } else if (ttsConfig?.type === TTSTypeEnum.web && done) {
        playWebAudio(text);
      }
    },
    [appendAudioStream, playWebAudio, ttsConfig?.model, ttsConfig?.type]
  );
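  /*
   * Illustrative flow for a streamed answer (handler names are hypothetical):
   * call splitText2Audio with the accumulated text on every chunk, and with
   * done=true once the stream ends.
   *
   *   await startSegmentedAudio();
   *   onMessageChunk: (fullText) => splitText2Audio(fullText);
   *   onMessageEnd: (fullText) => {
   *     splitText2Audio(fullText, true);
   *     finishSegmentedAudio();
   *   };
   */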
  // Create the shared audio element and keep the playing state in sync
  useMount(() => {
    const audio = new Audio();
    audioRef.current = audio;

    audio.onplay = () => {
      setAudioPlaying(true);
    };
    audio.onended = () => {
      setAudioPlaying(false);
    };
    audio.onerror = () => {
      setAudioPlaying(false);
    };
    audio.oncancel = () => {
      setAudioPlaying(false);
    };
    const listen = () => {
      cancelAudio();
    };
    window.addEventListener('beforeunload', listen);

    return () => {
      audio.onplay = null;
      audio.onended = null;
      audio.onerror = null;
      cancelAudio();
      audio.remove();
      window.removeEventListener('beforeunload', listen);
    };
  });

  return {
    audio: audioRef.current,
    audioLoading,
    audioPlaying,
    setAudioPlaying,
    getAudioStream,
    cancelAudio,
    audioController,
    hasAudio: useMemo(() => hasAudio, [hasAudio]),
    playAudioByText,
    startSegmentedAudio,
    finishSegmentedAudio,
    splitText2Audio
  };
};
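/*
 * Minimal usage sketch (component and prop names are illustrative, not part
 * of this module):
 *
 *   const SpeakButton = ({ text, ttsConfig }: { text: string; ttsConfig: AppTTSConfigType }) => {
 *     const { hasAudio, audioLoading, audioPlaying, playAudioByText, cancelAudio } =
 *       useAudioPlay({ ttsConfig });
 *
 *     if (!hasAudio) return null;
 *     return (
 *       <button
 *         disabled={audioLoading}
 *         onClick={() => (audioPlaying ? cancelAudio() : playAudioByText({ text }))}
 *       >
 *         {audioPlaying ? 'Stop' : 'Speak'}
 *       </button>
 *     );
 *   };
 */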