diff --git a/src/chat_with_audio.js b/src/chat_with_audio.js index 73735f0..bbe0d4d 100644 --- a/src/chat_with_audio.js +++ b/src/chat_with_audio.js @@ -2,62 +2,61 @@ import { requestLLMStream } from './llm_stream.js'; import { requestMinimaxi } from './minimaxi_stream.js'; +import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from './config.js'; -async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) { +// 防止重复播放的标志 +let isPlaying = false; + +async function chatWithAudioStream(userInput) { + // 验证配置 + if (!validateConfig()) { + throw new Error('配置不完整,请检查config.js文件中的API密钥设置'); + } + console.log('用户输入:', userInput); + // 获取配置 + const llmConfig = getLLMConfig(); + const minimaxiConfig = getMinimaxiConfig(); + const audioConfig = getAudioConfig(); + // 1. 请求大模型回答 console.log('\n=== 请求大模型回答 ==='); const llmResponse = await requestLLMStream({ - apiKey: llmApiKey, - model: llmModel, + apiKey: llmConfig.apiKey, + model: llmConfig.model, messages: [ { role: 'system', content: 'You are a helpful assistant.' }, { role: 'user', content: userInput }, ], }); - // 提取大模型回答内容(假设返回的是JSON格式,包含content字段) - let llmContent = ''; - try { - const llmData = JSON.parse(llmResponse); - llmContent = llmData.choices?.[0]?.message?.content || llmResponse; - } catch (e) { - llmContent = llmResponse; - } + // 提取大模型回答内容(现在直接返回内容) + const llmContent = llmResponse; console.log('\n=== 大模型回答 ==='); - console.log(llmContent); + console.log("llmResponse: ", llmContent); // 2. 
合成音频 console.log('\n=== 开始合成音频 ==='); const audioResult = await requestMinimaxi({ - apiKey: minimaxiApiKey, - groupId: minimaxiGroupId, + apiKey: minimaxiConfig.apiKey, + groupId: minimaxiConfig.groupId, body: { - model: 'speech-02-hd', + model: audioConfig.model, text: llmContent, - stream: true, - language_boost: 'auto', - output_format: 'hex', - voice_setting: { - voice_id: 'male-qn-qingse', - speed: 1, - vol: 1, - pitch: 0, - emotion: 'happy', - }, - audio_setting: { - sample_rate: 32000, - bitrate: 128000, - format: 'mp3', - }, + stream: audioConfig.stream, + language_boost: audioConfig.language_boost, + output_format: audioConfig.output_format, + voice_setting: audioConfig.voiceSetting, + audio_setting: audioConfig.audioSetting, }, stream: true, }); // 3. 流式播放音频 console.log('\n=== 开始流式播放音频 ==='); + // console.log('音频数据长度:', audioResult.data.audio.length); await playAudioStream(audioResult.data.audio); return { @@ -69,6 +68,16 @@ async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApi // 流式播放音频 async function playAudioStream(audioHex) { + if (isPlaying) { + console.log('音频正在播放中,跳过重复播放'); + return; + } + + console.log('=== 开始播放音频 ==='); + console.log('音频数据长度:', audioHex.length); + + isPlaying = true; + // 将hex转换为ArrayBuffer const audioBuffer = hexToArrayBuffer(audioHex); @@ -93,11 +102,13 @@ async function playAudioStream(audioHex) { return new Promise((resolve) => { source.onended = () => { console.log('音频播放完成'); + isPlaying = false; resolve(); }; }); } catch (error) { console.error('音频播放失败:', error); + isPlaying = false; throw error; } } @@ -113,17 +124,23 @@ function hexToArrayBuffer(hex) { // 在Node.js环境下的音频播放(使用play-sound库) async function playAudioStreamNode(audioHex) { - const fs = require('fs'); - const path = require('path'); - - // 将hex转换为buffer - const audioBuffer = Buffer.from(audioHex, 'hex'); - - // 保存为临时文件 - const tempFile = path.join(process.cwd(), 'temp_audio.mp3'); - fs.writeFileSync(tempFile, audioBuffer); + // 
检查是否在Node.js环境中 + if (typeof window !== 'undefined') { + console.warn('playAudioStreamNode 只能在Node.js环境中使用'); + return; + } try { + const fs = require('fs'); + const path = require('path'); + + // 将hex转换为buffer + const audioBuffer = Buffer.from(audioHex, 'hex'); + + // 保存为临时文件 + const tempFile = path.join(process.cwd(), 'temp_audio.mp3'); + fs.writeFileSync(tempFile, audioBuffer); + // 使用系统默认播放器播放 const { exec } = require('child_process'); const platform = process.platform; @@ -158,27 +175,4 @@ async function playAudioStreamNode(audioHex) { } } -// 示例用法 -if (require.main === module) { - const llmApiKey = process.env.ARK_API_KEY; - const llmModel = 'bot-20250720193048-84fkp'; - const minimaxiApiKey = process.env.MINIMAXI_API_KEY; - const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID; - - if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) { - console.error('请设置环境变量: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID'); - process.exit(1); - } - - const userInput = process.argv[2] || '你好,请介绍一下人工智能的发展历程'; - - chatWithAudioStream({ - userInput, - llmApiKey, - llmModel, - minimaxiApiKey, - minimaxiGroupId, - }).catch(console.error); -} - export { chatWithAudioStream, playAudioStream, playAudioStreamNode }; \ No newline at end of file diff --git a/src/config.example.js b/src/config.example.js new file mode 100644 index 0000000..dca8101 --- /dev/null +++ b/src/config.example.js @@ -0,0 +1,94 @@ +// 示例配置文件 - 请复制此文件为 config.js 并填入实际的API密钥 +export const config = { + // LLM API配置 + llm: { + apiKey: 'your_ark_api_key_here', // 请替换为实际的ARK API密钥 + model: 'bot-20250720193048-84fkp', + }, + + // Minimaxi API配置 + minimaxi: { + apiKey: 'your_minimaxi_api_key_here', // 请替换为实际的Minimaxi API密钥 + groupId: 'your_minimaxi_group_id_here', // 请替换为实际的Minimaxi Group ID + }, + + // 音频配置 + audio: { + model: 'speech-02-hd', + voiceSetting: { + voice_id: 'yantu-qinggang', + speed: 1, + vol: 1, + pitch: 0, + emotion: 'happy', + }, + audioSetting: { + sample_rate: 32000, + bitrate: 128000, + 
format: 'mp3', + }, + }, + + // 系统配置 + system: { + language_boost: 'auto', + output_format: 'hex', + stream: true, + }, +}; + +// 验证配置是否完整 +export function validateConfig() { + const requiredFields = [ + 'llm.apiKey', + 'llm.model', + 'minimaxi.apiKey', + 'minimaxi.groupId' + ]; + + const missingFields = []; + + for (const field of requiredFields) { + const keys = field.split('.'); + let value = config; + for (const key of keys) { + value = value[key]; + if (!value) break; + } + + if (!value || value === 'your_ark_api_key_here' || value === 'your_minimaxi_api_key_here' || value === 'your_minimaxi_group_id_here') { + missingFields.push(field); + } + } + + if (missingFields.length > 0) { + console.warn('配置不完整,请检查以下字段:', missingFields); + return false; + } + + return true; +} + +// 获取配置的便捷方法 +export function getLLMConfig() { + return { + apiKey: config.llm.apiKey, + model: config.llm.model, + }; +} + +export function getMinimaxiConfig() { + return { + apiKey: config.minimaxi.apiKey, + groupId: config.minimaxi.groupId, + }; +} + +export function getAudioConfig() { + return { + model: config.audio.model, + voiceSetting: config.audio.voiceSetting, + audioSetting: config.audio.audioSetting, + ...config.system, + }; +} \ No newline at end of file diff --git a/src/config.js b/src/config.js new file mode 100644 index 0000000..8cb236c --- /dev/null +++ b/src/config.js @@ -0,0 +1,94 @@ +// 配置管理文件 +export const config = { + // LLM API配置 + llm: { + apiKey: 'your_ark_api_key_here', // 请替换为实际的API密钥(安全:不要将真实密钥提交到版本库,请用环境变量或未跟踪的本地文件注入) + model: 'bot-20250720193048-84fkp', + }, + + // Minimaxi API配置 + minimaxi: { + apiKey: 
'your_minimaxi_api_key_here', // 请替换为实际的API密钥(安全:不要将真实密钥/JWT提交到版本库) + groupId: 'your_minimaxi_group_id_here', // 请替换为实际的Group ID + }, + + // 音频配置 + audio: { + model: 'speech-02-hd', + voiceSetting: { + voice_id: 'yantu-qinggang', + speed: 1, + vol: 1, + pitch: 0, + emotion: 'happy', + }, + audioSetting: { + sample_rate: 32000, + bitrate: 128000, + format: 'mp3', + }, + }, + + // 系统配置 + system: { + language_boost: 'auto', + output_format: 'hex', + stream: true, + }, +}; + +// 验证配置是否完整 +export function validateConfig() { + const requiredFields = [ + 'llm.apiKey', + 'llm.model', + 'minimaxi.apiKey', + 'minimaxi.groupId' + ]; + + const missingFields = []; + + for (const field of requiredFields) { + const keys = field.split('.'); + let value = config; + for (const key of keys) { + value = value[key]; + if (!value) break; + } + + if (!value || value === 'your_ark_api_key_here' || value === 'your_minimaxi_api_key_here' || value === 'your_minimaxi_group_id_here') { + missingFields.push(field); + } + } + + if (missingFields.length > 0) { + console.warn('配置不完整,请检查以下字段:', missingFields); + return false; + } + + return true; +} + +// 获取配置的便捷方法 +export function getLLMConfig() { + return { + apiKey: config.llm.apiKey, + model: config.llm.model, + }; +} + +export function getMinimaxiConfig() { + return { + apiKey: 
config.minimaxi.apiKey, + groupId: config.minimaxi.groupId, + }; +} + +export function getAudioConfig() { + return { + model: config.audio.model, + voiceSetting: config.audio.voiceSetting, + audioSetting: config.audio.audioSetting, + ...config.system, + }; +} \ No newline at end of file diff --git a/src/debug_audio.js b/src/debug_audio.js new file mode 100644 index 0000000..7a5b669 --- /dev/null +++ b/src/debug_audio.js @@ -0,0 +1,26 @@ +// 调试音频数据 +function debugAudioData(audioHex) { + console.log('=== 音频数据调试 ==='); + console.log('音频数据长度:', audioHex.length); + console.log('音频数据前100个字符:', audioHex.substring(0, 100)); + console.log('音频数据后100个字符:', audioHex.substring(audioHex.length - 100)); + + // 检查是否有重复模式 + const halfLength = Math.floor(audioHex.length / 2); + const firstHalf = audioHex.substring(0, halfLength); + const secondHalf = audioHex.substring(halfLength); + + if (firstHalf === secondHalf) { + console.log('⚠️ 警告:音频数据可能是重复的!'); + } else { + console.log('✅ 音频数据没有重复'); + } +} + +// 如果在浏览器环境中运行 +if (typeof window !== 'undefined') { + window.debugAudioData = debugAudioData; + console.log('音频调试函数已挂载到 window.debugAudioData'); +} + +export { debugAudioData }; \ No newline at end of file diff --git a/src/index.html b/src/index.html index b5f8049..c729b17 100644 --- a/src/index.html +++ b/src/index.html @@ -77,6 +77,6 @@ - + \ No newline at end of file diff --git a/src/index.js b/src/index.js index 13a776d..86c0698 100644 --- a/src/index.js +++ b/src/index.js @@ -1,4 +1,6 @@ // WebRTC 音视频通话应用 +import { chatWithAudioStream } from './chat_with_audio.js'; + class WebRTCChat { constructor() { this.socket = null; @@ -582,15 +584,25 @@ class WebRTCChat { } } - sendText() { + async sendText() { const text = this.textInput.value.trim(); if (text) { this.socket.emit('text-input', { text }); this.logMessage(`发送文本: ${text}`, 'info'); this.textInput.value = ''; - // 根据文本查找对应视频并切换 - this.handleTextInput(text); + try { + // 调用chat_with_audio进行大模型回答和音频合成 + 
this.logMessage('正在处理文本,请稍候...', 'info'); + const result = await chatWithAudioStream(text); + this.logMessage(`大模型回答: ${result.llmResponse}`, 'success'); + + // 根据文本查找对应视频并切换 + await this.handleTextInput(text); + } catch (error) { + this.logMessage(`处理文本失败: ${error.message}`, 'error'); + console.error('chatWithAudioStream error:', error); + } } } diff --git a/src/llm_stream.js b/src/llm_stream.js index ff7bcef..0308340 100644 --- a/src/llm_stream.js +++ b/src/llm_stream.js @@ -6,6 +6,8 @@ async function requestLLMStream({ apiKey, model, messages }) { headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json', + 'Accept': 'text/event-stream', + 'Cache-Control': 'no-cache', }, body: JSON.stringify({ model, @@ -23,6 +25,7 @@ async function requestLLMStream({ apiKey, model, messages }) { const decoder = new TextDecoder('utf-8'); let done = false; let buffer = ''; + let content = ''; while (!done) { const { value, done: doneReading } = await reader.read(); @@ -30,30 +33,43 @@ async function requestLLMStream({ apiKey, model, messages }) { if (value) { const chunk = decoder.decode(value, { stream: true }); buffer += chunk; - // 打印每次收到的内容 - process.stdout.write(chunk); + + // 处理SSE格式的数据 + const lines = buffer.split('\n'); + buffer = lines.pop(); // 最后一行可能是不完整的,留到下次 + + for (const line of lines) { + if (!line.trim()) continue; + + // 检查是否是SSE格式的数据行 + if (line.startsWith('data:')) { + const jsonStr = line.substring(5).trim(); // 移除 'data:' 前缀 + + if (jsonStr === '[DONE]') { + console.log('LLM SSE流结束'); + continue; + } + + try { + const obj = JSON.parse(jsonStr); + if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) { + const deltaContent = obj.choices[0].delta.content; + content += deltaContent; + console.log('LLM内容片段:', deltaContent); + } + } catch (e) { + console.error('解析LLM SSE数据失败:', e, '原始数据:', jsonStr); + } + } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) { 
+ // 忽略SSE的其他字段 + continue; + } + } } } - // 可选:返回完整内容 - return buffer; -} - -// 示例用法 -if (require.main === module) { - const apiKey = process.env.ARK_API_KEY; - if (!apiKey) { - console.error('请设置环境变量 ARK_API_KEY'); - process.exit(1); - } - requestLLMStream({ - apiKey, - model: 'bot-20250720193048-84fkp', - messages: [ - { role: 'system', content: 'You are a helpful assistant.' }, - { role: 'user', content: 'Hello!' }, - ], - }).catch(console.error); + // 返回完整内容 + return content; } export { requestLLMStream }; \ No newline at end of file diff --git a/src/minimaxi_stream.js b/src/minimaxi_stream.js index 59616f1..cc3b369 100644 --- a/src/minimaxi_stream.js +++ b/src/minimaxi_stream.js @@ -1,13 +1,15 @@ // 以流式或非流式方式请求 minimaxi 大模型接口,并打印/返回内容 async function requestMinimaxi({ apiKey, groupId, body, stream = true }) { - const url = `https://api.minimaxi.com/v1/t2a_v2/${groupId}`; + const url = `https://api.minimaxi.com/v1/t2a_v2?GroupId=${groupId}`; // T2A v2接口要求GroupId作为查询参数,否则groupId参数成为死代码 const reqBody = { ...body, stream }; const response = await fetch(url, { method: 'POST', headers: { 'Authorization': `Bearer ${apiKey}`, 'Content-Type': 'application/json', + 'Accept': 'text/event-stream', + 'Cache-Control': 'no-cache', }, body: JSON.stringify(reqBody), }); @@ -36,29 +38,66 @@ if (value) { const chunk = decoder.decode(value, { stream: true }); buffer += chunk; - // 处理多条JSON(以\n分割) + // console.log('收到原始chunk:', chunk); + + // 处理SSE格式的数据(以\n分割) let lines = buffer.split('\n'); buffer = lines.pop(); // 最后一行可能是不完整的,留到下次 for (const line of lines) { if (!line.trim()) continue; - try { - const obj = JSON.parse(line); - if (obj.data && obj.data.audio) { - audioHex += obj.data.audio; + // console.log('处理行:', line); + + // 检查是否是SSE格式的数据行 + if (line.startsWith('data:')) { + const jsonStr = line.substring(5).trim(); // 移除 'data:' 前缀并去除可选空格(SSE规范中冒号后空格可有可无;与llm_stream.js保持一致,substring(6)会在无空格时吞掉JSON首字符) + // console.log('提取的JSON字符串:', jsonStr); + + if (jsonStr.trim() === '[DONE]') { + console.log('SSE流结束'); + continue; } + + try { + const obj = JSON.parse(jsonStr); + // 流式,解析每个chunk,合并audio + if (obj.data && obj.data.audio) { + audioHex += obj.data.audio; + } + // 
status=2为最后一个chunk,记录完整结构 - if (obj.data && obj.data.status === 2) { - lastFullResult = obj; + + try { + const obj = JSON.parse(jsonStr); + // 流式,解析每个chunk,合并audio + if (obj.data && obj.data.audio) { + audioHex += obj.data.audio; + } + // status=2为最后一个chunk,记录完整结构 + if (obj.data && obj.data.status === 2) { + lastFullResult = obj; + console.log('收到最终状态'); + } + // 实时打印每个chunk + console.log('解析成功:', JSON.stringify(obj)); + } catch (e) { + console.error('解析SSE数据失败:', e, '原始数据:', jsonStr); + } + } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) { + // 忽略SSE的其他字段 + console.log('忽略SSE字段:', line); + continue; + } else if (line.trim() && !line.startsWith('data:')) { + // 尝试直接解析(兼容非SSE格式,但避免重复处理) + console.log('尝试直接解析:', line); + try { + const obj = JSON.parse(line); + if (obj.data && obj.data.audio) { + audioHex += obj.data.audio; + } + if (obj.data && obj.data.status === 2) { + lastFullResult = obj; + } + console.log('直接解析成功:', JSON.stringify(obj)); + } catch (e) { + console.error('解析chunk失败:', e, line); } - // 实时打印每个chunk - console.log('chunk:', JSON.stringify(obj)); - } catch (e) { - console.error('解析chunk失败:', e, line); } } } } // 合成最终结构 + console.log('音频数据总长度:', audioHex.length); if (lastFullResult) { lastFullResult.data.audio = audioHex; console.log('最终合成结果:', JSON.stringify(lastFullResult, null, 2)); @@ -70,47 +109,4 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) { } } -// 示例用法 -if (require.main === module) { - const apiKey = process.env.MINIMAXI_API_KEY; - const groupId = process.env.MINIMAXI_GROUP_ID; - if (!apiKey || !groupId) { - console.error('请设置环境变量 MINIMAXI_API_KEY 和 MINIMAXI_GROUP_ID'); - process.exit(1); - } - const baseBody = { - model: 'speech-02-hd', - text: '真正的危险不是计算机开始像人一样思考,而是人开始像计算机一样思考。计算机只是可以帮我们处理一些简单事务。', - language_boost: 'auto', - output_format: 'hex', - voice_setting: { - voice_id: 'male-qn-qingse', - speed: 1, - vol: 1, - pitch: 0, - emotion: 'happy', - }, - 
audio_setting: { - sample_rate: 32000, - bitrate: 128000, - format: 'mp3', - }, - }; - // 非流式 - requestMinimaxi({ - apiKey, - groupId, - body: baseBody, - stream: false, - }).then(() => { - // 流式 - return requestMinimaxi({ - apiKey, - groupId, - body: baseBody, - stream: true, - }); - }).catch(console.error); -} - -export { requestMinimaxi }; \ No newline at end of file +export { requestMinimaxi }; \ No newline at end of file diff --git a/src/video_audio_sync.js b/src/video_audio_sync.js index e94d8e2..dbed2e9 100644 --- a/src/video_audio_sync.js +++ b/src/video_audio_sync.js @@ -1,4 +1,5 @@ import { requestMinimaxi } from './minimaxi_stream.js'; +import { getMinimaxiConfig } from './config.js'; export async function playVideoWithAudio(videoPath, text) { // 1. 初始化视频播放 @@ -7,15 +8,16 @@ export async function playVideoWithAudio(videoPath, text) { document.body.appendChild(video); // 2. 启动音频合成流 + const minimaxiConfig = getMinimaxiConfig(); const audioStream = await requestMinimaxi({ - apiKey: process.env.MINIMAXI_API_KEY, - groupId: process.env.MINIMAXI_GROUP_ID, + apiKey: minimaxiConfig.apiKey, + groupId: minimaxiConfig.groupId, body: { model: 'speech-02-hd', text, - output_format: 'hex', + output_format: 'hex', // 流式场景必须使用hex voice_setting: { - voice_id: 'male-qn-qingse', + voice_id: 'yantu-qinggang', speed: 1 } },