diff --git a/src/chat_with_audio.js b/src/chat_with_audio.js
new file mode 100644
--- /dev/null
+++ b/src/chat_with_audio.js
@@ -0,0 +1,198 @@
+// After the user enters text, request an LLM answer, synthesize speech, and play it.
+
+import fs from 'node:fs';
+import path from 'node:path';
+import { exec } from 'node:child_process';
+import { pathToFileURL } from 'node:url';
+
+import { requestLLMStream } from './llm_stream.js';
+import { requestMinimaxi } from './minimaxi_stream.js';
+
+/**
+ * Full pipeline: ask the LLM, synthesize the answer with Minimaxi TTS, stream-play it.
+ *
+ * @param {object} opts
+ * @param {string} opts.userInput - User question sent to the LLM.
+ * @param {string} opts.llmApiKey - API key for the Ark LLM endpoint.
+ * @param {string} opts.llmModel - LLM bot/model id.
+ * @param {string} opts.minimaxiApiKey - Minimaxi API key.
+ * @param {string} opts.minimaxiGroupId - Minimaxi group id.
+ * @returns {Promise<{userInput: string, llmResponse: string, audioResult: object}>}
+ */
+async function chatWithAudioStream({ userInput, llmApiKey, llmModel, minimaxiApiKey, minimaxiGroupId }) {
+  console.log('用户输入:', userInput);
+
+  // 1. Request the LLM answer.
+  console.log('\n=== 请求大模型回答 ===');
+  const llmResponse = await requestLLMStream({
+    apiKey: llmApiKey,
+    model: llmModel,
+    messages: [
+      { role: 'system', content: 'You are a helpful assistant.' },
+      { role: 'user', content: userInput },
+    ],
+  });
+
+  // Extract the answer text (the response may be a JSON envelope with a content field).
+  let llmContent = '';
+  try {
+    const llmData = JSON.parse(llmResponse);
+    llmContent = llmData.choices?.[0]?.message?.content ?? llmResponse;
+  } catch {
+    // Not JSON — treat the raw stream text as the answer.
+    llmContent = llmResponse;
+  }
+
+  console.log('\n=== 大模型回答 ===');
+  console.log(llmContent);
+
+  // 2. Synthesize audio.
+  console.log('\n=== 开始合成音频 ===');
+  const audioResult = await requestMinimaxi({
+    apiKey: minimaxiApiKey,
+    groupId: minimaxiGroupId,
+    body: {
+      model: 'speech-02-hd',
+      text: llmContent,
+      stream: true,
+      language_boost: 'auto',
+      output_format: 'hex',
+      voice_setting: {
+        voice_id: 'male-qn-qingse',
+        speed: 1,
+        vol: 1,
+        pitch: 0,
+        emotion: 'happy',
+      },
+      audio_setting: {
+        sample_rate: 32000,
+        bitrate: 128000,
+        format: 'mp3',
+      },
+    },
+    stream: true,
+  });
+
+  // 3. Stream-play the audio.
+  console.log('\n=== 开始流式播放音频 ===');
+  await playAudioStream(audioResult.data.audio);
+
+  return {
+    userInput,
+    llmResponse: llmContent,
+    audioResult,
+  };
+}
+
+/**
+ * Browser playback: decode a hex-encoded audio buffer with Web Audio and play it.
+ * Resolves when playback has finished.
+ * @param {string} audioHex - Hex-encoded audio bytes (e.g. MP3).
+ */
+async function playAudioStream(audioHex) {
+  const audioBuffer = hexToArrayBuffer(audioHex);
+  const audioContext = new (window.AudioContext || window.webkitAudioContext)();
+
+  try {
+    const audioData = await audioContext.decodeAudioData(audioBuffer);
+
+    const source = audioContext.createBufferSource();
+    source.buffer = audioData;
+    source.connect(audioContext.destination);
+    source.start(0);
+
+    console.log('音频播放开始,时长:', audioData.duration, '秒');
+
+    // Resolve once the source signals the end of playback.
+    return new Promise((resolve) => {
+      source.onended = () => {
+        console.log('音频播放完成');
+        resolve();
+      };
+    });
+  } catch (error) {
+    console.error('音频播放失败:', error);
+    throw error;
+  }
+}
+
+/**
+ * Convert a hex string to an ArrayBuffer.
+ * @param {string} hex - Even-length hex string.
+ * @returns {ArrayBuffer}
+ */
+function hexToArrayBuffer(hex) {
+  const bytes = new Uint8Array(hex.length / 2);
+  for (let i = 0; i < hex.length; i += 2) {
+    bytes[i / 2] = Number.parseInt(hex.slice(i, i + 2), 16);
+  }
+  return bytes.buffer;
+}
+
+/**
+ * Node.js playback: write the audio to a temp file and open it with the
+ * platform's default player. Best effort — player errors are logged, not
+ * thrown, and the temp file is removed after 10 seconds.
+ * @param {string} audioHex - Hex-encoded audio bytes.
+ */
+async function playAudioStreamNode(audioHex) {
+  const audioBuffer = Buffer.from(audioHex, 'hex');
+  const tempFile = path.join(process.cwd(), 'temp_audio.mp3');
+  fs.writeFileSync(tempFile, audioBuffer);
+
+  try {
+    const platform = process.platform;
+
+    let command;
+    if (platform === 'win32') {
+      command = `start "" "${tempFile}"`;
+    } else if (platform === 'darwin') {
+      command = `open "${tempFile}"`;
+    } else {
+      command = `xdg-open "${tempFile}"`;
+    }
+
+    exec(command, (error) => {
+      if (error) {
+        console.error('播放音频失败:', error);
+      } else {
+        console.log('音频播放开始');
+      }
+    });
+
+    // Give the external player time to open the file before deleting it.
+    setTimeout(() => {
+      if (fs.existsSync(tempFile)) {
+        fs.unlinkSync(tempFile);
+      }
+    }, 10000);
+  } catch (error) {
+    console.error('音频播放失败:', error);
+    throw error;
+  }
+}
+
+// Example usage (ESM replacement for CommonJS `require.main === module`).
+if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
+  const llmApiKey = process.env.ARK_API_KEY;
+  const llmModel = 'bot-20250720193048-84fkp';
+  const minimaxiApiKey = process.env.MINIMAXI_API_KEY;
+  const minimaxiGroupId = process.env.MINIMAXI_GROUP_ID;
+
+  if (!llmApiKey || !minimaxiApiKey || !minimaxiGroupId) {
+    console.error('请设置环境变量: ARK_API_KEY, MINIMAXI_API_KEY, MINIMAXI_GROUP_ID');
+    process.exit(1);
+  }
+
+  const userInput = process.argv[2] || '你好,请介绍一下人工智能的发展历程';
+
+  chatWithAudioStream({
+    userInput,
+    llmApiKey,
+    llmModel,
+    minimaxiApiKey,
+    minimaxiGroupId,
+  }).catch(console.error);
+}
+
+export { chatWithAudioStream, playAudioStream, playAudioStreamNode };
diff --git a/src/llm_stream.js b/src/llm_stream.js
new file mode 100644
--- /dev/null
+++ b/src/llm_stream.js
@@ -0,0 +1,70 @@
+// Request an LLM chat-completions endpoint with streaming and print the streamed chunks.
+
+import { pathToFileURL } from 'node:url';
+
+/**
+ * POST a streaming chat-completions request and echo each chunk to stdout.
+ * @param {object} opts
+ * @param {string} opts.apiKey - Bearer token for the Ark endpoint.
+ * @param {string} opts.model - Bot/model id.
+ * @param {Array<{role: string, content: string}>} opts.messages - Chat history.
+ * @returns {Promise<string>} The raw concatenated stream body.
+ * @throws {Error} When the HTTP response is not ok.
+ */
+async function requestLLMStream({ apiKey, model, messages }) {
+  const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
+    method: 'POST',
+    headers: {
+      'Authorization': `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      model,
+      stream: true,
+      stream_options: { include_usage: true },
+      messages,
+    }),
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP error! status: ${response.status}`);
+  }
+
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let done = false;
+  let buffer = '';
+
+  while (!done) {
+    const { value, done: doneReading } = await reader.read();
+    done = doneReading;
+    if (value) {
+      const chunk = decoder.decode(value, { stream: true });
+      buffer += chunk;
+      // Echo each chunk as it arrives.
+      process.stdout.write(chunk);
+    }
+  }
+
+  // Return the full accumulated body.
+  return buffer;
+}
+
+// Example usage (ESM replacement for CommonJS `require.main === module`).
+if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
+  const apiKey = process.env.ARK_API_KEY;
+  if (!apiKey) {
+    console.error('请设置环境变量 ARK_API_KEY');
+    process.exit(1);
+  }
+  requestLLMStream({
+    apiKey,
+    model: 'bot-20250720193048-84fkp',
+    messages: [
+      { role: 'system', content: 'You are a helpful assistant.' },
+      { role: 'user', content: 'Hello!' },
+    ],
+  }).catch(console.error);
+}
+
+export { requestLLMStream };
diff --git a/src/minimaxi_stream.js b/src/minimaxi_stream.js
new file mode 100644
--- /dev/null
+++ b/src/minimaxi_stream.js
@@ -0,0 +1,131 @@
+// Request the Minimaxi T2A v2 endpoint (streaming or not) and print/return the result.
+
+import { pathToFileURL } from 'node:url';
+
+/**
+ * Call Minimaxi text-to-audio (t2a_v2).
+ * Non-streaming: returns the parsed JSON response.
+ * Streaming: parses each chunk, concatenates the hex audio fragments and
+ * returns the final chunk structure with the full audio spliced in.
+ * @param {object} opts
+ * @param {string} opts.apiKey - Minimaxi API key.
+ * @param {string} opts.groupId - Minimaxi group id (sent as the GroupId query parameter).
+ * @param {object} opts.body - t2a_v2 request body.
+ * @param {boolean} [opts.stream=true] - Whether to request a streaming response.
+ * @throws {Error} When the HTTP response is not ok.
+ */
+async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
+  // The t2a_v2 API expects the group id as a query parameter, not a path segment.
+  const url = new URL('https://api.minimaxi.com/v1/t2a_v2');
+  url.searchParams.set('GroupId', groupId);
+  const reqBody = { ...body, stream };
+  const response = await fetch(url, {
+    method: 'POST',
+    headers: {
+      'Authorization': `Bearer ${apiKey}`,
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(reqBody),
+  });
+
+  if (!response.ok) {
+    throw new Error(`HTTP error! status: ${response.status}`);
+  }
+
+  if (!stream) {
+    // Non-streaming: return the JSON body directly.
+    const result = await response.json();
+    console.log(JSON.stringify(result, null, 2));
+    return result;
+  }
+
+  // Streaming: parse each chunk and merge the audio fragments.
+  const reader = response.body.getReader();
+  const decoder = new TextDecoder('utf-8');
+  let done = false;
+  let buffer = '';
+  let audioHex = '';
+  let lastFullResult = null;
+
+  while (!done) {
+    const { value, done: doneReading } = await reader.read();
+    done = doneReading;
+    if (value) {
+      buffer += decoder.decode(value, { stream: true });
+      // Chunks are newline-separated; the last line may be incomplete.
+      const lines = buffer.split('\n');
+      buffer = lines.pop();
+      for (const line of lines) {
+        // Tolerate SSE framing: strip a leading "data:" prefix if present.
+        const payload = line.startsWith('data:') ? line.slice(5).trim() : line.trim();
+        if (!payload) continue;
+        try {
+          const obj = JSON.parse(payload);
+          if (obj.data && obj.data.audio) {
+            audioHex += obj.data.audio;
+          }
+          // status === 2 marks the final chunk; keep its full structure.
+          if (obj.data && obj.data.status === 2) {
+            lastFullResult = obj;
+          }
+          console.log('chunk:', JSON.stringify(obj));
+        } catch (e) {
+          console.error('解析chunk失败:', e, line);
+        }
+      }
+    }
+  }
+
+  if (lastFullResult) {
+    // Splice the merged audio into the final chunk structure.
+    lastFullResult.data.audio = audioHex;
+    console.log('最终合成结果:', JSON.stringify(lastFullResult, null, 2));
+    return lastFullResult;
+  }
+  // No final chunk was seen — return just the merged audio.
+  return { data: { audio: audioHex } };
+}
+
+// Example usage (ESM replacement for CommonJS `require.main === module`).
+if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
+  const apiKey = process.env.MINIMAXI_API_KEY;
+  const groupId = process.env.MINIMAXI_GROUP_ID;
+  if (!apiKey || !groupId) {
+    console.error('请设置环境变量 MINIMAXI_API_KEY 和 MINIMAXI_GROUP_ID');
+    process.exit(1);
+  }
+  const baseBody = {
+    model: 'speech-02-hd',
+    text: '真正的危险不是计算机开始像人一样思考,而是人开始像计算机一样思考。计算机只是可以帮我们处理一些简单事务。',
+    language_boost: 'auto',
+    output_format: 'hex',
+    voice_setting: {
+      voice_id: 'male-qn-qingse',
+      speed: 1,
+      vol: 1,
+      pitch: 0,
+      emotion: 'happy',
+    },
+    audio_setting: {
+      sample_rate: 32000,
+      bitrate: 128000,
+      format: 'mp3',
+    },
+  };
+  // Non-streaming first, then streaming.
+  requestMinimaxi({
+    apiKey,
+    groupId,
+    body: baseBody,
+    stream: false,
+  }).then(() => {
+    return requestMinimaxi({
+      apiKey,
+      groupId,
+      body: baseBody,
+      stream: true,
+    });
+  }).catch(console.error);
+}
+
+export { requestMinimaxi };
diff --git a/src/video_audio_sync.js b/src/video_audio_sync.js
new file mode 100644
--- /dev/null
+++ b/src/video_audio_sync.js
@@ -0,0 +1,56 @@
+import { requestMinimaxi } from './minimaxi_stream.js';
+
+/**
+ * Play a video element while synthesizing and playing TTS audio for `text`.
+ * @param {string} videoPath - URL/path assigned to the video element.
+ * @param {string} text - Text to synthesize via Minimaxi TTS.
+ */
+export async function playVideoWithAudio(videoPath, text) {
+  // 1. Set up video playback.
+  const video = document.createElement('video');
+  video.src = videoPath;
+  document.body.appendChild(video);
+
+  // 2. Start the speech-synthesis stream.
+  const audioStream = await requestMinimaxi({
+    apiKey: process.env.MINIMAXI_API_KEY,
+    groupId: process.env.MINIMAXI_GROUP_ID,
+    body: {
+      model: 'speech-02-hd',
+      text,
+      output_format: 'hex',
+      voice_setting: {
+        voice_id: 'male-qn-qingse',
+        speed: 1
+      }
+    },
+    stream: true
+  });
+
+  // 3. Decode the hex audio into a playable buffer.
+  const audioCtx = new AudioContext();
+  const audioBuffer = await audioCtx.decodeAudioData(
+    hexToArrayBuffer(audioStream.data.audio)
+  );
+
+  // 4. Start both in lockstep.
+  const source = audioCtx.createBufferSource();
+  source.buffer = audioBuffer;
+  source.connect(audioCtx.destination);
+
+  video.play();
+  source.start(0);
+}
+
+/**
+ * Convert a hex string to an ArrayBuffer.
+ * @param {string} hex - Even-length hex string.
+ * @returns {ArrayBuffer}
+ */
+function hexToArrayBuffer(hex) {
+  const bytes = new Uint8Array(hex.length / 2);
+  for (let i = 0; i < hex.length; i += 2) {
+    bytes[i / 2] = Number.parseInt(hex.slice(i, i + 2), 16);
+  }
+  return bytes.buffer;
+}