Compare commits
8 Commits
384d6bbb67
...
1359af31d8
| Author | SHA1 | Date | |
|---|---|---|---|
| 1359af31d8 | |||
|
|
0b02f01bec | ||
|
|
6ba96bc177 | ||
|
|
31c6d29bd8 | ||
|
|
31877b8729 | ||
|
|
3e97e3031f | ||
|
|
b1c0656bb4 | ||
| 06aa52f152 |
82
server.js
82
server.js
@ -9,7 +9,7 @@ const { MessageHistory } = require('./src/message_history.js');
|
||||
const app = express();
|
||||
const server = http.createServer(app);
|
||||
const io = socketIo(server, {
|
||||
pingTimeout: 60000, // 60秒超时
|
||||
pingTimeout: 300000, // 60秒超时
|
||||
pingInterval: 25000, // 25秒心跳间隔
|
||||
upgradeTimeout: 30000, // 30秒升级超时
|
||||
allowEIO3: true, // 允许Engine.IO v3客户端
|
||||
@ -135,28 +135,65 @@ const scenes = [
|
||||
defaultVideo: 'qc-bd-4.mp4',
|
||||
interactionVideo: 'qc-hc-7.mp4',
|
||||
tag: 'wakeup',
|
||||
apiKey: 'bot-20250724150616-xqpz8' // 起床场景的API key
|
||||
apiKey: 'bot-20250724150616-xqpz8', // 起床场景的API key
|
||||
openingLines: [
|
||||
// '你靠得近点...像梦里那样',
|
||||
"刚醒就看到你,比闹钟好使一百倍。",
|
||||
// "昨晚梦里喊你名字喊醒了,现在视频看到你,心跳比梦里还烫。",
|
||||
// "你穿我T恤的样子...让我想咬住衣领不放。",
|
||||
"宝贝早...你声音比阳光还暖。",
|
||||
// "喉咙里有咖啡味...要不要来尝尝?记得上次你咬我锁骨时,咖啡还温着。",
|
||||
"晨雾遮住你半张脸...但你的呼吸声比雾气更潮湿,让我想起你咬我耳垂的感觉。"
|
||||
]
|
||||
},
|
||||
{
|
||||
name: '开车',
|
||||
defaultVideo: 'kc-bd-3.mp4',
|
||||
interactionVideo: 'kc-sh-3.mp4',
|
||||
tag: 'driving',
|
||||
apiKey: 'bot-20250623140339-r8f8b' // 开车场景的API key
|
||||
apiKey: 'bot-20250623140339-r8f8b', // 开车场景的API key
|
||||
openingLines: [
|
||||
'宝贝起了?看到你的脸,我这早起的怨气‘咻’一下就飞走了。车窗外的阳光都没你眼睛亮。',
|
||||
'刚过桥,下面河水在闪光,突然就好想带你来兜风…你负责吃零食看风景,我负责偷瞄你。',
|
||||
// '看到那个大钟楼没?每次路过都想你。想着要是能牵着你的手,站在最高层往下看该多好。',
|
||||
'唉,快到了…真不想挂。感觉你像块小磁铁,隔着屏幕都想把我吸过去。今天上班肯定满脑子都是你。',
|
||||
'要开始堵车了...要是你在副驾的话,这段路该多甜呀',
|
||||
'宝贝起床了,刚过红绿灯就忍不住想你了,路上全是你的影子~',
|
||||
'车载音乐随机到那首我们接吻时哼的歌,心跳又乱了',
|
||||
'导航说今天要开58分钟,其实想开58小时去你心里'
|
||||
]
|
||||
},
|
||||
{
|
||||
name: '喝茶',
|
||||
name: '咖啡',
|
||||
defaultVideo: 'hc-bd-3.mp4',
|
||||
interactionVideo: 'hc-sh-3(1).mp4',
|
||||
tag: 'tea',
|
||||
apiKey: 'bot-20250804180724-4dgtk' // 喝茶场景的API key
|
||||
tag: 'coffee',
|
||||
apiKey: 'bot-20250804180724-4dgtk', // 喝茶场景的API key
|
||||
openingLines: [
|
||||
'拿铁拉花是你上次画的爱心形状,甜度刚好',
|
||||
'摩卡有点苦,要是加上你的笑容就甜了',
|
||||
'咖啡师问我一个人?我说在等我的甜度',
|
||||
'今天的冰拿铁好甜,是不是你偷偷往我杯子里撒糖了?',
|
||||
'拉花师给我在咖啡里画了颗心形的奶泡,说是给视频里的小仙女加糖',
|
||||
// '这杯好苦…但一看到你,就自动回甘了。比加十包糖都管用。你说你是不是我的专属甜味剂?'
|
||||
]
|
||||
},
|
||||
{
|
||||
name: '睡觉',
|
||||
defaultVideo: '8-8-sj-bd.mp4',
|
||||
interactionVideo: '8-8-sj-sh-1.mp4',
|
||||
tag: 'sleep',
|
||||
apiKey: 'bot-20250808120704-lbxwj' // 睡觉场景的API key
|
||||
apiKey: 'bot-20250808120704-lbxwj', // 睡觉场景的API key
|
||||
openingLines: [
|
||||
'宝贝,一看到你,我这电量‘噌’就满了。准备关机前最后充会儿电…嗯,用眼睛充。',
|
||||
'熄灯前最后一道光是你,真好。感觉今天积攒的烦心事,都被你眼睛里的星星照没了。',
|
||||
'唉…手指头碰不到你屏幕都嫌凉。下次见面,这距离得用抱抱补回来,利息按秒算。',
|
||||
'周围好安静,就剩你的呼吸声当背景音乐了。比什么助眠App都好使…就是听久了,心跳会抢拍子。',
|
||||
'困不困?我眼皮在打架了…但就是想再多看你几秒。感觉多看一秒,梦里遇见你的概率就大一点。',
|
||||
'好啦,我的小月亮,该哄世界睡觉了…但你先哄哄我?随便说句什么,我当睡前故事收藏。',
|
||||
'捕捉到一只睡前小可爱…成功!',
|
||||
'世界要静音了…但你的声音是白名单。多说几句?'
|
||||
]
|
||||
}
|
||||
];
|
||||
|
||||
@ -273,6 +310,37 @@ app.get('/api/default-video', (req, res) => {
|
||||
});
|
||||
});
|
||||
|
||||
// 在现有的API接口后添加
|
||||
// GET /api/current-scene/opening-line
// Picks one opening line at random from the scene returned by getCurrentScene().
// Responds 200 { success: true, openingLine, sceneName, sceneTag } on success,
// 200 { success: false, message } when the scene has no opening lines, and
// 500 { success: false, message, error } when resolving the scene throws.
app.get('/api/current-scene/opening-line', (req, res) => {
  try {
    const currentScene = getCurrentScene();
    const openingLines = currentScene ? currentScene.openingLines : null;

    // Guard clause: no scene, no list, or an empty list.
    if (!openingLines || !(openingLines.length > 0)) {
      res.json({
        success: false,
        message: '当前场景没有配置开场白'
      });
      return;
    }

    // Uniform random pick over the configured lines.
    const randomIndex = Math.floor(Math.random() * openingLines.length);

    res.json({
      success: true,
      openingLine: openingLines[randomIndex],
      sceneName: currentScene.name,
      sceneTag: currentScene.tag
    });
  } catch (error) {
    console.error('获取开场白失败:', error);
    res.status(500).json({
      success: false,
      message: '获取开场白失败',
      error: error.message
    });
  }
});
|
||||
|
||||
// Socket.IO 连接处理
|
||||
io.on('connection', (socket) => {
|
||||
console.log('用户连接:', socket.id);
|
||||
|
||||
@ -61,16 +61,15 @@ function updateHistoryMessage(userInput, assistantResponse) {
|
||||
);
|
||||
|
||||
// 可选:限制历史消息数量,保持最近的对话
|
||||
// const maxMessages = 20; // 保留最近10轮对话(20条消息)
|
||||
// if (historyMessage.length > maxMessages) {
|
||||
// // 保留系统消息和最近的对话
|
||||
// const systemMessages = historyMessage.filter(msg => msg.role === 'system');
|
||||
// const recentMessages = historyMessage.slice(-maxMessages + systemMessages.length);
|
||||
// historyMessage = [...systemMessages, ...recentMessages.filter(msg => msg.role !== 'system')];
|
||||
// }
|
||||
const maxMessages = 20; // 保留最近10轮对话(20条消息)
|
||||
if (historyMessage.length > maxMessages) {
|
||||
// 保留系统消息和最近的对话
|
||||
const systemMessages = historyMessage.filter(msg => msg.role === 'system');
|
||||
const recentMessages = historyMessage.slice(-maxMessages + systemMessages.length);
|
||||
historyMessage = [...systemMessages, ...recentMessages.filter(msg => msg.role !== 'system')];
|
||||
}
|
||||
}
|
||||
|
||||
// 保存消息到服务端
|
||||
// 保存消息到服务端
|
||||
async function saveMessage(userInput, assistantResponse) {
|
||||
try {
|
||||
@ -198,7 +197,7 @@ async function chatWithAudioStream(userInput) {
|
||||
}
|
||||
|
||||
// 导出初始化函数,供外部调用
|
||||
export { chatWithAudioStream, initializeHistoryMessage, getCurrentHistoryMessage };
|
||||
export { chatWithAudioStream, initializeHistoryMessage, getCurrentHistoryMessage, saveMessage, updateHistoryMessage };
|
||||
|
||||
// 处理音频播放队列
|
||||
async function processAudioQueue() {
|
||||
|
||||
@ -1,94 +0,0 @@
|
||||
// 示例配置文件 - 请复制此文件为 config.js 并填入实际的API密钥
|
||||
// Example configuration object. The string values under `llm.apiKey`,
// `minimaxi.apiKey` and `minimaxi.groupId` are placeholders that must be
// replaced with real credentials before use.
export const config = {
  // LLM API settings
  llm: {
    apiKey: 'your_ark_api_key_here', // replace with the real ARK API key
    model: 'bot-20250720193048-84fkp',
  },

  // Minimaxi API settings
  minimaxi: {
    apiKey: 'your_minimaxi_api_key_here', // replace with the real Minimaxi API key
    groupId: 'your_minimaxi_group_id_here', // replace with the real Minimaxi group ID
  },

  // Audio synthesis settings
  audio: {
    model: 'speech-02-hd',
    voiceSetting: {
      voice_id: 'yantu-qinggang',
      speed: 1,
      vol: 1,
      pitch: 0,
      emotion: 'happy',
    },
    audioSetting: {
      sample_rate: 32000,
      bitrate: 128000,
      format: 'mp3',
    },
  },

  // System-level request options
  system: {
    language_boost: 'auto',
    output_format: 'hex',
    stream: true,
  },
};
|
||||
|
||||
// 验证配置是否完整
|
||||
/**
 * Check that every required credential in `config` is present and is not one
 * of the shipped placeholder strings.
 *
 * Logs a warning listing the offending dotted paths when any are missing.
 *
 * @returns {boolean} true when all required fields are filled in, else false.
 */
export function validateConfig() {
  const requiredFields = [
    'llm.apiKey',
    'llm.model',
    'minimaxi.apiKey',
    'minimaxi.groupId'
  ];

  // Placeholder values from the example config; a field still holding one of
  // these counts as missing.
  const placeholders = new Set([
    'your_ark_api_key_here',
    'your_minimaxi_api_key_here',
    'your_minimaxi_group_id_here'
  ]);

  const missingFields = requiredFields.filter((field) => {
    // Walk the dotted path; stop at the first falsy link so a missing parent
    // section does not throw.
    let value = config;
    for (const key of field.split('.')) {
      value = value[key];
      if (!value) break;
    }
    return !value || placeholders.has(value);
  });

  if (missingFields.length > 0) {
    console.warn('配置不完整,请检查以下字段:', missingFields);
    return false;
  }

  return true;
}
|
||||
|
||||
// 获取配置的便捷方法
|
||||
/**
 * Convenience accessor for the LLM settings.
 *
 * @returns {{apiKey: *, model: *}} a fresh object holding `config.llm.apiKey`
 *   and `config.llm.model`.
 */
export function getLLMConfig() {
  const { apiKey, model } = config.llm;
  return { apiKey, model };
}
|
||||
|
||||
/**
 * Convenience accessor for the Minimaxi credentials.
 *
 * @returns {{apiKey: *, groupId: *}} a fresh object holding
 *   `config.minimaxi.apiKey` and `config.minimaxi.groupId`.
 */
export function getMinimaxiConfig() {
  const { apiKey, groupId } = config.minimaxi;
  return { apiKey, groupId };
}
|
||||
|
||||
/**
 * Convenience accessor for the audio synthesis settings merged with the
 * system-level options.
 *
 * `config.system` is spread last, so a key it shares with the audio fields
 * (for example `model`) takes the system value.
 *
 * @returns {object} `{ model, voiceSetting, audioSetting, ...config.system }`.
 */
export function getAudioConfig() {
  const { model, voiceSetting, audioSetting } = config.audio;
  return {
    model,
    voiceSetting,
    audioSetting,
    ...config.system,
  };
}
|
||||
@ -1,26 +0,0 @@
|
||||
// 调试音频数据
|
||||
/**
 * Print diagnostics for a hex-encoded audio string and check whether the
 * payload is the same content twice in a row (first half === second half).
 *
 * @param {string} audioHex - audio data as a string.
 * @returns {boolean} true when the two halves are identical and non-empty.
 */
function debugAudioData(audioHex) {
  console.log('=== 音频数据调试 ===');
  console.log('音频数据长度:', audioHex.length);
  console.log('音频数据前100个字符:', audioHex.substring(0, 100));
  console.log('音频数据后100个字符:', audioHex.substring(audioHex.length - 100));

  // Compare the two halves. An odd-length string can never be an exact
  // repetition; an empty string trivially has equal halves but is not a
  // duplicate, so it is excluded explicitly.
  const halfLength = Math.floor(audioHex.length / 2);
  const isDuplicated =
    audioHex.length > 0 &&
    audioHex.substring(0, halfLength) === audioHex.substring(halfLength);

  if (isDuplicated) {
    console.log('⚠️ 警告:音频数据可能是重复的!');
  } else {
    console.log('✅ 音频数据没有重复');
  }

  return isDuplicated;
}
|
||||
|
||||
// 如果在浏览器环境中运行
|
||||
// When running in a browser, expose the helper on `window` so it can be
// called from the developer console.
if (typeof window !== 'undefined') {
  window.debugAudioData = debugAudioData;
  console.log('音频调试函数已挂载到 window.debugAudioData');
}
|
||||
|
||||
export { debugAudioData };
|
||||
182
src/index.js
182
src/index.js
@ -1,7 +1,8 @@
|
||||
console.log('视频文件:');
|
||||
// WebRTC 音视频通话应用
|
||||
// import { chatWithAudioStream } from './chat_with_audio.js';
|
||||
import { chatWithAudioStream, initializeHistoryMessage } from './chat_with_audio.js';
|
||||
import { chatWithAudioStream, initializeHistoryMessage, updateHistoryMessage } from './chat_with_audio.js';
|
||||
|
||||
import { AudioProcessor } from './audio_processor.js';
|
||||
|
||||
// 在应用初始化时调用
|
||||
@ -74,6 +75,10 @@ class WebRTCChat {
|
||||
this.preloadVideoResources();
|
||||
this.bindEvents();
|
||||
|
||||
// 添加开场白相关属性
|
||||
this.openingAudioData = null;
|
||||
this.isOpeningAudioReady = false;
|
||||
|
||||
// 在初始化完成后预加载常用视频
|
||||
// setTimeout(() => {
|
||||
// this.logMessage('开始预加载常用视频...', 'info');
|
||||
@ -233,6 +238,110 @@ class WebRTCChat {
|
||||
console.error('历史消息初始化失败:', error);
|
||||
}
|
||||
}
|
||||
|
||||
// 新增方法:初始化开场白音频
|
||||
async initializeOpeningAudio() {
|
||||
try {
|
||||
console.log('开始初始化开场白音频...');
|
||||
|
||||
// 获取当前场景的开场白
|
||||
const response = await fetch('/api/current-scene/opening-line');
|
||||
const data = await response.json();
|
||||
|
||||
if (data.success && data.openingLine) {
|
||||
console.log(`获取到开场白: ${data.openingLine}`);
|
||||
|
||||
// 生成开场白音频
|
||||
await this.generateOpeningAudio(data.openingLine);
|
||||
this.logMessage(`开场白音频已准备就绪: ${data.openingLine}`, 'success');
|
||||
} else {
|
||||
console.warn('未获取到开场白:', data.message);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('初始化开场白音频失败:', error);
|
||||
this.logMessage(`开场白音频初始化失败: ${error.message}`, 'error');
|
||||
}
|
||||
}
|
||||
|
||||
// 新增方法:生成开场白音频
|
||||
async generateOpeningAudio(text) {
|
||||
try {
|
||||
// 动态导入 minimaxi_stream 模块
|
||||
const { requestMinimaxi } = await import('./minimaxi_stream.js');
|
||||
const { getMinimaxiConfig, getAudioConfig, getLLMConfigByScene } = await import('./config.js');
|
||||
const { saveMessage } = await import('./chat_with_audio.js');
|
||||
|
||||
const minimaxiConfig = getMinimaxiConfig();
|
||||
const audioConfig = getAudioConfig();
|
||||
const llmConfig = await getLLMConfigByScene();
|
||||
|
||||
const requestBody = {
|
||||
model: audioConfig.model,
|
||||
text: text,
|
||||
voice_setting: audioConfig.voiceSetting,
|
||||
audio_setting: audioConfig.audioSetting,
|
||||
language_boost: 'auto',
|
||||
output_format: 'hex'
|
||||
};
|
||||
|
||||
console.log('开始生成开场白音频...');
|
||||
|
||||
// 生成音频数据
|
||||
const audioHexData = await requestMinimaxi({
|
||||
apiKey: minimaxiConfig.apiKey,
|
||||
groupId: minimaxiConfig.groupId,
|
||||
body: requestBody,
|
||||
stream: false, // 非流式,一次性获取完整音频
|
||||
textPlay: false
|
||||
});
|
||||
|
||||
if (audioHexData && audioHexData.data && audioHexData.data.audio) {
|
||||
this.openingAudioData = audioHexData.data.audio;
|
||||
this.isOpeningAudioReady = true;
|
||||
console.log('开场白音频生成成功');
|
||||
}
|
||||
// 先更新本地历史消息
|
||||
updateHistoryMessage(`场景切换-${llmConfig.sceneName}`, text);
|
||||
|
||||
await saveMessage(`场景切换-${llmConfig.sceneName}`,text);
|
||||
|
||||
} catch (error) {
|
||||
console.error('生成开场白音频失败:', error);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
// 新增方法:播放开场白音频
|
||||
async playOpeningAudio() {
|
||||
if (!this.isOpeningAudioReady || !this.openingAudioData) {
|
||||
console.warn('开场白音频未准备就绪');
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
// 动态导入 addAudioToQueue 函数
|
||||
const { addAudioToQueue } = await import('./minimaxi_stream.js');
|
||||
|
||||
console.log('将开场白音频添加到队列');
|
||||
await addAudioToQueue(this.openingAudioData);
|
||||
|
||||
this.logMessage('开场白音频已开始播放', 'success');
|
||||
} catch (error) {
|
||||
console.error('播放开场白音频失败:', error);
|
||||
this.logMessage(`播放开场白音频失败: ${error.message}`, 'error');
|
||||
}
|
||||
}
|
||||
|
||||
// 新增方法:获取开场白音频时长
|
||||
getOpeningAudioDuration() {
|
||||
// 估算开场白音频时长,可以根据实际情况调整
|
||||
// 这里假设平均每个字符对应100ms的音频时长
|
||||
if (this.openingAudioData) {
|
||||
// 简单估算:假设开场白大约3-5秒
|
||||
return 4000; // 4秒
|
||||
}
|
||||
return 3000; // 默认3秒
|
||||
}
|
||||
|
||||
async loadVideoMapping() {
|
||||
try {
|
||||
@ -432,6 +541,7 @@ class WebRTCChat {
|
||||
|
||||
// 预创建重要视频流
|
||||
async precreateImportantVideos() {
|
||||
|
||||
if (this.isInitialized) return;
|
||||
|
||||
console.log('开始预创建重要流...', 'info');
|
||||
@ -1151,6 +1261,9 @@ class WebRTCChat {
|
||||
this.showConnectionWaiting();
|
||||
// 切换到通话中图标
|
||||
this.switchToCallingIcon();
|
||||
|
||||
// 在初始化完成后生成开场白音频
|
||||
await this.initializeOpeningAudio();
|
||||
|
||||
// 现在才开始显示视频
|
||||
await this.startDefaultVideoStream();
|
||||
@ -1164,35 +1277,54 @@ class WebRTCChat {
|
||||
console.log('麦克风权限获取成功');
|
||||
|
||||
await this.createPeerConnection();
|
||||
await this.startVoiceRecording();
|
||||
|
||||
this.startButton.disabled = true;
|
||||
this.startButton.style.opacity = '0.5'
|
||||
this.stopButton.disabled = false;
|
||||
this.startButton.style.opacity = '0.5'
|
||||
this.stopButton.disabled = false;
|
||||
|
||||
// 隐藏头像,显示视频
|
||||
if (this.videoContainer) {
|
||||
// 隐藏头像,显示视频
|
||||
if (this.videoContainer) {
|
||||
|
||||
this.videoContainer.classList.add('calling');
|
||||
}
|
||||
this.videoContainer.classList.add('calling');
|
||||
}
|
||||
|
||||
// 显示结束通话按钮
|
||||
this.stopButton.style.display = 'block';
|
||||
|
||||
|
||||
|
||||
this.updateAudioStatus('已连接', 'connected');
|
||||
this.logMessage('音频通话已开始', 'success');
|
||||
|
||||
// 确保视频映射已加载
|
||||
if (Object.keys(this.videoMapping).length === 0) {
|
||||
await this.loadVideoMapping();
|
||||
}
|
||||
|
||||
this.logMessage(`视频映射已加载: ${Object.keys(this.videoMapping).length} 个映射`, 'info');
|
||||
|
||||
// 通知服务器通话开始
|
||||
this.socket.emit('call-started');
|
||||
|
||||
// 播放开场白,然后启动语音录制
|
||||
if (this.isOpeningAudioReady) {
|
||||
console.log('播放开场白音频...');
|
||||
await this.playOpeningAudio();
|
||||
|
||||
// 显示结束通话按钮
|
||||
this.stopButton.style.display = 'block';
|
||||
|
||||
|
||||
|
||||
this.updateAudioStatus('已连接', 'connected');
|
||||
this.logMessage('音频通话已开始', 'success');
|
||||
|
||||
// 确保视频映射已加载
|
||||
if (Object.keys(this.videoMapping).length === 0) {
|
||||
await this.loadVideoMapping();
|
||||
}
|
||||
|
||||
this.logMessage(`视频映射已加载: ${Object.keys(this.videoMapping).length} 个映射`, 'info');
|
||||
|
||||
// 通知服务器通话开始
|
||||
this.socket.emit('call-started');
|
||||
// 等待开场白播放完成后再启动语音录制
|
||||
setTimeout(async () => {
|
||||
console.log('开场白播放完成,启动语音录制...');
|
||||
await this.startVoiceRecording();
|
||||
this.logMessage('语音录制已启动,可以开始对话', 'success');
|
||||
}, this.getOpeningAudioDuration() + 1000); // 开场白时长 + 1秒缓冲
|
||||
} else {
|
||||
console.warn('开场白音频尚未准备就绪,延迟启动语音录制');
|
||||
// 如果没有开场白,延迟500ms后启动录制
|
||||
setTimeout(async () => {
|
||||
await this.startVoiceRecording();
|
||||
this.logMessage('语音录制已启动,可以开始对话', 'success');
|
||||
}, 500);
|
||||
}
|
||||
|
||||
// 开始播放当前场景的默认视频
|
||||
// await this.precreateImportantVideos();
|
||||
|
||||
@ -1,5 +1,35 @@
|
||||
// 以流式方式请求LLM大模型接口,并打印流式返回内容
|
||||
|
||||
// 过滤旁白内容的函数
|
||||
// Patterns for bracketed narration / stage directions, applied in order.
// The full-width parentheses are written as \uFF08 / \uFF09 escapes so the
// pattern cannot be silently turned into a plain capture group `([^)]*)`
// (which would match, and delete, nearly all text) by character
// normalization.
const NARRATION_PATTERNS = [
  /\uFF08[^\uFF09]*\uFF09/g, // full-width (Chinese) parentheses
  /\([^)]*\)/g,              // ASCII parentheses
  /【[^】]*】/g,               // Chinese lenticular brackets
  /\[[^\]]*\]/g,             // ASCII square brackets
  /\{[^}]*\}/g,              // curly braces
  /〈[^〉]*〉/g,               // Chinese angle brackets
  /《[^》]*》/g,               // Chinese title marks
  /<[^>]*>/g                 // ASCII angle brackets
];

/**
 * Remove bracketed narration from a piece of text.
 *
 * Every bracket pair listed in NARRATION_PATTERNS is deleted together with
 * its content, then runs of whitespace are collapsed to one space and the
 * result is trimmed.
 *
 * @param {string} text - raw text.
 * @returns {string} the filtered text; falsy input is returned unchanged.
 */
function filterNarration(text) {
  if (!text) return text;

  let filteredText = text;
  for (const pattern of NARRATION_PATTERNS) {
    filteredText = filteredText.replace(pattern, '');
  }

  // Collapse the gaps left behind by removed spans.
  return filteredText.replace(/\s+/g, ' ').trim();
}
|
||||
|
||||
async function requestLLMStream({ apiKey, model, messages, onSegment }) {
|
||||
const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
|
||||
method: 'POST',
|
||||
@ -54,7 +84,14 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) {
|
||||
// 处理最后的待处理文本(无论长度是否大于5个字)
|
||||
if (pendingText.trim() && onSegment) {
|
||||
console.log('处理最后的待处理文本:', pendingText.trim());
|
||||
await onSegment(pendingText.trim(), true);
|
||||
// 过滤旁白内容
|
||||
const filteredText = filterNarration(pendingText.trim());
|
||||
if (filteredText.trim()) {
|
||||
console.log('过滤旁白后的最后文本:', filteredText);
|
||||
await onSegment(filteredText, true);
|
||||
} else {
|
||||
console.log('最后的文本被完全过滤,跳过');
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
@ -65,12 +102,15 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) {
|
||||
const deltaContent = obj.choices[0].delta.content;
|
||||
content += deltaContent;
|
||||
pendingText += deltaContent;
|
||||
console.log('LLM内容片段:', deltaContent);
|
||||
console.log('【未过滤】LLM内容片段:', pendingText);
|
||||
|
||||
// 检查是否包含分段分隔符
|
||||
if (segmentDelimiters.test(pendingText)) {
|
||||
// 按分隔符分割文本
|
||||
const segments = pendingText.split(segmentDelimiters);
|
||||
// 先过滤旁白,再检查分段分隔符
|
||||
const filteredPendingText = filterNarration(pendingText);
|
||||
|
||||
// 检查过滤后的文本是否包含分段分隔符
|
||||
if (segmentDelimiters.test(filteredPendingText)) {
|
||||
// 按分隔符分割已过滤的文本
|
||||
const segments = filteredPendingText.split(segmentDelimiters);
|
||||
|
||||
// 重新组合处理:只处理足够长的完整段落
|
||||
let accumulatedText = '';
|
||||
@ -81,25 +121,30 @@ async function requestLLMStream({ apiKey, model, messages, onSegment }) {
|
||||
if (segment) {
|
||||
accumulatedText += segment;
|
||||
// 找到分隔符
|
||||
const delimiterMatch = pendingText.match(segmentDelimiters);
|
||||
const delimiterMatch = filteredPendingText.match(segmentDelimiters);
|
||||
if (delimiterMatch) {
|
||||
accumulatedText += delimiterMatch[0];
|
||||
}
|
||||
|
||||
// 如果累积文本长度大于5个字,处理它
|
||||
if (accumulatedText.length > 8 && onSegment) {
|
||||
console.log('检测到完整段落:', accumulatedText);
|
||||
await onSegment(accumulatedText, false);
|
||||
console.log('【已过滤】检测到完整段落:', accumulatedText);
|
||||
// 文本已经过滤过旁白,直接使用
|
||||
if (accumulatedText.trim()) {
|
||||
console.log('处理过滤后的文本:', accumulatedText);
|
||||
await onSegment(accumulatedText, false);
|
||||
}
|
||||
hasProcessed = true;
|
||||
accumulatedText = ''; // 重置
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 更新pendingText
|
||||
// 更新pendingText - 使用原始文本但需要相应调整
|
||||
if (hasProcessed) {
|
||||
// 保留未处理的累积文本和最后一个不完整段落
|
||||
pendingText = accumulatedText + (segments[segments.length - 1] || '');
|
||||
// 计算已处理的原始文本长度,更新pendingText
|
||||
const processedLength = pendingText.length - (segments[segments.length - 1] || '').length;
|
||||
pendingText = pendingText.substring(processedLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -431,4 +431,4 @@ function generateUUID() {
|
||||
});
|
||||
}
|
||||
|
||||
export { requestMinimaxi, requestVolcanTTS };
|
||||
export { requestMinimaxi, requestVolcanTTS, addAudioToQueue };
|
||||
346
src/new_app.js
346
src/new_app.js
@ -1,346 +0,0 @@
|
||||
let ASRTEXT = ''
|
||||
|
||||
/**
 * Microphone recorder with a simple energy-based voice activity detector.
 * Each detected utterance is encoded as 16-bit mono WAV, base64-encoded and
 * POSTed to an HTTP speech-recognition endpoint; the recognized text is
 * stored in the module-level ASRTEXT variable.
 */
class HttpASRRecognizer {
  constructor() {
    this.mediaRecorder = null;
    this.audioContext = null;
    this.mediaStream = null;   // kept so the microphone can be released on stop
    this.sourceNode = null;
    this.processorNode = null;
    this.isRecording = false;
    this.audioChunks = [];

    // Voice activity detection (VAD) state
    this.isSpeaking = false;
    this.silenceThreshold = 0.01;  // RMS level above which a frame counts as speech
    this.silenceTimeout = 1000;    // ms of silence that ends an utterance
    this.minSpeechDuration = 300;  // utterances shorter than this (ms) are dropped
    this.silenceTimer = null;
    this.speechStartTime = null;
    this.audioBuffer = [];

    // API configuration.
    // NOTE(review): the app key and access key are hard-coded and therefore
    // shipped to every browser that loads this script; they should be moved
    // behind a server-side proxy. Left unchanged here.
    this.apiConfig = {
      url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
      headers: {
        'X-Api-App-Key': '1988591469',
        'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
        'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
        'X-Api-Request-Id': this.generateUUID(),
        'X-Api-Sequence': '-1',
        'Content-Type': 'application/json'
      }
    };

    this.recordBtn = document.getElementById('startVoiceButton');
    this.statusDiv = document.getElementById('status');
    this.resultsDiv = document.getElementById('results');

    this.initEventListeners();
  }

  /** Toggle recording when the record button is clicked. */
  initEventListeners() {
    this.recordBtn.addEventListener('click', () => {
      if (this.isRecording) {
        this.stopRecording();
      } else {
        this.startRecording();
      }
    });
  }

  /**
   * Generate a version-4 style UUID string.
   * @returns {string} lowercase UUID, e.g. "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx".
   */
  generateUUID() {
    return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (c) => {
      const r = Math.random() * 16 | 0;
      const v = c === 'x' ? r : (r & 0x3 | 0x8);
      return v.toString(16);
    });
  }

  /**
   * Root-mean-square level of one frame of samples.
   * @param {ArrayLike<number>} audioData - samples of one frame.
   * @returns {number} RMS level; 0 for an empty frame (avoids 0/0 = NaN).
   */
  calculateAudioLevel(audioData) {
    if (audioData.length === 0) {
      return 0;
    }
    let sum = 0;
    for (let i = 0; i < audioData.length; i++) {
      sum += audioData[i] * audioData[i];
    }
    return Math.sqrt(sum / audioData.length);
  }

  /**
   * Update the VAD state machine with one frame.
   * @param {ArrayLike<number>} audioData - samples of one frame.
   * @returns {boolean} true when the frame belongs to an utterance and should
   *   be buffered (speech, or trailing silence inside an open utterance).
   */
  detectVoiceActivity(audioData) {
    const audioLevel = this.calculateAudioLevel(audioData);
    const currentTime = Date.now();

    if (audioLevel > this.silenceThreshold) {
      if (!this.isSpeaking) {
        this.isSpeaking = true;
        this.speechStartTime = currentTime;
        this.audioBuffer = [];
        this.updateStatus('检测到语音,开始录音...', 'speaking');
        console.log('开始说话');
      }

      // Speech resumed: cancel the pending end-of-utterance timer.
      if (this.silenceTimer) {
        clearTimeout(this.silenceTimer);
        this.silenceTimer = null;
      }

      return true;
    }

    // Silent frame inside an utterance: arm the end-of-utterance timer once.
    if (this.isSpeaking && !this.silenceTimer) {
      this.silenceTimer = setTimeout(() => {
        this.onSpeechEnd();
      }, this.silenceTimeout);
    }

    return this.isSpeaking;
  }

  /**
   * Close the current utterance and, if it was long enough, send it for
   * recognition.
   *
   * The VAD state is reset BEFORE the network call so that speech arriving
   * while recognition is in flight opens a new utterance instead of being
   * appended to a buffer that is discarded afterwards.
   */
  async onSpeechEnd() {
    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }

    if (!this.isSpeaking) {
      return;
    }

    const speechDuration = Date.now() - this.speechStartTime;
    const capturedAudio = this.audioBuffer;

    this.isSpeaking = false;
    this.speechStartTime = null;
    this.audioBuffer = [];

    if (speechDuration < this.minSpeechDuration) {
      console.log('说话时长太短,忽略');
      console.log('等待语音输入...');
      return;
    }

    console.log(`语音结束,时长: ${speechDuration}ms`);
    console.log('语音识别中');
    await this.processAudioBuffer(capturedAudio);
  }

  /**
   * Merge buffered frames, encode them as WAV/base64 and call the ASR API.
   * Errors are logged and reported via updateStatus(); nothing is thrown.
   * @param {Float32Array[]} [buffers=this.audioBuffer] - frames to send.
   */
  async processAudioBuffer(buffers = this.audioBuffer) {
    if (buffers.length === 0) {
      return;
    }

    try {
      // Concatenate all frames into one contiguous sample array.
      const totalLength = buffers.reduce((sum, buffer) => sum + buffer.length, 0);
      const combinedBuffer = new Float32Array(totalLength);
      let offset = 0;
      for (const buffer of buffers) {
        combinedBuffer.set(buffer, offset);
        offset += buffer.length;
      }

      const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
      const base64Audio = this.arrayBufferToBase64(wavBuffer);

      await this.callASRAPI(base64Audio);
    } catch (error) {
      console.error('处理音频数据失败:', error);
      this.updateStatus('识别失败', 'error');
    }
  }

  /**
   * POST one utterance to the ASR endpoint and hand the JSON reply to
   * handleASRResponse(). Errors are logged and reported via updateStatus().
   * @param {string} base64AudioData - base64-encoded WAV file.
   */
  async callASRAPI(base64AudioData) {
    try {
      const requestBody = {
        user: {
          uid: "1988591469"
        },
        audio: {
          data: base64AudioData
        },
        request: {
          model_name: "bigmodel"
        }
      };

      const response = await fetch(this.apiConfig.url, {
        method: 'POST',
        // Use a fresh request id per call rather than the one generated once
        // in the constructor.
        headers: {
          ...this.apiConfig.headers,
          'X-Api-Request-Id': this.generateUUID()
        },
        body: JSON.stringify(requestBody)
      });

      if (!response.ok) {
        throw new Error(`HTTP error! status: ${response.status}`);
      }

      const result = await response.json();
      this.handleASRResponse(result);
    } catch (error) {
      console.error('ASR API调用失败:', error);
      this.updateStatus('API调用失败', 'error');
    }
  }

  /**
   * Store the recognized text from an ASR reply in ASRTEXT.
   * @param {object} response - parsed JSON reply; text is read from
   *   `response.data.result`.
   */
  handleASRResponse(response) {
    console.log('ASR响应:', response);

    if (response && response.data && response.data.result) {
      ASRTEXT = response.data.result;
      console.log('识别完成');
    } else {
      console.log('未识别到文字');
    }
  }

  /**
   * Append a timestamped result row to the results container.
   * @param {string} text - recognized text.
   */
  displayResult(text) {
    const resultElement = document.createElement('div');
    resultElement.className = 'result-item';

    // Build the row with textContent so recognized text is never parsed as HTML.
    const timestampSpan = document.createElement('span');
    timestampSpan.className = 'timestamp';
    timestampSpan.textContent = new Date().toLocaleTimeString();

    const textSpan = document.createElement('span');
    textSpan.className = 'text';
    textSpan.textContent = text;

    resultElement.appendChild(timestampSpan);
    resultElement.appendChild(textSpan);

    this.resultsDiv.appendChild(resultElement);
    this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
  }

  /**
   * Show a status message and set the matching CSS class.
   * @param {string} message - text to display.
   * @param {string} status - suffix for the `status <status>` class name.
   */
  updateStatus(message, status) {
    this.statusDiv.textContent = message;
    this.statusDiv.className = `status ${status}`;
  }

  /**
   * Encode float samples as a 16-bit mono PCM WAV file.
   * @param {Float32Array} samples - samples, clamped to [-1, 1] on write.
   * @param {number} sampleRate - sample rate written into the header.
   * @returns {ArrayBuffer} 44-byte header followed by 2 bytes per sample.
   */
  encodeWAV(samples, sampleRate) {
    const length = samples.length;
    const buffer = new ArrayBuffer(44 + length * 2);
    const view = new DataView(buffer);

    const writeString = (offset, string) => {
      for (let i = 0; i < string.length; i++) {
        view.setUint8(offset + i, string.charCodeAt(i));
      }
    };

    // RIFF/WAVE header
    writeString(0, 'RIFF');
    view.setUint32(4, 36 + length * 2, true);
    writeString(8, 'WAVE');
    writeString(12, 'fmt ');
    view.setUint32(16, 16, true);             // fmt chunk size
    view.setUint16(20, 1, true);              // format tag 1 (PCM)
    view.setUint16(22, 1, true);              // channel count
    view.setUint32(24, sampleRate, true);
    view.setUint32(28, sampleRate * 2, true); // bytes per second
    view.setUint16(32, 2, true);              // block align
    view.setUint16(34, 16, true);             // bits per sample
    writeString(36, 'data');
    view.setUint32(40, length * 2, true);

    // Sample data
    let offset = 44;
    for (let i = 0; i < length; i++) {
      const sample = Math.max(-1, Math.min(1, samples[i]));
      view.setInt16(offset, sample * 0x7FFF, true);
      offset += 2;
    }

    return buffer;
  }

  /**
   * Base64-encode the bytes of an ArrayBuffer.
   * @param {ArrayBuffer} buffer - bytes to encode.
   * @returns {string} base64 string.
   */
  arrayBufferToBase64(buffer) {
    let binary = '';
    const bytes = new Uint8Array(buffer);
    for (let i = 0; i < bytes.byteLength; i++) {
      binary += String.fromCharCode(bytes[i]);
    }
    return btoa(binary);
  }

  /**
   * Open the microphone and start feeding frames to the VAD.
   * Failures are logged; the microphone is released if setup fails midway.
   */
  async startRecording() {
    let stream = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({
        audio: {
          sampleRate: 16000,
          channelCount: 1,
          echoCancellation: true,
          noiseSuppression: true
        }
      });

      this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
        sampleRate: 16000
      });

      const source = this.audioContext.createMediaStreamSource(stream);
      const processor = this.audioContext.createScriptProcessor(4096, 1, 1);

      processor.onaudioprocess = (event) => {
        const inputData = event.inputBuffer.getChannelData(0);

        // Buffer a copy of every frame the VAD attributes to an utterance.
        if (this.detectVoiceActivity(inputData)) {
          this.audioBuffer.push(new Float32Array(inputData));
        }
      };

      source.connect(processor);
      processor.connect(this.audioContext.destination);

      this.mediaStream = stream;
      this.sourceNode = source;
      this.processorNode = processor;

      this.isRecording = true;
      this.recordBtn.textContent = '停止录音';
      this.recordBtn.className = 'btn recording';
    } catch (error) {
      console.error('启动录音失败:', error);
      // Do not leave the microphone open when setup failed after permission
      // was granted.
      if (stream && !this.isRecording) {
        stream.getTracks().forEach((track) => track.stop());
      }
    }
  }

  /**
   * Stop recording, release the microphone and audio graph, and flush the
   * utterance in progress (if any) to the recognizer.
   */
  stopRecording() {
    if (this.processorNode) {
      this.processorNode.onaudioprocess = null;
      this.processorNode.disconnect();
      this.processorNode = null;
    }

    if (this.sourceNode) {
      this.sourceNode.disconnect();
      this.sourceNode = null;
    }

    // Stopping the tracks is what actually releases the microphone.
    if (this.mediaStream) {
      this.mediaStream.getTracks().forEach((track) => track.stop());
      this.mediaStream = null;
    }

    if (this.audioContext) {
      this.audioContext.close();
      this.audioContext = null;
    }

    if (this.silenceTimer) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }

    // Flush the utterance in progress. onSpeechEnd() snapshots the buffer
    // synchronously, and its downstream errors are handled internally.
    if (this.isSpeaking) {
      this.onSpeechEnd();
    }

    this.isRecording = false;
    this.isSpeaking = false;
    this.audioBuffer = [];

    this.recordBtn.textContent = '开始录音';
    this.recordBtn.className = 'btn';
    console.log('录音已停止');
  }
}
|
||||
|
||||
// 初始化应用
|
||||
// Bootstrap: create the recognizer once the DOM is ready. The constructor
// wires itself to the page, so the instance does not need to be kept.
document.addEventListener('DOMContentLoaded', () => {
  new HttpASRRecognizer();
  console.log('HTTP ASR识别器已初始化');
});
|
||||
@ -1,44 +0,0 @@
|
||||
import { requestMinimaxi } from './minimaxi_stream.js';
|
||||
import { getMinimaxiConfig } from './config.js';
|
||||
|
||||
/**
 * Play a video while speaking `text` with synthesized audio.
 *
 * Appends a <video> element to the document body, requests speech audio from
 * Minimaxi, decodes it and starts video and audio together.
 *
 * @param {string} videoPath - URL assigned to the video element's `src`.
 * @param {string} text - text to synthesize.
 * @throws when synthesis returns no audio, decoding fails, or the browser
 *   refuses to start video playback; the video element is removed in that case.
 */
export async function playVideoWithAudio(videoPath, text) {
  // 1. Set up the video element.
  const video = document.createElement('video');
  video.src = videoPath;
  document.body.appendChild(video);

  let audioCtx = null;
  try {
    // 2. Request the synthesized audio.
    const minimaxiConfig = getMinimaxiConfig();
    const audioStream = await requestMinimaxi({
      apiKey: minimaxiConfig.apiKey,
      groupId: minimaxiConfig.groupId,
      body: {
        model: 'speech-01-turbo',
        text,
        output_format: 'hex', // hex output is required for the streaming case
        voice_setting: {
          voice_id: 'tianbing_xinggan_03',
          speed: 1
        }
      },
      stream: true
    });

    const audioHex = audioStream && audioStream.data && audioStream.data.audio;
    if (!audioHex) {
      throw new Error('Speech synthesis returned no audio data');
    }

    // 3. Decode the hex payload into a playable buffer.
    audioCtx = new AudioContext();
    const audioBuffer = await audioCtx.decodeAudioData(hexToArrayBuffer(audioHex));

    // 4. Start both together.
    const source = audioCtx.createBufferSource();
    source.buffer = audioBuffer;
    source.connect(audioCtx.destination);

    // Release the audio context once the clip has finished.
    const finishedCtx = audioCtx;
    source.onended = () => {
      finishedCtx.close();
    };

    const videoStarted = video.play();
    source.start(0);

    // Surface autoplay / decode rejections to the caller instead of leaving
    // an unhandled promise rejection.
    await videoStarted;
  } catch (error) {
    // Do not leave an orphaned element or audio context behind on failure.
    video.remove();
    if (audioCtx && audioCtx.state !== 'closed') {
      audioCtx.close();
    }
    throw error;
  }
}
|
||||
|
||||
/**
 * Convert a hexadecimal string into an ArrayBuffer, two hex digits per byte.
 *
 * @param {string} hex - hex digits (upper or lower case); surrounding
 *   whitespace is ignored.
 * @returns {ArrayBuffer} the decoded bytes; an empty buffer for an empty string.
 * @throws {TypeError} when `hex` is not a string.
 * @throws {Error} when the string has an odd length or non-hex characters.
 */
function hexToArrayBuffer(hex) {
  if (typeof hex !== 'string') {
    throw new TypeError('hex must be a string');
  }

  const digits = hex.trim();
  if (digits.length % 2 !== 0 || /[^0-9a-fA-F]/.test(digits)) {
    throw new Error('Invalid hex string');
  }

  const bytes = new Uint8Array(digits.length / 2);
  for (let i = 0; i < bytes.length; i++) {
    bytes[i] = Number.parseInt(digits.slice(i * 2, i * 2 + 2), 16);
  }
  return bytes.buffer;
}
|
||||
@ -1,89 +0,0 @@
|
||||
// 视频播放队列系统测试
|
||||
// 这个文件用于测试新的视频播放逻辑
|
||||
|
||||
/**
 * Manual smoke tests for the video playback queue of a WebRTC chat app
 * instance. The app object must provide `videoQueue`, `isVideoPlaying`,
 * `addToVideoQueue()` and `waitForCurrentVideoToFinish()`.
 */
export class VideoQueueTester {
  /**
   * @param {object} webrtcApp - the application instance under test.
   */
  constructor(webrtcApp) {
    this.webrtcApp = webrtcApp;
  }

  /** Run the three queue scenarios in order. Rejects if any of them throws. */
  async testVideoQueue() {
    console.log('开始测试视频播放队列系统...');

    // Scenario 1: enqueue videos
    await this.testAddToQueue();

    // Scenario 2: wait for the current video to finish
    await this.testWaitForVideoFinish();

    // Scenario 3: audio / video synchronisation
    await this.testAudioVideoSync();

    console.log('视频播放队列系统测试完成');
  }

  /** Clear the queue, enqueue two test videos and print the queue. */
  async testAddToQueue() {
    console.log('测试1: 添加视频到队列');

    // Start from an empty queue.
    this.webrtcApp.videoQueue = [];

    await this.webrtcApp.addToVideoQueue('5.mp4', 'test', '测试视频1');
    await this.webrtcApp.addToVideoQueue('s-1.mp4', 'test', '测试视频2');

    console.log(`队列长度: ${this.webrtcApp.videoQueue.length}`);
    console.log('队列内容:', this.webrtcApp.videoQueue);
  }

  /**
   * Mark a video as playing, clear the flag after 2 seconds, and wait on
   * waitForCurrentVideoToFinish() in the meantime.
   */
  async testWaitForVideoFinish() {
    console.log('测试2: 等待视频播放完成');

    // Simulate a video that is currently playing...
    this.webrtcApp.isVideoPlaying = true;

    // ...and that finishes two seconds later.
    setTimeout(() => {
      this.webrtcApp.isVideoPlaying = false;
      console.log('模拟视频播放完成');
    }, 2000);

    console.log('等待视频播放完成...');
    await this.webrtcApp.waitForCurrentVideoToFinish();
    console.log('视频播放完成等待测试通过');
  }

  /**
   * Enqueue a video while `window.isPlaying` is set. The flag is cleared by
   * a timer 3 seconds later; this method does not wait for that timer.
   */
  async testAudioVideoSync() {
    console.log('测试3: 音频视频同步');

    // Simulate audio playback in progress.
    window.isPlaying = true;

    await this.webrtcApp.addToVideoQueue('5.mp4', 'audio', '音频同步测试');

    // Simulate the audio ending.
    setTimeout(() => {
      window.isPlaying = false;
      console.log('模拟音频播放结束');
    }, 3000);

    console.log('音频视频同步测试完成');
  }

  /**
   * Run every scenario and log the outcome.
   * @returns {Promise<boolean>} true when all scenarios completed, false when
   *   one of them threw (the error is logged, not rethrown).
   */
  async runAllTests() {
    try {
      await this.testVideoQueue();
      console.log('所有测试通过!');
      return true;
    } catch (error) {
      console.error('测试失败:', error);
      return false;
    }
  }
}
|
||||
|
||||
// 导出测试类
|
||||
export default VideoQueueTester;
|
||||
Loading…
x
Reference in New Issue
Block a user