diff --git a/src/audio_processor.js b/src/audio_processor.js
new file mode 100644
index 0000000..4734432
--- /dev/null
+++ b/src/audio_processor.js
@@ -0,0 +1,322 @@
+// Audio processing module - advanced audio handling extracted from new_app.js
+
+class AudioProcessor {
+ constructor(options = {}) {
+ this.audioContext = null;
+ this.isRecording = false;
+ this.audioChunks = [];
+
+ // VAD (voice activity detection) state
+ this.isSpeaking = false;
+ this.silenceThreshold = options.silenceThreshold || 0.01;
+ this.silenceTimeout = options.silenceTimeout || 1000;
+ this.minSpeechDuration = options.minSpeechDuration || 300;
+ this.silenceTimer = null;
+ this.speechStartTime = null;
+ this.audioBuffer = [];
+
+ // API configuration
+ this.apiConfig = {
+ url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
+ headers: {
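+ // NOTE: credentials are hardcoded here for the demo; in production they
+ // should be loaded from configuration or environment, not committed.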
+ 'X-Api-App-Key': '1988591469',
+ 'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
+ 'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
+ 'X-Api-Request-Id': this.generateUUID(),
+ 'X-Api-Sequence': '-1',
+ 'Content-Type': 'application/json'
+ }
+ };
+
+ // Callback hooks
+ this.onSpeechStart = options.onSpeechStart || (() => {});
+ this.onSpeechEnd = options.onSpeechEnd || (() => {});
+ this.onRecognitionResult = options.onRecognitionResult || (() => {});
+ this.onError = options.onError || (() => {});
+ this.onStatusUpdate = options.onStatusUpdate || (() => {});
+ }
+
+ // Generate a v4-style UUID
+ generateUUID() {
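+ // Where available (secure contexts), crypto.randomUUID() is a simpler
+ // standards-based alternative to this manual v4 template.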
+ return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
+ const r = Math.random() * 16 | 0;
+ const v = c === 'x' ? r : (r & 0x3 | 0x8);
+ return v.toString(16);
+ });
+ }
+
+ // Compute the RMS audio level (volume)
+ calculateAudioLevel(audioData) {
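+ // RMS = sqrt(mean(x[i]^2)) over the frame; Float32 samples lie in [-1, 1],
+ // so voiced frames typically sit well above the 0.01 default threshold.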
+ let sum = 0;
+ for (let i = 0; i < audioData.length; i++) {
+ sum += audioData[i] * audioData[i];
+ }
+ return Math.sqrt(sum / audioData.length);
+ }
+
+ // Voice activity detection
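+ // State machine: silent --(level > threshold)--> speaking (start buffering);
+ // speaking --(level <= threshold for silenceTimeout ms)--> handleSpeechEnd().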
+ detectVoiceActivity(audioData) {
+ const audioLevel = this.calculateAudioLevel(audioData);
+ const currentTime = Date.now();
+
+ if (audioLevel > this.silenceThreshold) {
+ if (!this.isSpeaking) {
+ this.isSpeaking = true;
+ this.speechStartTime = currentTime;
+ this.audioBuffer = [];
+ this.onSpeechStart();
+ this.onStatusUpdate('Speech detected, recording...', 'speaking');
+ console.log('Speech started');
+ }
+
+ if (this.silenceTimer) {
+ clearTimeout(this.silenceTimer);
+ this.silenceTimer = null;
+ }
+
+ return true;
+ } else {
+ if (this.isSpeaking && !this.silenceTimer) {
+ this.silenceTimer = setTimeout(() => {
+ this.handleSpeechEnd();
+ }, this.silenceTimeout);
+ }
+
+ return this.isSpeaking;
+ }
+ }
+
+ // Handle end of speech
+ async handleSpeechEnd() {
+ if (this.isSpeaking) {
+ const speechDuration = Date.now() - this.speechStartTime;
+
+ if (speechDuration >= this.minSpeechDuration) {
+ console.log(`Speech ended, duration: ${speechDuration}ms`);
+ await this.processAudioBuffer();
+ this.onStatusUpdate('Recognizing speech...', 'processing');
+ } else {
+ console.log('Speech too short, ignoring');
+ this.onStatusUpdate('Waiting for speech input...', 'ready');
+ }
+
+ this.isSpeaking = false;
+ this.speechStartTime = null;
+ this.audioBuffer = [];
+ this.onSpeechEnd();
+ }
+
+ if (this.silenceTimer) {
+ clearTimeout(this.silenceTimer);
+ this.silenceTimer = null;
+ }
+ }
+
+ // Merge the buffered audio and send it to the ASR API
+ async processAudioBuffer() {
+ if (this.audioBuffer.length === 0) {
+ return;
+ }
+
+ try {
+ // Merge all buffered audio chunks
+ const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
+ const combinedBuffer = new Float32Array(totalLength);
+ let offset = 0;
+
+ for (const buffer of this.audioBuffer) {
+ combinedBuffer.set(buffer, offset);
+ offset += buffer.length;
+ }
+
+ // Encode as WAV and then base64
+ const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
+ const base64Audio = this.arrayBufferToBase64(wavBuffer);
+
+ // Call the ASR API
+ await this.callASRAPI(base64Audio);
+
+ } catch (error) {
+ console.error('Failed to process audio data:', error);
+ this.onError('Failed to process audio data: ' + error.message);
+ }
+ }
+
+ // Call the ASR API
+ async callASRAPI(base64AudioData) {
+ try {
+ const requestBody = {
+ user: {
+ uid: "1988591469"
+ },
+ audio: {
+ data: base64AudioData
+ },
+ request: {
+ model_name: "bigmodel"
+ }
+ };
+
+ const response = await fetch(this.apiConfig.url, {
+ method: 'POST',
+ headers: this.apiConfig.headers,
+ body: JSON.stringify(requestBody)
+ });
+
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+
+ const result = await response.json();
+ this.handleASRResponse(result);
+
+ } catch (error) {
+ console.error('ASR API call failed:', error);
+ this.onError('ASR API call failed: ' + error.message);
+ }
+ }
+
+ // Handle the ASR response
+ handleASRResponse(response) {
+ console.log('ASR response:', response);
+
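+ // NOTE: new_app.js reads response.data.result for the same endpoint; the
+ // actual response shape should be verified against the ASR API docs.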
+ if (response && response.result) {
+ const recognizedText = response.result.text;
+ this.onRecognitionResult(recognizedText);
+ this.onStatusUpdate('识别完成', 'completed');
+ } else {
+ console.log('No text recognized');
+ this.onStatusUpdate('No text recognized', 'ready');
+ }
+ }
+
+ // Encode 16-bit PCM WAV
+ encodeWAV(samples, sampleRate) {
+ const length = samples.length;
+ const buffer = new ArrayBuffer(44 + length * 2);
+ const view = new DataView(buffer);
+
+ // WAV file header
+ const writeString = (offset, string) => {
+ for (let i = 0; i < string.length; i++) {
+ view.setUint8(offset + i, string.charCodeAt(i));
+ }
+ };
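+ // 44-byte PCM WAV header (little-endian): RIFF chunk size = 36 + data bytes;
+ // fmt chunk = 16 bytes, format 1 (PCM), 1 channel; byte rate = sampleRate * 2
+ // and block align = 2 for 16-bit mono; data chunk size = samples * 2.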
+
+ writeString(0, 'RIFF');
+ view.setUint32(4, 36 + length * 2, true);
+ writeString(8, 'WAVE');
+ writeString(12, 'fmt ');
+ view.setUint32(16, 16, true);
+ view.setUint16(20, 1, true);
+ view.setUint16(22, 1, true);
+ view.setUint32(24, sampleRate, true);
+ view.setUint32(28, sampleRate * 2, true);
+ view.setUint16(32, 2, true);
+ view.setUint16(34, 16, true);
+ writeString(36, 'data');
+ view.setUint32(40, length * 2, true);
+
+ // Write the samples: clamp to [-1, 1] and scale to 16-bit signed PCM
+ let offset = 44;
+ for (let i = 0; i < length; i++) {
+ const sample = Math.max(-1, Math.min(1, samples[i]));
+ view.setInt16(offset, sample * 0x7FFF, true);
+ offset += 2;
+ }
+
+ return buffer;
+ }
+
+ // ArrayBuffer → Base64
+ arrayBufferToBase64(buffer) {
+ let binary = '';
+ const bytes = new Uint8Array(buffer);
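+ // Appending one character at a time avoids the argument-length limit that
+ // String.fromCharCode(...bytes) would hit on large buffers.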
+ for (let i = 0; i < bytes.byteLength; i++) {
+ binary += String.fromCharCode(bytes[i]);
+ }
+ return btoa(binary);
+ }
+
+ // Start recording
+ async startRecording() {
+ try {
+ const stream = await navigator.mediaDevices.getUserMedia({
+ audio: {
+ sampleRate: 16000,
+ channelCount: 1,
+ echoCancellation: true,
+ noiseSuppression: true
+ }
+ });
+
+ this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
+ sampleRate: 16000
+ });
+
+ const source = this.audioContext.createMediaStreamSource(stream);
+ const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
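+ // NOTE: ScriptProcessorNode is deprecated in the Web Audio spec; an
+ // AudioWorklet is the modern replacement, at the cost of a separate module.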
+
+ processor.onaudioprocess = (event) => {
+ const inputBuffer = event.inputBuffer;
+ const inputData = inputBuffer.getChannelData(0);
+
+ // Voice activity detection
+ if (this.detectVoiceActivity(inputData)) {
+ // Buffer the audio while speech is active
+ this.audioBuffer.push(new Float32Array(inputData));
+ }
+ };
+
+ source.connect(processor);
+ processor.connect(this.audioContext.destination);
+
+ this.isRecording = true;
+ this.onStatusUpdate('Waiting for speech input...', 'ready');
+
+ return true;
+
+ } catch (error) {
+ console.error('Failed to start recording:', error);
+ this.onError('Failed to start recording: ' + error.message);
+ return false;
+ }
+ }
+
+ // Stop recording
+ stopRecording() {
+ if (this.audioContext) {
+ this.audioContext.close();
+ this.audioContext = null;
+ }
+
+ if (this.silenceTimer) {
+ clearTimeout(this.silenceTimer);
+ this.silenceTimer = null;
+ }
+
+ // If speech is in progress, flush the final audio
+ if (this.isSpeaking) {
+ this.handleSpeechEnd();
+ }
+
+ this.isRecording = false;
+ this.isSpeaking = false;
+ this.audioBuffer = [];
+
+ this.onStatusUpdate('Recording stopped', 'stopped');
+ console.log('Recording stopped');
+ }
+
+ // Get the recorder status
+ getRecordingStatus() {
+ return {
+ isRecording: this.isRecording,
+ isSpeaking: this.isSpeaking,
+ hasAudioContext: !!this.audioContext
+ };
+ }
+}
+
+// Export the module
+export { AudioProcessor };
\ No newline at end of file
diff --git a/src/chat_with_audio.js b/src/chat_with_audio.js
index bbe0d4d..6a0e6f8 100644
--- a/src/chat_with_audio.js
+++ b/src/chat_with_audio.js
@@ -6,6 +6,9 @@ import { getLLMConfig, getMinimaxiConfig, getAudioConfig, validateConfig } from
// Flag to prevent overlapping playback
let isPlaying = false;
+// Audio playback queue
+let audioQueue = [];
+let isProcessingQueue = false;
async function chatWithAudioStream(userInput) {
// 验证配置
@@ -20,7 +23,48 @@ async function chatWithAudioStream(userInput) {
const minimaxiConfig = getMinimaxiConfig();
const audioConfig = getAudioConfig();
- // 1. Request the LLM answer
+ // Clear the audio queue
+ audioQueue = [];
+
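+ // Pipeline: LLM stream → punctuation-delimited segments (see llm_stream.js)
+ // → one minimaxi TTS request per segment → audioQueue for playback.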
+ // Segment handler: synthesize audio for each completed text segment
+ const handleSegment = async (segment) => {
+ console.log('\n=== Handling text segment ===');
+ console.log('Segment:', segment);
+
+ try {
+ // Synthesize audio for this segment
+ const audioResult = await requestMinimaxi({
+ apiKey: minimaxiConfig.apiKey,
+ groupId: minimaxiConfig.groupId,
+ body: {
+ model: audioConfig.model,
+ text: segment,
+ stream: audioConfig.stream,
+ language_boost: audioConfig.language_boost,
+ output_format: audioConfig.output_format,
+ voice_setting: audioConfig.voiceSetting,
+ audio_setting: audioConfig.audioSetting,
+ },
+ stream: true,
+ });
+
+ // Enqueue the audio for playback
+ if (audioResult && audioResult.data && audioResult.data.audio) {
+ audioQueue.push({
+ text: segment,
+ audioHex: audioResult.data.audio
+ });
+ console.log('Audio enqueued, queue length:', audioQueue.length);
+
+ // Kick off queue processing
+ processAudioQueue();
+ }
+ } catch (error) {
+ console.error('Audio synthesis failed:', error);
+ }
+ };
+
+ // 1. Request the LLM answer and handle segments as they stream in
console.log('\n=== Requesting LLM answer ===');
const llmResponse = await requestLLMStream({
apiKey: llmConfig.apiKey,
@@ -29,55 +73,45 @@ async function chatWithAudioStream(userInput) {
{ role: 'system', content: 'You are a helpful assistant.' },
{ role: 'user', content: userInput },
],
+ onSegment: handleSegment // segment-handling callback
});
- // Extract the LLM answer content (now returned directly)
- const llmContent = llmResponse;
-
- console.log('\n=== LLM answer ===');
- console.log("llmResponse: ", llmContent);
-
- // 2. Synthesize the audio
- console.log('\n=== Synthesizing audio ===');
- const audioResult = await requestMinimaxi({
- apiKey: minimaxiConfig.apiKey,
- groupId: minimaxiConfig.groupId,
- body: {
- model: audioConfig.model,
- text: llmContent,
- stream: audioConfig.stream,
- language_boost: audioConfig.language_boost,
- output_format: audioConfig.output_format,
- voice_setting: audioConfig.voiceSetting,
- audio_setting: audioConfig.audioSetting,
- },
- stream: true,
- });
-
- // 3. Stream-play the audio
- console.log('\n=== Starting streaming audio playback ===');
- // console.log('Audio data length:', audioResult.data.audio.length);
- await playAudioStream(audioResult.data.audio);
+ console.log('\n=== Full LLM answer ===');
+ console.log("llmResponse: ", llmResponse);
return {
userInput,
- llmResponse: llmContent,
- audioResult,
+ llmResponse,
+ audioQueue: audioQueue.map(item => ({ text: item.text, hasAudio: !!item.audioHex }))
};
}
+// Process the audio playback queue
+async function processAudioQueue() {
+ if (isProcessingQueue) return;
+
+ isProcessingQueue = true;
+
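+ // Playback now happens chunk-by-chunk inside requestMinimaxi (see
+ // minimaxi_stream.js), so the loop below is disabled to avoid double playback.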
+ // while (audioQueue.length > 0) {
+ // const audioItem = audioQueue.shift();
+ // console.log('\n=== Playing queued audio ===');
+ // console.log('Text:', audioItem.text);
+
+ // try {
+ // await playAudioStream(audioItem.audioHex);
+ // } catch (error) {
+ // console.error('Audio playback failed:', error);
+ // }
+ // }
+
+ isProcessingQueue = false;
+}
+
// Stream-play audio
async function playAudioStream(audioHex) {
- if (isPlaying) {
- console.log('Audio already playing, skipping duplicate playback');
- return;
- }
-
console.log('=== Starting audio playback ===');
console.log('Audio data length:', audioHex.length);
- isPlaying = true;
-
// Convert hex to ArrayBuffer
const audioBuffer = hexToArrayBuffer(audioHex);
@@ -102,13 +136,11 @@ async function playAudioStream(audioHex) {
return new Promise((resolve) => {
source.onended = () => {
console.log('Audio playback finished');
- isPlaying = false;
resolve();
};
});
} catch (error) {
console.error('Audio playback failed:', error);
- isPlaying = false;
throw error;
}
}
@@ -175,4 +207,6 @@ async function playAudioStreamNode(audioHex) {
}
}
-export { chatWithAudioStream, playAudioStream, playAudioStreamNode };
\ No newline at end of file
+
+
+export { chatWithAudioStream, playAudioStream, playAudioStreamNode };
\ No newline at end of file
diff --git a/src/config.js b/src/config.js
index 8cb236c..9d4b477 100644
--- a/src/config.js
+++ b/src/config.js
@@ -16,11 +16,11 @@ export const config = {
audio: {
model: 'speech-02-hd',
voiceSetting: {
- voice_id: 'yantu-qinggang',
+ voice_id: 'yantu-qinggang-2',
speed: 1,
vol: 1,
pitch: 0,
- emotion: 'happy',
+ // emotion: 'happy',
},
audioSetting: {
sample_rate: 32000,
diff --git a/src/index - 副本.html b/src/index - 副本.html
new file mode 100644
index 0000000..c8bb39e
--- /dev/null
+++ b/src/index - 副本.html
@@ -0,0 +1,139 @@
+<!-- HTML markup was lost in extraction; the recoverable page content is: -->
+<!-- Title/heading: "实时语音识别" (Real-time Speech Recognition) -->
+<!-- Status indicator, initial text: "Not connected" -->
+<!-- Usage notes: 1. click "Start Recording" to enable the microphone;
+     2. speech is detected automatically and recorded only while you speak;
+     3. finished utterances are sent for recognition automatically;
+     4. results appear in the "Recognition results:" area below -->
\ No newline at end of file
diff --git a/src/index.js b/src/index.js
index 86c0698..ec15aa5 100644
--- a/src/index.js
+++ b/src/index.js
@@ -1,5 +1,6 @@
// WebRTC audio/video chat application
import { chatWithAudioStream } from './chat_with_audio.js';
+import { AudioProcessor } from './audio_processor.js';
class WebRTCChat {
constructor() {
@@ -15,6 +16,30 @@ class WebRTCChat {
this.videoStreams = new Map(); // 存储不同视频的MediaStream
this.currentVideoStream = null;
+ // Initialize the audio processor
+ this.audioProcessor = new AudioProcessor({
+ onSpeechStart: () => {
+ this.voiceStatus.textContent = 'Speech detected, recording...';
+ this.logMessage('Speech detected, recording...', 'info');
+ },
+ onSpeechEnd: () => {
+ // speech-end callback
+ },
+ onRecognitionResult: (text) => {
+ // ASRTEXT = text;
+ this.voiceStatus.textContent = 'Recognition complete';
+ this.logMessage(`Speech recognition result: ${text}`, 'success');
+ this.handleVoiceInput(text);
+ },
+ onError: (error) => {
+ this.voiceStatus.textContent = 'Recognition failed';
+ this.logMessage(error, 'error');
+ },
+ onStatusUpdate: (message, status) => {
+ this.voiceStatus.textContent = message;
+ }
+ });
+
this.initializeElements();
this.initializeSocket();
this.loadVideoMapping();
@@ -627,65 +652,34 @@ class WebRTCChat {
});
}
+ // Changed: voice recording is now delegated to AudioProcessor
async startVoiceRecording() {
- try {
- const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
- this.mediaRecorder = new MediaRecorder(stream);
- this.audioChunks = [];
-
- this.mediaRecorder.ondataavailable = (event) => {
- this.audioChunks.push(event.data);
- };
-
- this.mediaRecorder.onstop = () => {
- const audioBlob = new Blob(this.audioChunks, { type: 'audio/wav' });
- this.processVoiceInput(audioBlob);
- };
-
- this.mediaRecorder.start();
- this.isRecording = true;
-
+ const success = await this.audioProcessor.startRecording();
+
+ if (success) {
this.startVoiceButton.disabled = true;
this.stopVoiceButton.disabled = false;
- this.voiceStatus.textContent = 'Recording...';
this.startVoiceButton.classList.add('recording');
-
- this.logMessage('Voice recording started', 'info');
- } catch (error) {
- this.logMessage('Could not access the microphone: ' + error.message, 'error');
+ this.voiceStatus.textContent = 'Waiting for speech input...';
+ this.logMessage('Advanced voice recording started', 'success');
+ } else {
+ this.voiceStatus.textContent = 'Failed to start recording';
}
}
+ // Changed: stop voice recording
stopVoiceRecording() {
- if (this.mediaRecorder && this.isRecording) {
- this.mediaRecorder.stop();
- this.isRecording = false;
-
- this.startVoiceButton.disabled = false;
- this.stopVoiceButton.disabled = true;
- this.voiceStatus.textContent = 'Click to start voice input';
- this.startVoiceButton.classList.remove('recording');
-
- this.logMessage('Voice recording stopped', 'info');
- }
- }
-
- async processVoiceInput(audioBlob) {
- // A speech recognition API (Web Speech API or a third-party service) could be integrated here
- // For the demo we use a simple mock recognizer
- const mockText = this.simulateSpeechRecognition();
-
- this.socket.emit('voice-input', {
- audioData: audioBlob,
- text: mockText
- });
-
- this.logMessage(`Speech recognition result: ${mockText}`, 'info');
-
- // Switch the video stream based on the recognition result
- await this.handleVoiceInput(mockText);
+ this.audioProcessor.stopRecording();
+
+ this.startVoiceButton.disabled = false;
+ this.stopVoiceButton.disabled = true;
+ this.startVoiceButton.classList.remove('recording');
+ this.voiceStatus.textContent = 'Click to start voice input';
+
+ this.logMessage('Voice recording stopped', 'info');
}
+ // Handle the recognized voice input
async handleVoiceInput(text) {
// Find the matching video for the text (the '默认' key maps the default)
let videoFile = this.videoMapping['默认'] || this.defaultVideo;
@@ -705,8 +699,21 @@ class WebRTCChat {
type: 'voice',
text
});
+
+ // Hand the text to the LLM pipeline
+ try {
+ this.logMessage('Processing voice input, please wait...', 'info');
+ const result = await chatWithAudioStream(text);
+ this.logMessage(`LLM answer: ${result.llmResponse}`, 'success');
+ } catch (error) {
+ this.logMessage(`Failed to process voice input: ${error.message}`, 'error');
+ console.error('chatWithAudioStream error:', error);
+ }
}
+ // The old processVoiceInput() method has been removed;
+ // simulateSpeechRecognition() below remains only as a mock fallback
+
simulateSpeechRecognition() {
// Mock speech recognition: return one of the preset strings at random
const texts = ['你好', '再见', '谢谢', 'hello', 'goodbye', 'thank you'];
@@ -776,4 +783,4 @@ class WebRTCChat {
// Initialize the app once the page has loaded
document.addEventListener('DOMContentLoaded', () => {
new WebRTCChat();
-});
\ No newline at end of file
+});
\ No newline at end of file
diff --git a/src/llm_stream.js b/src/llm_stream.js
index 0308340..d096134 100644
--- a/src/llm_stream.js
+++ b/src/llm_stream.js
@@ -1,6 +1,6 @@
// Request the LLM endpoint in streaming mode and print the streamed content
-async function requestLLMStream({ apiKey, model, messages }) {
+async function requestLLMStream({ apiKey, model, messages, onSegment }) {
const response = await fetch('https://ark.cn-beijing.volces.com/api/v3/bots/chat/completions', {
method: 'POST',
headers: {
@@ -26,6 +26,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
let done = false;
let buffer = '';
let content = '';
+ let pendingText = ''; // text awaiting segmentation
+
+ // Segment delimiters (full-width and ASCII punctuation)
+ const segmentDelimiters = /[,。:;!?,.:;!?]/;
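+ // Example: once the stream has produced "你好,世界。再", the complete
+ // segments "你好," and "世界。" are flushed to onSegment and "再" stays pending.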
while (!done) {
const { value, done: doneReading } = await reader.read();
@@ -47,6 +51,10 @@ async function requestLLMStream({ apiKey, model, messages }) {
if (jsonStr === '[DONE]') {
console.log('LLM SSE stream ended');
+ // Flush any remaining pending text
+ if (pendingText.trim() && onSegment) {
+ await onSegment(pendingText.trim());
+ }
continue;
}
@@ -55,7 +63,29 @@ async function requestLLMStream({ apiKey, model, messages }) {
if (obj.choices && obj.choices[0] && obj.choices[0].delta && obj.choices[0].delta.content) {
const deltaContent = obj.choices[0].delta.content;
content += deltaContent;
+ pendingText += deltaContent;
console.log('LLM content fragment:', deltaContent);
+
+ // Flush complete segments whenever a delimiter appears
+ if (segmentDelimiters.test(pendingText)) {
+ // Split while keeping each delimiter (the capture group makes
+ // String.prototype.split return the delimiters as array entries);
+ // this fixes the earlier bug where the first delimiter in the
+ // buffer was appended to every segment
+ const parts = pendingText.split(/([,。:;!?,.:;!?])/);
+
+ // parts alternates text/delimiter; the last entry is trailing text
+ // that has no delimiter yet and may still be incomplete
+ for (let i = 0; i + 1 < parts.length; i += 2) {
+ const segmentWithDelimiter = (parts[i] + parts[i + 1]).trim();
+ if (segmentWithDelimiter && onSegment) {
+ console.log('Complete segment detected:', segmentWithDelimiter);
+ await onSegment(segmentWithDelimiter);
+ }
+ }
+
+ // Keep the trailing incomplete segment for the next chunk
+ pendingText = parts[parts.length - 1] || '';
+ }
}
} catch (e) {
console.error('Failed to parse LLM SSE data:', e, 'raw data:', jsonStr);
@@ -72,4 +102,4 @@ async function requestLLMStream({ apiKey, model, messages }) {
return content;
}
-export { requestLLMStream };
\ No newline at end of file
+export { requestLLMStream };
\ No newline at end of file
diff --git a/src/minimaxi_stream.js b/src/minimaxi_stream.js
index cc3b369..a1eac7a 100644
--- a/src/minimaxi_stream.js
+++ b/src/minimaxi_stream.js
@@ -1,5 +1,135 @@
// Request the minimaxi endpoint in streaming or non-streaming mode and print/return the content
+// Audio playback state and helpers, added at the top of the file
+let audioContext = null;
+let audioQueue = []; // audio queue
+let isPlaying = false;
+let isProcessingQueue = false; // queue-processor state
+let nextStartTime = 0; // declared here; currently only reset, never read for scheduling
+
+// Lazily initialize the audio context
+function initAudioContext() {
+ if (!audioContext) {
+ audioContext = new (window.AudioContext || window.webkitAudioContext)();
+ }
+ return audioContext;
+}
+
+// Convert a hex string to an ArrayBuffer
+function hexToArrayBuffer(hex) {
+ const bytes = new Uint8Array(hex.length / 2);
+ for (let i = 0; i < hex.length; i += 2) {
+ bytes[i / 2] = parseInt(hex.substr(i, 2), 16);
+ }
+ return bytes.buffer;
+}
+
+// Enqueue audio without waiting for playback
+async function addAudioToQueue(audioHex) {
+ if (!audioHex || audioHex.length === 0) return;
+
+ try {
+ const ctx = initAudioContext();
+ const audioBuffer = hexToArrayBuffer(audioHex);
+ const audioData = await ctx.decodeAudioData(audioBuffer);
+
+ // Push the decoded audio onto the queue
+ audioQueue.push({
+ audioData,
+ timestamp: Date.now()
+ });
+
+ console.log(`Audio enqueued, queue length: ${audioQueue.length}`);
+
+ // Start the queue processor if it is not already running
+ if (!isProcessingQueue) {
+ processAudioQueue();
+ }
+
+ } catch (error) {
+ console.error('Audio decode failed:', error);
+ }
+}
+
+// Queue processor: runs independently and plays audio in order
+async function processAudioQueue() {
+ if (isProcessingQueue) return;
+
+ isProcessingQueue = true;
+ console.log('Audio queue processor started');
+
+ while (audioQueue.length > 0 || isPlaying) {
+ // If nothing is playing and the queue has audio, play the next item
+ if (!isPlaying && audioQueue.length > 0) {
+ const audioItem = audioQueue.shift();
+ await playAudioData(audioItem.audioData);
+ } else {
+ // Otherwise wait briefly before checking again
+ await new Promise(resolve => setTimeout(resolve, 50));
+ }
+ }
+
+ isProcessingQueue = false;
+ console.log('Audio queue drained');
+}
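+// Polling every 50 ms keeps the processor simple; chaining a promise per
+// queued item would avoid the busy-wait at the cost of extra bookkeeping.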
+
+// Play a single decoded AudioBuffer
+function playAudioData(audioData) {
+ return new Promise((resolve) => {
+ try {
+ const ctx = initAudioContext();
+ const source = ctx.createBufferSource();
+ source.buffer = audioData;
+ source.connect(ctx.destination);
+
+ isPlaying = true;
+
+ source.onended = () => {
+ console.log('Audio chunk finished');
+ isPlaying = false;
+ resolve();
+ };
+
+ // Timeout guard in case onended never fires
+ setTimeout(() => {
+ if (isPlaying) {
+ console.log('Audio playback timed out, forcing completion');
+ isPlaying = false;
+ resolve();
+ }
+ }, (audioData.duration + 0.5) * 1000);
+
+ source.start(0);
+ console.log(`Playing audio chunk, duration: ${audioData.duration}s`);
+
+ } catch (error) {
+ console.error('Audio playback failed:', error);
+ isPlaying = false;
+ resolve();
+ }
+ });
+}
+
+// playAudioChunk is now an alias for addAudioToQueue
+const playAudioChunk = addAudioToQueue;
+
+// Clear the audio queue
+function clearAudioQueue() {
+ audioQueue.length = 0;
+ console.log('Audio queue cleared');
+}
+
+// Report the queue status
+function getQueueStatus() {
+ return {
+ queueLength: audioQueue.length,
+ isPlaying,
+ isProcessingQueue
+ };
+}
+
+// The old waitForCurrentAudioToFinish helper is no longer needed and was removed
+
async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
const url = `https://api.minimaxi.com/v1/t2a_v2`;
const reqBody = { ...body, stream };
@@ -24,13 +154,19 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
console.log(JSON.stringify(result, null, 2));
return result;
} else {
- // 流式,解析每个chunk,合并audio
+ // 流式,解析每个chunk,实时播放音频
const reader = response.body.getReader();
const decoder = new TextDecoder('utf-8');
let done = false;
let buffer = '';
let audioHex = '';
let lastFullResult = null;
+
+ // Reset playback state
+ nextStartTime = 0;
+ if (audioContext) {
+ nextStartTime = audioContext.currentTime;
+ }
while (!done) {
const { value, done: doneReading } = await reader.read();
@@ -38,19 +174,16 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
if (value) {
const chunk = decoder.decode(value, { stream: true });
buffer += chunk;
- // console.log('Raw chunk received:', chunk);
// Handle SSE-format data (split on \n)
let lines = buffer.split('\n');
buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
for (const line of lines) {
if (!line.trim()) continue;
- // console.log('Processing line:', line);
// Check whether this is an SSE data line
if (line.startsWith('data:')) {
const jsonStr = line.substring(6); // strip the 'data: ' prefix
- // console.log('Extracted JSON string:', jsonStr);
if (jsonStr.trim() === '[DONE]') {
console.log('SSE stream ended');
@@ -59,17 +192,19 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
try {
const obj = JSON.parse(jsonStr);
- // Streaming: parse each chunk and concatenate the audio
- if (obj.data && obj.data.audio) {
+ // Streaming: parse each chunk and play the audio immediately
+ if (obj.data && obj.data.audio && obj.data.status === 1) {
+ console.log('Received an audio chunk!', obj.data.audio.length);
audioHex += obj.data.audio;
+
+ // 立即播放这个音频片段
+ await playAudioChunk(obj.data.audio);
}
// status === 2 marks the final chunk; keep its full structure
if (obj.data && obj.data.status === 2) {
lastFullResult = obj;
console.log('Final status received');
}
- // Log each chunk in real time
- console.log('Parse OK:', JSON.stringify(obj));
} catch (e) {
console.error('Failed to parse SSE data:', e, 'raw data:', jsonStr);
}
@@ -83,7 +218,11 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
try {
const obj = JSON.parse(line);
if (obj.data && obj.data.audio) {
+ console.log('Received audio outside a data: line!', obj.data.audio.length);
audioHex += obj.data.audio;
+
+ // Play this chunk right away
+ await playAudioChunk(obj.data.audio);
}
if (obj.data && obj.data.status === 2) {
lastFullResult = obj;
@@ -109,4 +248,135 @@ async function requestMinimaxi({ apiKey, groupId, body, stream = true }) {
}
}
-export { requestMinimaxi };
\ No newline at end of file
+// Volcengine (火山引擎) TTS
+async function requestVolcanTTS({
+ appId,
+ accessKey,
+ resourceId = 'volc.service_type.10029',
+ appKey = 'aGjiRDfUWi',
+ body,
+ stream = true
+}) {
+ const url = 'https://openspeech.bytedance.com/api/v3/tts/unidirectional';
+
+ // Generate a request ID
+ const requestId = generateUUID();
+
+ const response = await fetch(url, {
+ method: 'POST',
+ headers: {
+ 'X-Api-App-Id': appId,
+ 'X-Api-Access-Key': accessKey,
+ 'X-Api-Resource-Id': resourceId,
+ 'X-Api-App-Key': appKey,
+ 'X-Api-Request-Id': requestId,
+ 'Content-Type': 'application/json',
+ 'Accept': stream ? 'text/event-stream' : 'application/json',
+ 'Cache-Control': 'no-cache',
+ },
+ body: JSON.stringify(body),
+ });
+
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+
+ if (!stream) {
+ // Non-streaming: return the JSON directly
+ const result = await response.json();
+ console.log('Volcengine TTS non-streaming result:', JSON.stringify(result, null, 2));
+ return result;
+ } else {
+ // Streaming: parse each chunk and concatenate the audio
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder('utf-8');
+ let done = false;
+ let buffer = '';
+ let audioBase64 = '';
+ let lastFullResult = null;
+
+ while (!done) {
+ const { value, done: doneReading } = await reader.read();
+ done = doneReading;
+ if (value) {
+ const chunk = decoder.decode(value, { stream: true });
+ buffer += chunk;
+
+ // Handle SSE-format data (split on \n)
+ let lines = buffer.split('\n');
+ buffer = lines.pop(); // the last line may be incomplete; keep it for the next read
+
+ for (const line of lines) {
+ if (!line.trim()) continue;
+
+ // Check whether this is an SSE data line
+ if (line.startsWith('data:')) {
+ const jsonStr = line.slice(5).trim(); // strip the 'data:' prefix and any following space
+
+ if (jsonStr === '[DONE]') {
+ console.log('Volcengine TTS stream ended');
+ continue;
+ }
+
+ try {
+ const obj = JSON.parse(jsonStr);
+ // Streaming: concatenate the base64 audio from each chunk
+ if (obj.data) {
+ audioBase64 += obj.data;
+ lastFullResult = obj;
+ }
+ // Log each chunk in real time
+ console.log('Volcengine TTS parse OK:', JSON.stringify(obj));
+ } catch (e) {
+ console.error('Failed to parse Volcengine TTS data:', e, 'raw data:', jsonStr);
+ }
+ } else if (line.startsWith('event: ') || line.startsWith('id: ') || line.startsWith('retry: ')) {
+ // Skip other SSE fields
+ console.log('Skipping SSE field:', line);
+ continue;
+ } else if (line.trim() && !line.startsWith('data:')) {
+ // Try parsing the line directly (for non-SSE responses)
+ try {
+ const obj = JSON.parse(line);
+ if (obj.data) {
+ audioBase64 += obj.data;
+ lastFullResult = obj;
+ }
+ console.log('Volcengine TTS direct parse OK:', JSON.stringify(obj));
+ } catch (e) {
+ console.error('Failed to parse Volcengine TTS chunk:', e, line);
+ }
+ }
+ }
+ }
+ }
+
+ // Assemble the final result
+ console.log('Volcengine TTS total audio length:', audioBase64.length);
+
+ if (lastFullResult) {
+ // Replace the final result's audio with the concatenated data
+ lastFullResult.data = audioBase64;
+ console.log('Volcengine TTS final result:', JSON.stringify(lastFullResult, null, 2));
+ return lastFullResult;
+ } else {
+ // No complete structure was seen; return the concatenated audio
+ return {
+ code: 0,
+ message: '',
+ data: audioBase64
+ };
+ }
+ }
+}
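+// Illustrative call (a sketch only; the exact `body` fields follow the
+// Volcengine TTS docs and are not spelled out here):
+// const tts = await requestVolcanTTS({ appId, accessKey, body: { /* per docs */ }, stream: true });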
+
+// Helper: generate a v4-style UUID
+function generateUUID() {
+ return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
+ const r = Math.random() * 16 | 0;
+ const v = c === 'x' ? r : (r & 0x3 | 0x8);
+ return v.toString(16);
+ });
+}
+
+export { requestMinimaxi, requestVolcanTTS };
\ No newline at end of file
diff --git a/src/new_app.js b/src/new_app.js
new file mode 100644
index 0000000..dcb730b
--- /dev/null
+++ b/src/new_app.js
@@ -0,0 +1,346 @@
+let ASRTEXT = '';
+
+class HttpASRRecognizer {
+ constructor() {
+ this.mediaRecorder = null;
+ this.audioContext = null;
+ this.isRecording = false;
+ this.audioChunks = [];
+
+ // VAD (voice activity detection) state
+ this.isSpeaking = false;
+ this.silenceThreshold = 0.01;
+ this.silenceTimeout = 1000;
+ this.minSpeechDuration = 300;
+ this.silenceTimer = null;
+ this.speechStartTime = null;
+ this.audioBuffer = [];
+
+ // API configuration
+ this.apiConfig = {
+ url: 'https://openspeech.bytedance.com/api/v3/auc/bigmodel/recognize/flash',
+ headers: {
+ 'X-Api-App-Key': '1988591469',
+ 'X-Api-Access-Key': 'mdEyhgZ59on1-NK3GXWAp3L4iLldSG0r',
+ 'X-Api-Resource-Id': 'volc.bigasr.auc_turbo',
+ 'X-Api-Request-Id': this.generateUUID(),
+ 'X-Api-Sequence': '-1',
+ 'Content-Type': 'application/json'
+ }
+ };
+
+ this.recordBtn = document.getElementById('startVoiceButton');
+ this.statusDiv = document.getElementById('status');
+ this.resultsDiv = document.getElementById('results');
+
+ this.initEventListeners();
+ }
+
+ initEventListeners() {
+ this.recordBtn.addEventListener('click', () => {
+ if (this.isRecording) {
+ this.stopRecording();
+ } else {
+ this.startRecording();
+ }
+ });
+ }
+
+ // Generate a v4-style UUID
+ generateUUID() {
+ return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function(c) {
+ const r = Math.random() * 16 | 0;
+ const v = c === 'x' ? r : (r & 0x3 | 0x8);
+ return v.toString(16);
+ });
+ }
+
+ // Compute the RMS audio level (volume)
+ calculateAudioLevel(audioData) {
+ let sum = 0;
+ for (let i = 0; i < audioData.length; i++) {
+ sum += audioData[i] * audioData[i];
+ }
+ return Math.sqrt(sum / audioData.length);
+ }
+
+ // Voice activity detection
+ detectVoiceActivity(audioData) {
+ const audioLevel = this.calculateAudioLevel(audioData);
+ const currentTime = Date.now();
+
+ if (audioLevel > this.silenceThreshold) {
+ if (!this.isSpeaking) {
+ this.isSpeaking = true;
+ this.speechStartTime = currentTime;
+ this.audioBuffer = [];
+ this.updateStatus('Speech detected, recording...', 'speaking');
+ console.log('Speech started');
+ }
+
+ if (this.silenceTimer) {
+ clearTimeout(this.silenceTimer);
+ this.silenceTimer = null;
+ }
+
+ return true;
+ } else {
+ if (this.isSpeaking && !this.silenceTimer) {
+ this.silenceTimer = setTimeout(() => {
+ this.onSpeechEnd();
+ }, this.silenceTimeout);
+ }
+
+ return this.isSpeaking;
+ }
+ }
+
+ // Handle end of speech
+ async onSpeechEnd() {
+ if (this.isSpeaking) {
+ const speechDuration = Date.now() - this.speechStartTime;
+
+ if (speechDuration >= this.minSpeechDuration) {
+ console.log(`Speech ended, duration: ${speechDuration}ms`);
+ await this.processAudioBuffer();
+ // this.updateStatus('Recognizing speech...', 'processing');
+ console.log('Recognizing speech...');
+ } else {
+ console.log('Speech too short, ignoring');
+ // this.updateStatus('Waiting for speech input...', 'ready');
+ console.log('Waiting for speech input...');
+ }
+
+ this.isSpeaking = false;
+ this.speechStartTime = null;
+ this.audioBuffer = [];
+ }
+
+ if (this.silenceTimer) {
+ clearTimeout(this.silenceTimer);
+ this.silenceTimer = null;
+ }
+ }
+
+ // Merge the buffered audio and send it to the ASR API
+ async processAudioBuffer() {
+ if (this.audioBuffer.length === 0) {
+ return;
+ }
+
+ try {
+ // Merge all buffered audio chunks
+ const totalLength = this.audioBuffer.reduce((sum, buffer) => sum + buffer.length, 0);
+ const combinedBuffer = new Float32Array(totalLength);
+ let offset = 0;
+
+ for (const buffer of this.audioBuffer) {
+ combinedBuffer.set(buffer, offset);
+ offset += buffer.length;
+ }
+
+ // Encode as WAV and then base64
+ const wavBuffer = this.encodeWAV(combinedBuffer, 16000);
+ const base64Audio = this.arrayBufferToBase64(wavBuffer);
+
+ // Call the ASR API
+ await this.callASRAPI(base64Audio);
+
+ } catch (error) {
+ console.error('Failed to process audio data:', error);
+ this.updateStatus('Recognition failed', 'error');
+ }
+ }
+
+ // Call the ASR API
+ async callASRAPI(base64AudioData) {
+ try {
+ const requestBody = {
+ user: {
+ uid: "1988591469"
+ },
+ audio: {
+ data: base64AudioData
+ },
+ request: {
+ model_name: "bigmodel"
+ }
+ };
+
+ const response = await fetch(this.apiConfig.url, {
+ method: 'POST',
+ headers: this.apiConfig.headers,
+ body: JSON.stringify(requestBody)
+ });
+
+ if (!response.ok) {
+ throw new Error(`HTTP error! status: ${response.status}`);
+ }
+
+ const result = await response.json();
+ this.handleASRResponse(result);
+
+ } catch (error) {
+ console.error('ASR API call failed:', error);
+ this.updateStatus('API call failed', 'error');
+ }
+ }
+
+ // Handle the ASR response
+ handleASRResponse(response) {
+ console.log('ASR response:', response);
+
+ if (response && response.data && response.data.result) {
+ ASRTEXT = response.data.result;
+ // this.displayResult(text);
+ // this.updateStatus('Recognition complete', 'completed');
+ console.log('Recognition complete');
+ } else {
+ console.log('No text recognized');
+ // this.updateStatus('No text recognized', 'ready');
+ }
+ }
+
+ // Display a recognition result
+ displayResult(text) {
+ const resultElement = document.createElement('div');
+ resultElement.className = 'result-item';
+ resultElement.innerHTML = `
+ ${new Date().toLocaleTimeString()}
+ ${text}
+ `;
+ this.resultsDiv.appendChild(resultElement);
+ this.resultsDiv.scrollTop = this.resultsDiv.scrollHeight;
+ }
+
+ // Update the status display
+ updateStatus(message, status) {
+ this.statusDiv.textContent = message;
+ this.statusDiv.className = `status ${status}`;
+ }
+
+ // Encode 16-bit PCM WAV
+ encodeWAV(samples, sampleRate) {
+ const length = samples.length;
+ const buffer = new ArrayBuffer(44 + length * 2);
+ const view = new DataView(buffer);
+
+ // WAV file header
+ const writeString = (offset, string) => {
+ for (let i = 0; i < string.length; i++) {
+ view.setUint8(offset + i, string.charCodeAt(i));
+ }
+ };
+
+ writeString(0, 'RIFF');
+ view.setUint32(4, 36 + length * 2, true);
+ writeString(8, 'WAVE');
+ writeString(12, 'fmt ');
+ view.setUint32(16, 16, true);
+ view.setUint16(20, 1, true);
+ view.setUint16(22, 1, true);
+ view.setUint32(24, sampleRate, true);
+ view.setUint32(28, sampleRate * 2, true);
+ view.setUint16(32, 2, true);
+ view.setUint16(34, 16, true);
+ writeString(36, 'data');
+ view.setUint32(40, length * 2, true);
+
+ // Write the samples: clamp to [-1, 1] and scale to 16-bit signed PCM
+ let offset = 44;
+ for (let i = 0; i < length; i++) {
+ const sample = Math.max(-1, Math.min(1, samples[i]));
+ view.setInt16(offset, sample * 0x7FFF, true);
+ offset += 2;
+ }
+
+ return buffer;
+ }
+
+ // ArrayBuffer → Base64
+ arrayBufferToBase64(buffer) {
+ let binary = '';
+ const bytes = new Uint8Array(buffer);
+ for (let i = 0; i < bytes.byteLength; i++) {
+ binary += String.fromCharCode(bytes[i]);
+ }
+ return btoa(binary);
+ }
+
+ async startRecording() {
+ try {
+ const stream = await navigator.mediaDevices.getUserMedia({
+ audio: {
+ sampleRate: 16000,
+ channelCount: 1,
+ echoCancellation: true,
+ noiseSuppression: true
+ }
+ });
+
+ this.audioContext = new (window.AudioContext || window.webkitAudioContext)({
+ sampleRate: 16000
+ });
+
+ const source = this.audioContext.createMediaStreamSource(stream);
+ const processor = this.audioContext.createScriptProcessor(4096, 1, 1);
+
+ processor.onaudioprocess = (event) => {
+ const inputBuffer = event.inputBuffer;
+ const inputData = inputBuffer.getChannelData(0);
+
+ // Voice activity detection
+ if (this.detectVoiceActivity(inputData)) {
+ // Buffer the audio while speech is active
+ this.audioBuffer.push(new Float32Array(inputData));
+ }
+ };
+
+ source.connect(processor);
+ processor.connect(this.audioContext.destination);
+
+ this.isRecording = true;
+ this.recordBtn.textContent = 'Stop Recording';
+ this.recordBtn.className = 'btn recording';
+ // this.updateStatus('Waiting for speech input...', 'ready');
+
+ } catch (error) {
+ console.error('Failed to start recording:', error);
+ // this.updateStatus('Failed to start recording', 'error');
+ }
+ }
+
+ stopRecording() {
+ if (this.audioContext) {
+ this.audioContext.close();
+ this.audioContext = null;
+ }
+
+ if (this.silenceTimer) {
+ clearTimeout(this.silenceTimer);
+ this.silenceTimer = null;
+ }
+
+ // If speech is in progress, flush the final audio
+ if (this.isSpeaking) {
+ this.onSpeechEnd();
+ }
+
+ this.isRecording = false;
+ this.isSpeaking = false;
+ this.audioBuffer = [];
+
+ this.recordBtn.textContent = 'Start Recording';
+ this.recordBtn.className = 'btn';
+ console.log('Recording stopped');
+ // this.updateStatus('Recording stopped', 'stopped');
+ }
+}
+
+// Initialize the app
+document.addEventListener('DOMContentLoaded', () => {
+ const asrRecognizer = new HttpASRRecognizer();
+ console.log('HTTP ASR recognizer initialized');
+});
\ No newline at end of file