import React, { useState, useEffect, useCallback, useRef } from 'react';
import { startVoiceSession } from '../services/geminiService';
import { XMarkIcon } from './icons/XMarkIcon';
import { MicrophoneIcon } from './icons/MicrophoneIcon';
// FIX: Removed `LiveSession` as it is not an exported member of the module.
import { LiveServerMessage, Blob } from '@google/genai';
import { encode } from '../utils/audioUtils';
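
// `encode` is assumed to base64-encode raw PCM bytes for the Live API; a
// minimal sketch of such a helper, in case the actual util differs:
//
//   function encode(bytes: Uint8Array): string {
//     let binary = '';
//     for (let i = 0; i < bytes.byteLength; i++) {
//       binary += String.fromCharCode(bytes[i]);
//     }
//     return btoa(binary);
//   }
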
interface VoiceAssistantProps {
  isOpen: boolean;
  onClose: () => void;
}

type VoiceStatus = 'idle' | 'connecting' | 'listening' | 'speaking' | 'error';
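
// Intended lifecycle (sketch; 'speaking' is reserved for when model audio
// playback is implemented, and currently surfaces as "Thinking..."):
//   idle -> connecting -> listening <-> speaking -> idle
//   any state -> error -> (mic click retries) -> connecting
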
export const VoiceAssistant: React.FC<VoiceAssistantProps> = ({ isOpen, onClose }) => {
  const [status, setStatus] = useState<VoiceStatus>('idle');
  const [userTranscript, setUserTranscript] = useState('');
  const [modelTranscript, setModelTranscript] = useState('');
  const [history, setHistory] = useState<{ speaker: 'user' | 'model'; text: string }[]>([]);

  // FIX: Infer the session promise type from the return type of `startVoiceSession`
  // to avoid needing to import the `LiveSession` type directly.
  const sessionPromiseRef = useRef<ReturnType<typeof startVoiceSession> | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const scriptProcessorRef = useRef<ScriptProcessorNode | null>(null);
  // These refs mirror the in-progress transcripts so the session callbacks
  // (created once, at connect time) always read the current values instead
  // of a stale closure over the state above.
  const userTranscriptRef = useRef('');
  const modelTranscriptRef = useRef('');

  const stopRecording = useCallback(() => {
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(track => track.stop());
      mediaStreamRef.current = null;
    }
    if (scriptProcessorRef.current) {
      scriptProcessorRef.current.disconnect();
      scriptProcessorRef.current = null;
    }
    if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
      audioContextRef.current.close();
      audioContextRef.current = null;
    }
  }, []);

  const handleClose = useCallback(() => {
    if (sessionPromiseRef.current) {
      sessionPromiseRef.current.then(session => session.close());
      sessionPromiseRef.current = null;
    }
    stopRecording();
    setStatus('idle');
    setHistory([]);
    userTranscriptRef.current = '';
    modelTranscriptRef.current = '';
    setUserTranscript('');
    setModelTranscript('');
    onClose();
  }, [onClose, stopRecording]);

  const startSession = useCallback(async () => {
    if (status !== 'idle' && status !== 'error') return;

    setStatus('connecting');
    setHistory([]);
    userTranscriptRef.current = '';
    modelTranscriptRef.current = '';
    setUserTranscript('');
    setModelTranscript('');

    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaStreamRef.current = stream;

      const callbacks = {
        onopen: () => {
          console.log('Voice session opened.');
          setStatus('listening');

          // Start streaming microphone audio to the model. The Live API
          // expects 16 kHz mono PCM input, so the capture context is created
          // at that rate. Note: ScriptProcessorNode is deprecated; an
          // AudioWorklet would be the modern replacement.
          audioContextRef.current = new window.AudioContext({ sampleRate: 16000 });
          const source = audioContextRef.current.createMediaStreamSource(stream);
          const scriptProcessor = audioContextRef.current.createScriptProcessor(4096, 1, 1);
          scriptProcessorRef.current = scriptProcessor;

          scriptProcessor.onaudioprocess = (audioProcessingEvent) => {
            const inputData = audioProcessingEvent.inputBuffer.getChannelData(0);
            // Convert Float32 samples in [-1, 1] to 16-bit PCM, clamping so
            // a full-scale sample cannot wrap around at the integer boundary.
            const pcm = new Int16Array(inputData.length);
            for (let i = 0; i < inputData.length; i++) {
              const s = Math.max(-1, Math.min(1, inputData[i]));
              pcm[i] = s < 0 ? s * 32768 : s * 32767;
            }
            const pcmBlob: Blob = {
              data: encode(new Uint8Array(pcm.buffer)),
              mimeType: 'audio/pcm;rate=16000',
            };
            sessionPromiseRef.current?.then((session) => {
              session.sendRealtimeInput({ media: pcmBlob });
            });
          };
          source.connect(scriptProcessor);
          // Connecting to the destination keeps the processor running; its
          // output buffer is left silent, so nothing is audibly played back.
          scriptProcessor.connect(audioContextRef.current.destination);
        },

        onmessage: (message: LiveServerMessage) => {
          // NOTE: This stub doesn't play audio, just displays transcripts.
          // A full implementation would use the audioUtils to decode and play audio.
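          // Roughly, inside an async onmessage (a sketch, assuming `decode` and
          // `decodeAudioData` helpers in ../utils/audioUtils and a separate
          // 24 kHz output AudioContext, since the Live API returns 24 kHz PCM):
          //
          //   const b64 = message.serverContent?.modelTurn?.parts?.[0]?.inlineData?.data;
          //   if (b64) {
          //     const audioBuffer = await decodeAudioData(decode(b64), outputCtx, 24000, 1);
          //     const src = outputCtx.createBufferSource();
          //     src.buffer = audioBuffer;
          //     src.connect(outputCtx.destination);
          //     src.start(nextStartTime);
          //     nextStartTime = Math.max(nextStartTime, outputCtx.currentTime) + audioBuffer.duration;
          //   }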

          if (message.serverContent?.inputTranscription) {
            userTranscriptRef.current += message.serverContent.inputTranscription.text ?? '';
            setUserTranscript(userTranscriptRef.current);
          }
          if (message.serverContent?.outputTranscription) {
            modelTranscriptRef.current += message.serverContent.outputTranscription.text ?? '';
            setModelTranscript(modelTranscriptRef.current);
          }
          if (message.serverContent?.turnComplete) {
            // Read from the refs, not the state variables: these callbacks
            // are created once, so captured state would always be stale here.
            setHistory(prev => [...prev,
              { speaker: 'user', text: userTranscriptRef.current },
              { speaker: 'model', text: modelTranscriptRef.current },
            ]);
            userTranscriptRef.current = '';
            modelTranscriptRef.current = '';
            setUserTranscript('');
            setModelTranscript('');
          }
        },
        onerror: (e: ErrorEvent) => {
          console.error('Voice session error:', e);
          setStatus('error');
          stopRecording();
        },
        onclose: () => {
          console.log('Voice session closed.');
          stopRecording();
          setStatus('idle');
        },
      };

      sessionPromiseRef.current = startVoiceSession(callbacks);
    } catch (e: any) {
      console.error('Failed to start voice session:', e);
      setStatus('error');
    }
  }, [status, stopRecording]);
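
  // `startVoiceSession` is assumed to wrap `ai.live.connect` from @google/genai,
  // roughly like this sketch (the model name and config are assumptions, not
  // the actual service code; transcription must be enabled in the config for
  // the `inputTranscription`/`outputTranscription` messages consumed above):
  //
  //   export function startVoiceSession(callbacks: LiveCallbacks) {
  //     const ai = new GoogleGenAI({ apiKey: process.env.API_KEY });
  //     return ai.live.connect({
  //       model: '...', // a Live API native-audio model
  //       callbacks,
  //       config: {
  //         inputAudioTranscription: {},
  //         outputAudioTranscription: {},
  //       },
  //     });
  //   }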

  useEffect(() => {
    // Close the session and release the microphone on unmount.
    return () => {
      handleClose();
    };
  }, [handleClose]);

  if (!isOpen) return null;

  const getStatusText = () => {
    switch (status) {
      case 'idle': return 'Click the mic to start';
      case 'connecting': return 'Connecting...';
      case 'listening': return 'Listening...';
      case 'speaking': return 'Thinking...';
      case 'error': return 'Connection error. Please try again.';
    }
  };

  return (
    <div
      className="fixed inset-0 bg-black bg-opacity-60 z-50 flex justify-center items-center p-4"
      onClick={handleClose}
    >
      <div
        className="bg-white dark:bg-gray-800 rounded-lg shadow-xl w-full max-w-lg relative flex flex-col h-[70vh]"
        onClick={e => e.stopPropagation()}
      >
        <div className="flex justify-between items-center p-4 border-b border-gray-200 dark:border-gray-700">
          <h2 className="text-xl font-bold text-gray-800 dark:text-white">Voice Assistant</h2>
          <button onClick={handleClose} className="text-gray-400 hover:text-gray-600 dark:hover:text-gray-200">
            <XMarkIcon className="w-6 h-6" />
          </button>
        </div>

        <div className="flex-grow p-4 overflow-y-auto space-y-4">
          {history.map((entry, index) => (
            <div key={index} className={`p-3 rounded-lg max-w-[80%] ${entry.speaker === 'user' ? 'bg-blue-100 dark:bg-blue-900/50 ml-auto' : 'bg-gray-100 dark:bg-gray-700/50'}`}>
              <p className="text-sm text-gray-800 dark:text-gray-200">{entry.text}</p>
            </div>
          ))}
          {userTranscript && <div className="p-3 rounded-lg max-w-[80%] bg-blue-100 dark:bg-blue-900/50 ml-auto opacity-70"><p className="text-sm text-gray-800 dark:text-gray-200">{userTranscript}</p></div>}
          {modelTranscript && <div className="p-3 rounded-lg max-w-[80%] bg-gray-100 dark:bg-gray-700/50 opacity-70"><p className="text-sm text-gray-800 dark:text-gray-200">{modelTranscript}</p></div>}
        </div>

        <div className="p-4 border-t border-gray-200 dark:border-gray-700 flex flex-col items-center">
          <button
            onClick={status === 'idle' || status === 'error' ? startSession : handleClose}
            className={`w-16 h-16 rounded-full flex items-center justify-center transition-colors ${status === 'listening' ? 'bg-red-500 hover:bg-red-600' : 'bg-brand-primary hover:bg-brand-secondary'}`}
          >
            <MicrophoneIcon className="w-8 h-8 text-white" />
          </button>
          <p className="text-sm text-gray-500 dark:text-gray-400 mt-2">{getStatusText()}</p>
        </div>
      </div>
    </div>
  );
};
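
// Example usage (a sketch; the parent component and state names are assumptions):
//
//   const [assistantOpen, setAssistantOpen] = useState(false);
//   ...
//   <button onClick={() => setAssistantOpen(true)}>Ask by voice</button>
//   <VoiceAssistant isOpen={assistantOpen} onClose={() => setAssistantOpen(false)} />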