// File: flyer-crawler.projectium.com/components/VoiceAssistant.tsx
// (TypeScript, ~193 lines, 8.8 KiB)

import React, { useState, useEffect, useCallback, useRef } from 'react';
import { startVoiceSession } from '../services/geminiService';
import { XMarkIcon } from './icons/XMarkIcon';
import { MicrophoneIcon } from './icons/MicrophoneIcon';
// FIX: Removed `LiveSession` as it is not an exported member of the module.
import { LiveServerMessage, Blob } from '@google/genai';
import { encode } from '../utils/audioUtils';
/** Props for the {@link VoiceAssistant} modal dialog. */
interface VoiceAssistantProps {
/** Whether the assistant modal is visible; when false the component renders nothing. */
isOpen: boolean;
/** Called when the user dismisses the modal (backdrop click, X button, or mic toggle while active). */
onClose: () => void;
}
/** Lifecycle of the voice session, driving the status label and mic button color. */
type VoiceStatus = 'idle' | 'connecting' | 'listening' | 'speaking' | 'error';
/**
 * Modal voice-assistant UI backed by a Gemini live session.
 *
 * Captures microphone audio, streams it to the model as 16 kHz 16-bit PCM,
 * and renders incremental input/output transcripts plus a per-turn history.
 * NOTE: model audio is not played back here — only transcripts are displayed.
 */
export const VoiceAssistant: React.FC<VoiceAssistantProps> = ({ isOpen, onClose }) => {
  const [status, setStatus] = useState<VoiceStatus>('idle');
  const [userTranscript, setUserTranscript] = useState('');
  const [modelTranscript, setModelTranscript] = useState('');
  const [history, setHistory] = useState<{ speaker: 'user' | 'model'; text: string }[]>([]);

  // Session promise type is inferred from `startVoiceSession` because
  // `LiveSession` is not an exported member of @google/genai.
  const sessionPromiseRef = useRef<ReturnType<typeof startVoiceSession> | null>(null);
  const mediaStreamRef = useRef<MediaStream | null>(null);
  const audioContextRef = useRef<AudioContext | null>(null);
  const scriptProcessorRef = useRef<ScriptProcessorNode | null>(null);

  // FIX: the session callbacks are created once when the session starts, so
  // reading `userTranscript`/`modelTranscript` state inside `onmessage` would
  // see stale (initially empty) values when a turn completes. These refs
  // mirror the transcript state and are always current inside the callbacks.
  const userTranscriptRef = useRef('');
  const modelTranscriptRef = useRef('');

  /** Stops the mic stream and tears down the Web Audio capture graph. */
  const stopRecording = useCallback(() => {
    if (mediaStreamRef.current) {
      mediaStreamRef.current.getTracks().forEach(track => track.stop());
      mediaStreamRef.current = null;
    }
    if (scriptProcessorRef.current) {
      scriptProcessorRef.current.disconnect();
      scriptProcessorRef.current = null;
    }
    if (audioContextRef.current && audioContextRef.current.state !== 'closed') {
      audioContextRef.current.close();
      audioContextRef.current = null;
    }
  }, []);

  /** Closes the live session (if any), resets all UI state, and notifies the parent. */
  const handleClose = useCallback(() => {
    if (sessionPromiseRef.current) {
      sessionPromiseRef.current
        .then(session => session.close())
        // FIX: swallow rejection so a failed connect doesn't surface as an
        // unhandled promise rejection during teardown.
        .catch(() => { /* session never opened or already closed */ });
      sessionPromiseRef.current = null;
    }
    stopRecording();
    setStatus('idle');
    setHistory([]);
    userTranscriptRef.current = '';
    modelTranscriptRef.current = '';
    setUserTranscript('');
    setModelTranscript('');
    onClose();
  }, [onClose, stopRecording]);

  /** Requests mic access and opens a live session, wiring capture + transcript callbacks. */
  const startSession = useCallback(async () => {
    if (status !== 'idle' && status !== 'error') return;
    setStatus('connecting');
    setHistory([]);
    userTranscriptRef.current = '';
    modelTranscriptRef.current = '';
    setUserTranscript('');
    setModelTranscript('');
    try {
      const stream = await navigator.mediaDevices.getUserMedia({ audio: true });
      mediaStreamRef.current = stream;
      const callbacks = {
        onopen: () => {
          console.log('Voice session opened.');
          setStatus('listening');
          // Start streaming microphone audio to the model at 16 kHz mono.
          audioContextRef.current = new window.AudioContext({ sampleRate: 16000 });
          const source = audioContextRef.current.createMediaStreamSource(stream);
          const scriptProcessor = audioContextRef.current.createScriptProcessor(4096, 1, 1);
          scriptProcessorRef.current = scriptProcessor;
          scriptProcessor.onaudioprocess = (audioProcessingEvent) => {
            const inputData = audioProcessingEvent.inputBuffer.getChannelData(0);
            // FIX: convert float samples [-1, 1] to 16-bit PCM with explicit
            // clamping. The previous Float32Array.map + Int16Array round-trip
            // let a full-scale +1.0 sample (32768) wrap around to -32768.
            const pcm = new Int16Array(inputData.length);
            for (let i = 0; i < inputData.length; i++) {
              const s = Math.max(-1, Math.min(1, inputData[i]));
              pcm[i] = s < 0 ? s * 32768 : s * 32767;
            }
            const pcmBlob: Blob = {
              data: encode(new Uint8Array(pcm.buffer)),
              mimeType: 'audio/pcm;rate=16000',
            };
            sessionPromiseRef.current?.then((session) => {
              session.sendRealtimeInput({ media: pcmBlob });
            });
          };
          source.connect(scriptProcessor);
          scriptProcessor.connect(audioContextRef.current.destination);
        },
        onmessage: (message: LiveServerMessage) => {
          // NOTE: This stub doesn't play audio, just displays transcripts.
          // A full implementation would use the audioUtils to decode and play audio.
          // FIX: capture the text into locals — `?.` narrowing of a mutable
          // parameter does not persist inside the setState callbacks.
          const inText = message.serverContent?.inputTranscription?.text;
          if (inText) {
            userTranscriptRef.current += inText;
            setUserTranscript(userTranscriptRef.current);
          }
          const outText = message.serverContent?.outputTranscription?.text;
          if (outText) {
            modelTranscriptRef.current += outText;
            setModelTranscript(modelTranscriptRef.current);
          }
          if (message.serverContent?.turnComplete) {
            // FIX: read the refs, not state, so the completed turn records the
            // full accumulated transcripts instead of stale closure values.
            setHistory(prev => [
              ...prev,
              { speaker: 'user', text: userTranscriptRef.current },
              { speaker: 'model', text: modelTranscriptRef.current },
            ]);
            userTranscriptRef.current = '';
            modelTranscriptRef.current = '';
            setUserTranscript('');
            setModelTranscript('');
          }
        },
        onerror: (e: ErrorEvent) => {
          console.error('Voice session error:', e);
          setStatus('error');
          stopRecording();
        },
        onclose: () => {
          console.log('Voice session closed.');
          stopRecording();
          setStatus('idle');
        },
      };
      sessionPromiseRef.current = startVoiceSession(callbacks);
    } catch (e: any) {
      console.error("Failed to start voice session:", e);
      setStatus('error');
    }
  }, [status, stopRecording]);

  // FIX: run teardown only on unmount. Going through a ref (instead of
  // depending on `handleClose`) prevents the cleanup from firing — and
  // killing a live session — whenever the `onClose` prop identity changes.
  const handleCloseRef = useRef(handleClose);
  handleCloseRef.current = handleClose;
  useEffect(() => {
    return () => {
      handleCloseRef.current();
    };
  }, []);

  if (!isOpen) return null;

  /** Human-readable label for the current session status. */
  const getStatusText = () => {
    switch (status) {
      case 'idle': return 'Click the mic to start';
      case 'connecting': return 'Connecting...';
      case 'listening': return 'Listening...';
      case 'speaking': return 'Thinking...';
      case 'error': return 'Connection error. Please try again.';
    }
  };

  return (
    <div
      className="fixed inset-0 bg-black bg-opacity-60 z-50 flex justify-center items-center p-4"
      onClick={handleClose}
    >
      <div
        className="bg-white dark:bg-gray-800 rounded-lg shadow-xl w-full max-w-lg relative flex flex-col h-[70vh]"
        onClick={e => e.stopPropagation()}
      >
        <div className="flex justify-between items-center p-4 border-b border-gray-200 dark:border-gray-700">
          <h2 className="text-xl font-bold text-gray-800 dark:text-white">Voice Assistant</h2>
          <button onClick={handleClose} className="text-gray-400 hover:text-gray-600 dark:hover:text-gray-200">
            <XMarkIcon className="w-6 h-6" />
          </button>
        </div>
        <div className="flex-grow p-4 overflow-y-auto space-y-4">
          {history.map((entry, index) => (
            <div key={index} className={`p-3 rounded-lg max-w-[80%] ${entry.speaker === 'user' ? 'bg-blue-100 dark:bg-blue-900/50 ml-auto' : 'bg-gray-100 dark:bg-gray-700/50'}`}>
              <p className="text-sm text-gray-800 dark:text-gray-200">{entry.text}</p>
            </div>
          ))}
          {userTranscript && <div className="p-3 rounded-lg max-w-[80%] bg-blue-100 dark:bg-blue-900/50 ml-auto opacity-70"><p className="text-sm text-gray-800 dark:text-gray-200">{userTranscript}</p></div>}
          {modelTranscript && <div className="p-3 rounded-lg max-w-[80%] bg-gray-100 dark:bg-gray-700/50 opacity-70"><p className="text-sm text-gray-800 dark:text-gray-200">{modelTranscript}</p></div>}
        </div>
        <div className="p-4 border-t border-gray-200 dark:border-gray-700 flex flex-col items-center">
          <button
            onClick={status === 'idle' || status === 'error' ? startSession : handleClose}
            className={`w-16 h-16 rounded-full flex items-center justify-center transition-colors ${status === 'listening' ? 'bg-red-500 hover:bg-red-600' : 'bg-brand-primary hover:bg-brand-secondary'}`}
          >
            <MicrophoneIcon className="w-8 h-8 text-white" />
          </button>
          <p className="text-sm text-gray-500 dark:text-gray-400 mt-2">{getStatusText()}</p>
        </div>
      </div>
    </div>
  );
};