59 lines
1.9 KiB
TypeScript
59 lines
1.9 KiB
TypeScript
/**
 * Converts raw bytes into their base64 text representation.
 * Used to prepare audio data for the Gemini API.
 *
 * @param bytes The bytes to encode.
 * @returns The base64 representation of `bytes`.
 */
export function encode(bytes: Uint8Array): string {
  // btoa() consumes a "binary string": one character per byte, each with a
  // code unit in the range 0-255.
  const chars = Array.from(bytes, (byte) => String.fromCharCode(byte));
  return btoa(chars.join(''));
}
|
|
|
|
/**
 * Turns a base64 string back into the bytes it represents.
 * Used to unpack audio data received from the Gemini API.
 *
 * @param base64 The base64 encoded string.
 * @returns A Uint8Array holding the decoded bytes.
 */
export function decode(base64: string): Uint8Array {
  // atob() yields a "binary string" in which every character stands for one
  // byte (code unit 0-255), so each character maps directly to a byte value.
  const decoded = atob(base64);
  return Uint8Array.from(decoded, (ch) => ch.charCodeAt(0));
}
|
|
|
|
/**
 * Decodes raw 16-bit signed PCM audio into an AudioBuffer that can be played
 * by the browser. Used for handling audio data from the Gemini API.
 *
 * Samples are read as little-endian and are expected to be interleaved by
 * channel (frame 0 / channel 0, frame 0 / channel 1, ..., frame 1 / channel 0,
 * ...). Each sample is divided by 32768 to map it into the [-1, 1) float range.
 *
 * Only the bytes covered by `data` itself are read, so views into a larger
 * buffer (for example the result of `subarray`) decode correctly. A trailing
 * odd byte, or trailing samples that do not fill a complete frame, are ignored.
 *
 * @param data The raw audio data as a Uint8Array.
 * @param ctx The AudioContext to use for creating the buffer.
 * @param sampleRate The sample rate of the audio (e.g., 24000 for TTS).
 * @param numChannels The number of audio channels (typically 1).
 * @returns A promise that resolves to an AudioBuffer.
 */
export async function decodeAudioData(
  data: Uint8Array,
  ctx: AudioContext,
  sampleRate: number,
  numChannels: number,
): Promise<AudioBuffer> {
  const bytesPerSample = 2;

  // A DataView honours the view's byteOffset/byteLength, has no alignment
  // requirement, and makes the byte order explicit instead of relying on the
  // host's endianness.
  const view = new DataView(data.buffer, data.byteOffset, data.byteLength);

  // Floor both counts so a partial trailing sample or frame is dropped rather
  // than producing a fractional buffer length.
  const sampleCount = Math.floor(data.byteLength / bytesPerSample);
  const frameCount = Math.floor(sampleCount / numChannels);
  const buffer = ctx.createBuffer(numChannels, frameCount, sampleRate);

  for (let channel = 0; channel < numChannels; channel++) {
    const channelData = buffer.getChannelData(channel);
    for (let frame = 0; frame < frameCount; frame++) {
      // De-interleave: samples for one frame sit next to each other.
      const byteIndex = (frame * numChannels + channel) * bytesPerSample;
      channelData[frame] = view.getInt16(byteIndex, true) / 32768.0;
    }
  }
  return buffer;
}
|