Add GenAI rate limiting
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 7m47s
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 7m47s
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
import { GoogleGenAI } from '@google/genai';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import { logger } from './logger.server';
|
||||
import pRateLimit from 'p-ratelimit';
|
||||
import type { FlyerItem, MasterGroceryItem, ExtractedFlyerItem } from '../types';
|
||||
|
||||
/**
|
||||
@@ -43,6 +44,7 @@ type RawFlyerItem = {
|
||||
export class AIService {
|
||||
private aiClient: IAiClient;
|
||||
private fs: IFileSystem;
|
||||
private rateLimiter: <T>(fn: () => Promise<T>) => Promise<T>;
|
||||
|
||||
constructor(aiClient?: IAiClient, fs?: IFileSystem) {
|
||||
if (aiClient) {
|
||||
@@ -65,6 +67,16 @@ export class AIService {
|
||||
}
|
||||
|
||||
this.fs = fs || fsPromises;
|
||||
|
||||
// Initialize the rate limiter based on an environment variable.
|
||||
// Defaults to 5 requests per minute (60,000 ms) if not specified.
|
||||
const requestsPerMinute = parseInt(process.env.GEMINI_RPM || '5', 10);
|
||||
this.rateLimiter = pRateLimit({
|
||||
interval: 60 * 1000, // 1 minute
|
||||
rate: requestsPerMinute,
|
||||
concurrency: requestsPerMinute, // Allow up to `rate` requests to be running in parallel.
|
||||
});
|
||||
logger.info(`[AIService] Rate limiter initialized to ${requestsPerMinute} requests per minute.`);
|
||||
}
|
||||
|
||||
private async serverFileToGenerativePart(path: string, mimeType: string) {
|
||||
@@ -174,10 +186,12 @@ export class AIService {
|
||||
const imagePart = await this.serverFileToGenerativePart(imagePath, imageMimeType);
|
||||
|
||||
try {
|
||||
const response = await this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{text: prompt}, imagePart] }]
|
||||
});
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() =>
|
||||
this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{text: prompt}, imagePart] }]
|
||||
}));
|
||||
const text = response.text;
|
||||
const parsedJson = this._parseJsonFromAiResponse<any[]>(text);
|
||||
|
||||
@@ -215,11 +229,13 @@ export class AIService {
|
||||
try {
|
||||
logger.debug(`[aiService.server] Calling Gemini API for flyer processing with ${imageParts.length} image(s).`);
|
||||
const geminiCallStartTime = process.hrtime.bigint();
|
||||
|
||||
const response = await this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
|
||||
});
|
||||
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() =>
|
||||
this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
|
||||
}));
|
||||
|
||||
const geminiCallEndTime = process.hrtime.bigint();
|
||||
const durationMs = Number(geminiCallEndTime - geminiCallStartTime) / 1_000_000;
|
||||
@@ -307,10 +323,12 @@ export class AIService {
|
||||
// 4. Call the AI model
|
||||
try {
|
||||
logger.info(`[aiService.server] Calling Gemini for targeted rescan of type: ${extractionType}`);
|
||||
const response = await this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, imagePart] }]
|
||||
});
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() =>
|
||||
this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, imagePart] }]
|
||||
}));
|
||||
|
||||
const text = response.text?.trim() ?? '';
|
||||
logger.info(`[aiService.server] Gemini rescan completed. Extracted text: "${text}"`);
|
||||
@@ -334,13 +352,14 @@ export class AIService {
|
||||
const storeName = store?.name || 'the grocery store';
|
||||
|
||||
try {
|
||||
const response = await this.aiClient.generateContent({
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() => this.aiClient.generateContent({
|
||||
model: "gemini-2.5-flash",
|
||||
contents: [{ parts: [{ text: `My current location is latitude ${userLocation.latitude}, longitude ${userLocation.longitude}.
|
||||
I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route.
|
||||
Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`}]}],
|
||||
tools: [{ "googleSearch": {} }],
|
||||
});
|
||||
}));
|
||||
|
||||
// In a real implementation, you would render the map URLs from the sources.
|
||||
const sources = (response.candidates?.[0]?.groundingMetadata?.groundingAttributions || []).map((att: any) => ({
|
||||
|
||||
Reference in New Issue
Block a user