Added GenAI rate limiting
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 7m47s

This commit is contained in:
2025-12-10 10:29:57 -08:00
parent 331df90f1b
commit 0694a5501f
4 changed files with 52 additions and 16 deletions

View File

@@ -8,6 +8,7 @@
import { GoogleGenAI } from '@google/genai';
import fsPromises from 'node:fs/promises';
import { logger } from './logger.server';
import pRateLimit from 'p-ratelimit';
import type { FlyerItem, MasterGroceryItem, ExtractedFlyerItem } from '../types';
/**
@@ -43,6 +44,7 @@ type RawFlyerItem = {
export class AIService {
private aiClient: IAiClient;
private fs: IFileSystem;
private rateLimiter: <T>(fn: () => Promise<T>) => Promise<T>;
constructor(aiClient?: IAiClient, fs?: IFileSystem) {
if (aiClient) {
@@ -65,6 +67,16 @@ export class AIService {
}
this.fs = fs || fsPromises;
// Initialize the rate limiter based on an environment variable.
// Defaults to 5 requests per minute (one 60,000 ms interval) if GEMINI_RPM is not set.
const requestsPerMinute = parseInt(process.env.GEMINI_RPM || '5', 10);
this.rateLimiter = pRateLimit({
interval: 60 * 1000, // 1 minute
rate: requestsPerMinute,
concurrency: requestsPerMinute, // Allow up to `rate` requests to be running in parallel.
});
logger.info(`[AIService] Rate limiter initialized to ${requestsPerMinute} requests per minute.`);
}
private async serverFileToGenerativePart(path: string, mimeType: string) {
@@ -174,10 +186,12 @@ export class AIService {
const imagePart = await this.serverFileToGenerativePart(imagePath, imageMimeType);
try {
const response = await this.aiClient.generateContent({
model: 'gemini-2.5-flash',
contents: [{ parts: [{text: prompt}, imagePart] }]
});
// Wrap the AI call with the rate limiter.
const response = await this.rateLimiter(() =>
this.aiClient.generateContent({
model: 'gemini-2.5-flash',
contents: [{ parts: [{text: prompt}, imagePart] }]
}));
const text = response.text;
const parsedJson = this._parseJsonFromAiResponse<any[]>(text);
@@ -215,11 +229,13 @@ export class AIService {
try {
logger.debug(`[aiService.server] Calling Gemini API for flyer processing with ${imageParts.length} image(s).`);
const geminiCallStartTime = process.hrtime.bigint();
const response = await this.aiClient.generateContent({
model: 'gemini-2.5-flash',
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
});
// Wrap the AI call with the rate limiter.
const response = await this.rateLimiter(() =>
this.aiClient.generateContent({
model: 'gemini-2.5-flash',
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
}));
const geminiCallEndTime = process.hrtime.bigint();
const durationMs = Number(geminiCallEndTime - geminiCallStartTime) / 1_000_000;
@@ -307,10 +323,12 @@ export class AIService {
// 4. Call the AI model
try {
logger.info(`[aiService.server] Calling Gemini for targeted rescan of type: ${extractionType}`);
const response = await this.aiClient.generateContent({
model: 'gemini-2.5-flash',
contents: [{ parts: [{ text: prompt }, imagePart] }]
});
// Wrap the AI call with the rate limiter.
const response = await this.rateLimiter(() =>
this.aiClient.generateContent({
model: 'gemini-2.5-flash',
contents: [{ parts: [{ text: prompt }, imagePart] }]
}));
const text = response.text?.trim() ?? '';
logger.info(`[aiService.server] Gemini rescan completed. Extracted text: "${text}"`);
@@ -334,13 +352,14 @@ export class AIService {
const storeName = store?.name || 'the grocery store';
try {
const response = await this.aiClient.generateContent({
// Wrap the AI call with the rate limiter.
const response = await this.rateLimiter(() => this.aiClient.generateContent({
model: "gemini-2.5-flash",
contents: [{ parts: [{ text: `My current location is latitude ${userLocation.latitude}, longitude ${userLocation.longitude}.
I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route.
Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`}]}],
tools: [{ "googleSearch": {} }],
});
}));
// In a real implementation, you would render the map URLs from the sources.
const sources = (response.candidates?.[0]?.groundingMetadata?.groundingAttributions || []).map((att: any) => ({