Add GenAI rate limiting
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 7m47s
All checks were successful
Deploy to Web Server flyer-crawler.projectium.com / deploy (push) Successful in 7m47s
This commit is contained in:
@@ -8,6 +8,7 @@
|
||||
import { GoogleGenAI } from '@google/genai';
|
||||
import fsPromises from 'node:fs/promises';
|
||||
import { logger } from './logger.server';
|
||||
import pRateLimit from 'p-ratelimit';
|
||||
import type { FlyerItem, MasterGroceryItem, ExtractedFlyerItem } from '../types';
|
||||
|
||||
/**
|
||||
@@ -43,6 +44,7 @@ type RawFlyerItem = {
|
||||
export class AIService {
|
||||
private aiClient: IAiClient;
|
||||
private fs: IFileSystem;
|
||||
private rateLimiter: <T>(fn: () => Promise<T>) => Promise<T>;
|
||||
|
||||
constructor(aiClient?: IAiClient, fs?: IFileSystem) {
|
||||
if (aiClient) {
|
||||
@@ -65,6 +67,16 @@ export class AIService {
|
||||
}
|
||||
|
||||
this.fs = fs || fsPromises;
|
||||
|
||||
// Initialize the rate limiter based on an environment variable.
|
||||
// Defaults to 5 requests per minute (60,000 ms) if not specified.
|
||||
const requestsPerMinute = parseInt(process.env.GEMINI_RPM || '5', 10);
|
||||
this.rateLimiter = pRateLimit({
|
||||
interval: 60 * 1000, // 1 minute
|
||||
rate: requestsPerMinute,
|
||||
concurrency: requestsPerMinute, // Allow up to `rate` requests to be running in parallel.
|
||||
});
|
||||
logger.info(`[AIService] Rate limiter initialized to ${requestsPerMinute} requests per minute.`);
|
||||
}
|
||||
|
||||
private async serverFileToGenerativePart(path: string, mimeType: string) {
|
||||
@@ -174,10 +186,12 @@ export class AIService {
|
||||
const imagePart = await this.serverFileToGenerativePart(imagePath, imageMimeType);
|
||||
|
||||
try {
|
||||
const response = await this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{text: prompt}, imagePart] }]
|
||||
});
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() =>
|
||||
this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{text: prompt}, imagePart] }]
|
||||
}));
|
||||
const text = response.text;
|
||||
const parsedJson = this._parseJsonFromAiResponse<any[]>(text);
|
||||
|
||||
@@ -215,11 +229,13 @@ export class AIService {
|
||||
try {
|
||||
logger.debug(`[aiService.server] Calling Gemini API for flyer processing with ${imageParts.length} image(s).`);
|
||||
const geminiCallStartTime = process.hrtime.bigint();
|
||||
|
||||
const response = await this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
|
||||
});
|
||||
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() =>
|
||||
this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
|
||||
}));
|
||||
|
||||
const geminiCallEndTime = process.hrtime.bigint();
|
||||
const durationMs = Number(geminiCallEndTime - geminiCallStartTime) / 1_000_000;
|
||||
@@ -307,10 +323,12 @@ export class AIService {
|
||||
// 4. Call the AI model
|
||||
try {
|
||||
logger.info(`[aiService.server] Calling Gemini for targeted rescan of type: ${extractionType}`);
|
||||
const response = await this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, imagePart] }]
|
||||
});
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() =>
|
||||
this.aiClient.generateContent({
|
||||
model: 'gemini-2.5-flash',
|
||||
contents: [{ parts: [{ text: prompt }, imagePart] }]
|
||||
}));
|
||||
|
||||
const text = response.text?.trim() ?? '';
|
||||
logger.info(`[aiService.server] Gemini rescan completed. Extracted text: "${text}"`);
|
||||
@@ -334,13 +352,14 @@ export class AIService {
|
||||
const storeName = store?.name || 'the grocery store';
|
||||
|
||||
try {
|
||||
const response = await this.aiClient.generateContent({
|
||||
// Wrap the AI call with the rate limiter.
|
||||
const response = await this.rateLimiter(() => this.aiClient.generateContent({
|
||||
model: "gemini-2.5-flash",
|
||||
contents: [{ parts: [{ text: `My current location is latitude ${userLocation.latitude}, longitude ${userLocation.longitude}.
|
||||
I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route.
|
||||
Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`}]}],
|
||||
tools: [{ "googleSearch": {} }],
|
||||
});
|
||||
}));
|
||||
|
||||
// In a real implementation, you would render the map URLs from the sources.
|
||||
const sources = (response.candidates?.[0]?.groundingMetadata?.groundingAttributions || []).map((att: any) => ({
|
||||
|
||||
Reference in New Issue
Block a user