// src/services/aiService.server.ts
/**
 * @file This file contains all server-side functions that directly interact with the Google AI (Gemini) API.
 * It is intended to be used only by the backend (e.g., server.ts) and should never be imported into client-side code.
 * The `.server.ts` naming convention helps enforce this separation.
 */
import { GoogleGenAI, type GenerateContentResponse, type Content, type Tool } from '@google/genai';
import fsPromises from 'node:fs/promises';
import type { Logger } from 'pino';
import { z } from 'zod';
import { pRateLimit } from 'p-ratelimit';
import type { FlyerItem, MasterGroceryItem, ExtractedFlyerItem } from '../types';

/**
 * Helper for consistent required-string validation.
 * `null`/`undefined` are coerced to `''` first so that missing, null and empty
 * values all fail the same `min(1)` check with the same message.
 */
const requiredString = (message: string) =>
  z.preprocess((val) => val ?? '', z.string().min(1, message));

// --- Zod Schemas for AI Response Validation (AiFlyerDataSchema is exported for the transformer) ---

/** Shape of a single item inside the AI's flyer response. */
const ExtractedFlyerItemSchema = z.object({
  item: z.string(),
  price_display: z.string(),
  price_in_cents: z.number().nullable(),
  quantity: z.string(),
  category_name: z.string(),
  master_item_id: z.number().nullish(), // .nullish() allows null or undefined
});

/** Shape of the top-level flyer object the AI is asked to return. */
export const AiFlyerDataSchema = z.object({
  store_name: requiredString('Store name cannot be empty'),
  valid_from: z.string().nullable(),
  valid_to: z.string().nullable(),
  store_address: z.string().nullable(),
  items: z.array(ExtractedFlyerItemSchema),
});

/**
 * Defines the contract for a file system utility. This interface allows for
 * dependency injection, making the AIService testable without hitting the real file system.
 */
interface IFileSystem {
  readFile(path: string): Promise<Buffer>;
}

/**
 * Defines the contract for an AI model client. This allows for dependency injection,
 * making the AIService testable without making real API calls to Google.
 */
interface IAiClient {
  generateContent(request: {
    contents: Content[];
    tools?: Tool[];
  }): Promise<GenerateContentResponse>;
}

/**
 * Defines the shape of a single flyer item as returned by the AI.
 * This type is intentionally loose to accommodate potential null/undefined values
 * from the AI before they are cleaned and normalized.
 */
type RawFlyerItem = {
  item: string;
  price_display: string | null | undefined;
  price_in_cents: number | null;
  quantity: string | null | undefined;
  category_name: string | null | undefined;
  master_item_id?: number | null | undefined;
};

/** A single purchased line item extracted from a receipt image. */
type ReceiptLineItem = { raw_item_description: string; price_paid_cents: number };

/** Wraps an async call so it can be throttled; resolves/rejects with the wrapped call's result. */
type RateLimiter = <T>(fn: () => Promise<T>) => Promise<T>;

/** Requests-per-minute used when GEMINI_RPM is unset or not a positive integer. */
const DEFAULT_GEMINI_RPM = 5;

/**
 * Parses a positive integer from an environment-style string.
 * @returns The parsed value, or `undefined` when the input is missing, non-numeric or not > 0.
 */
function parsePositiveInt(raw: string | undefined): number | undefined {
  const parsed = Number.parseInt(raw ?? '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}

/**
 * Server-side wrapper around the Gemini API.
 *
 * The AI client and file system are injectable. When a client is injected the
 * rate limiter is replaced by a pass-through; otherwise calls are throttled
 * with `p-ratelimit` using the GEMINI_RPM environment variable.
 */
export class AIService {
  private aiClient: IAiClient;
  private fs: IFileSystem;
  private rateLimiter: RateLimiter;
  private logger: Logger;

  /**
   * @param logger Default logger used by the service and by methods that are not given their own.
   * @param aiClient Optional AI client. When provided, no Google client is created and rate limiting is disabled.
   * @param fs Optional file system implementation; defaults to `node:fs/promises`.
   * @throws Error if no client is injected, GEMINI_API_KEY is unset and this is not a test environment.
   */
  constructor(logger: Logger, aiClient?: IAiClient, fs?: IFileSystem) {
    this.logger = logger;
    this.logger.info('---------------- [AIService] Constructor Start ----------------');

    if (aiClient) {
      this.logger.info(
        '[AIService Constructor] Using provided mock AI client. This indicates a TEST environment.',
      );
      this.aiClient = aiClient;
    } else {
      this.logger.info(
        '[AIService Constructor] No mock client provided. Initializing Google GenAI client for PRODUCTION-LIKE environment.',
      );

      // Determine if we are in any kind of test environment.
      // VITEST_POOL_ID is reliably set by Vitest during test runs.
      const isTestEnvironment = process.env.NODE_ENV === 'test' || !!process.env.VITEST_POOL_ID;
      this.logger.info(
        {
          isTestEnvironment,
          nodeEnv: process.env.NODE_ENV,
          vitestPoolId: process.env.VITEST_POOL_ID,
          hasApiKey: !!process.env.GEMINI_API_KEY,
        },
        '[AIService Constructor] Environment check',
      );

      const apiKey = process.env.GEMINI_API_KEY;
      if (!apiKey) {
        this.logger.warn('[AIService] GEMINI_API_KEY is not set.');
        // Allow initialization without key in test/build environments if strictly needed
        if (!isTestEnvironment) {
          this.logger.error('[AIService] GEMINI_API_KEY is required in non-test environments.');
          throw new Error('GEMINI_API_KEY environment variable not set for server-side AI calls.');
        } else {
          this.logger.warn(
            '[AIService Constructor] GEMINI_API_KEY is missing, but this is a test environment, so proceeding.',
          );
        }
      }

      // In test mode without an injected client, we might not have a key.
      // The placeholder client below protects against calling an undefined client.
      const genAI = apiKey ? new GoogleGenAI({ apiKey }) : null;
      if (!genAI) {
        this.logger.warn(
          '[AIService] GoogleGenAI client could not be initialized (likely missing API key in test environment). Using mock placeholder.',
        );
      }

      // do not change "gemini-2.5-flash" - this is correct
      const modelName = 'gemini-2.5-flash';

      // We create a shim/adapter that matches the old structure but uses the new SDK call pattern.
      // This preserves the dependency injection pattern used throughout the class.
      this.aiClient = genAI
        ? {
            generateContent: async (request) => {
              // Guard clause: all requests from this service must have content.
              // This prevents sending invalid requests to the API.
              if (!request.contents || request.contents.length === 0) {
                this.logger.error(
                  { request },
                  '[AIService Adapter] generateContent called with no content, which is invalid.',
                );
                throw new Error('AIService.generateContent requires at least one content element.');
              }

              const [firstContent] = request.contents;
              this.logger.debug(
                { modelName, requestParts: firstContent?.parts?.length ?? 0 },
                '[AIService] Calling actual generateContent via adapter.',
              );

              // The model name is injected here, into every call, as the new SDK requires.
              return genAI.models.generateContent({ model: modelName, ...request });
            },
          }
        : {
            // Placeholder used only when no API key is available in a test environment.
            generateContent: async () => {
              this.logger.warn(
                '[AIService] Mock generateContent called. This should only happen in tests when no API key is available.',
              );
              return { text: '[]' } as unknown as GenerateContentResponse;
            },
          };
    }

    this.fs = fs || fsPromises;

    if (aiClient) {
      this.logger.warn(
        '[AIService Constructor] Mock client detected. Rate limiter is DISABLED for testing.',
      );
      this.rateLimiter = <T>(fn: () => Promise<T>) => fn(); // Pass-through function
    } else {
      const configuredRpm = process.env.GEMINI_RPM;
      const parsedRpm = parsePositiveInt(configuredRpm);
      if (configuredRpm && parsedRpm === undefined) {
        // A NaN or non-positive rate would leave the limiter in an unusable state.
        this.logger.warn(
          { configuredRpm, fallback: DEFAULT_GEMINI_RPM },
          '[AIService Constructor] GEMINI_RPM is not a positive integer. Falling back to default.',
        );
      }
      const requestsPerMinute = parsedRpm ?? DEFAULT_GEMINI_RPM;
      this.logger.info(
        `[AIService Constructor] Initializing production rate limiter to ${requestsPerMinute} RPM.`,
      );
      this.rateLimiter = pRateLimit({
        interval: 60 * 1000,
        rate: requestsPerMinute,
        concurrency: requestsPerMinute,
      });
    }

    this.logger.info('---------------- [AIService] Constructor End ----------------');
  }

  /**
   * Reads a file through the injected file system and wraps it as an inline-data part.
   * @param path Path of the file to read.
   * @param mimeType MIME type to attach to the part.
   * @returns An object with the file contents base64-encoded under `inlineData.data`.
   */
  private async serverFileToGenerativePart(path: string, mimeType: string) {
    const fileData = await this.fs.readFile(path);
    return {
      inlineData: {
        data: fileData.toString('base64'),
        mimeType,
      },
    };
  }

  /**
   * Constructs the detailed prompt for the AI to extract flyer data.
   * @param masterItems A list of known grocery items to aid in matching.
   * @param submitterIp The IP address of the user who submitted the flyer. Only its presence is used; the address itself is not placed in the prompt.
   * @param userProfileAddress The profile address of the user. Takes precedence over `submitterIp` for the location hint.
   * @returns A formatted string to be used as the AI prompt.
   */
  private _buildFlyerExtractionPrompt(
    masterItems: MasterGroceryItem[],
    submitterIp?: string,
    userProfileAddress?: string,
  ): string {
    let locationHint = '';
    if (userProfileAddress) {
      locationHint = `The user who uploaded this flyer has a profile address of "${userProfileAddress}". Use this as a strong hint for the store's location.`;
    } else if (submitterIp) {
      locationHint = `The user uploaded this flyer from an IP address that suggests a location. Use this as a general hint for the store's region.`;
    }

    // Optimization: Instead of sending the whole masterItems object, send only the necessary fields.
    // This significantly reduces the number of tokens used in the prompt.
    const simplifiedMasterList = masterItems.map((item) => ({
      id: item.master_grocery_item_id,
      name: item.name,
    }));

    const promptLines = [
      '',
      '# TASK',
      'Analyze the provided flyer image(s) and extract key information into a single, valid JSON object.',
      '',
      '# RULES',
      '',
      '1. Extract the following top-level details for the flyer:',
      '    - "store_name": The name of the grocery store (e.g., "Walmart", "No Frills").',
      '    - "valid_from": The start date of the sale in YYYY-MM-DD format. Use null if not present.',
      '    - "valid_to": The end date of the sale in YYYY-MM-DD format. Use null if not present.',
      `    - "store_address": The physical address of the store. Use null if not present. ${locationHint}`,
      '',
      '2. Extract each individual sale item into an "items" array. For each item, provide:',
      '    - "item": The name of the product (e.g., "Coca-Cola Classic").',
      '    - "price_display": The exact sale price as a string (e.g., "$2.99", "2 for $5.00"). If no price is visible, use an empty string "".',
      '    - "price_in_cents": The primary numeric price in cents. For "$2.99", use 299. For "2 for $5.00", use 500. If no price is visible, you MUST use null.',
      '    - "quantity": A string describing the quantity or weight (e.g., "12x355mL", "500g", "each"). If no quantity is visible, use an empty string "".',
      '    - "master_item_id": Find the best matching item from the MASTER LIST provided below and return its "id". If no good match is found, you MUST use null.',
      '    - "category_name": The most appropriate category (e.g., "Beverages", "Meat & Seafood"). If unsure, use "Other/Miscellaneous".',
      '',
      '3. Your entire output MUST be a single JSON object. Do not include any other text, explanations, or markdown formatting like ```json.',
      '',
      '# EXAMPLES',
      '- For an item "Red Seedless Grapes" on sale for "$1.99 /lb" that matches master item ID 45:',
      '  { "item": "Red Seedless Grapes", "price_display": "$1.99 /lb", "price_in_cents": 199, "quantity": "/lb", "master_item_id": 45, "category_name": "Produce" }',
      '- For an item "PC Cola 2L" on sale "3 for $5.00" that has no master item match:',
      '  { "item": "PC Cola 2L", "price_display": "3 for $5.00", "price_in_cents": 500, "quantity": "2L", "master_item_id": null, "category_name": "Beverages" }',
      '- For an item "Store-made Muffins" with no price listed:',
      '  { "item": "Store-made Muffins", "price_display": "", "price_in_cents": null, "quantity": "6 pack", "master_item_id": 123, "category_name": "Bakery" }',
      '',
      '# MASTER LIST',
      JSON.stringify(simplifiedMasterList),
      '',
      '# JSON OUTPUT',
      '',
    ];
    return promptLines.join('\n');
  }

  /**
   * Safely parses a JSON value from a string, typically from an AI response.
   *
   * Steps: prefer the contents of the first markdown code fence if one exists,
   * then isolate the text between the first `{`/`[` and the last matching
   * closer (`}` or `]`), and finally `JSON.parse` that slice. The result is
   * cast to `T` without runtime validation.
   *
   * @param responseText The raw text response from the AI.
   * @param logger Logger used for diagnostics.
   * @returns The parsed JSON value, or null if the text is empty, has no JSON opener, or fails to parse.
   */
  private _parseJsonFromAiResponse<T>(responseText: string | undefined, logger: Logger): T | null {
    if (!responseText) {
      logger.warn(
        '[_parseJsonFromAiResponse] Response text is empty or undefined. Returning null.',
      );
      return null;
    }

    // Find the start of the JSON, which can be inside a markdown block
    const markdownRegex = /```(json)?\s*([\s\S]*?)\s*```/;
    const markdownMatch = responseText.match(markdownRegex);

    let jsonString: string;
    if (markdownMatch && markdownMatch[2] !== undefined) {
      logger.debug(
        { rawCapture: markdownMatch[2] },
        '[_parseJsonFromAiResponse] Found JSON content within markdown code block.',
      );
      jsonString = markdownMatch[2].trim();
      logger.debug(
        { trimmedJsonString: jsonString },
        '[_parseJsonFromAiResponse] Trimmed extracted JSON string.',
      );
    } else {
      // No fenced block: fall back to scanning the whole response.
      jsonString = responseText;
    }

    // Find the first '{' or '[' to locate the start of the JSON content.
    const firstBrace = jsonString.indexOf('{');
    const firstBracket = jsonString.indexOf('[');
    const startIndex =
      firstBrace === -1 || (firstBracket !== -1 && firstBracket < firstBrace)
        ? firstBracket
        : firstBrace;

    if (startIndex === -1) {
      logger.error(
        { responseText },
        "[_parseJsonFromAiResponse] Could not find starting '{' or '[' in response.",
      );
      return null;
    }

    // Cut at the last closer that matches the opener so trailing commentary after
    // the JSON does not break parsing. If no closer exists, parse the remainder
    // as-is and let JSON.parse report the failure below.
    const closingChar = jsonString[startIndex] === '{' ? '}' : ']';
    const endIndex = jsonString.lastIndexOf(closingChar);
    const jsonSlice =
      endIndex > startIndex
        ? jsonString.substring(startIndex, endIndex + 1)
        : jsonString.substring(startIndex);
    logger.debug(
      { startIndex, endIndex, sliceLength: jsonSlice.length },
      '[_parseJsonFromAiResponse] Isolated JSON slice.',
    );

    try {
      return JSON.parse(jsonSlice) as T;
    } catch (e) {
      const err = e instanceof Error ? e : new Error(String(e));
      logger.error(
        { jsonSlice, error: e, errorMessage: err.message, stack: err.stack },
        '[_parseJsonFromAiResponse] Failed to parse JSON slice.',
      );
      return null;
    }
  }

  /**
   * Sends a receipt image to the AI and returns the purchased line items it reports.
   * @param imagePath Path of the receipt image on the server.
   * @param imageMimeType MIME type of the image.
   * @param logger Logger for this call; defaults to the service logger.
   * @returns The parsed array of line items.
   * @throws Error if the file cannot be read, the AI call fails, or the response is not a JSON array.
   */
  async extractItemsFromReceiptImage(
    imagePath: string,
    imageMimeType: string,
    logger: Logger = this.logger,
  ): Promise<{ raw_item_description: string; price_paid_cents: number }[] | null> {
    const prompt = [
      '',
      'Analyze the provided receipt image. Extract all purchased line items.',
      'For each item, identify its description and total price.',
      'Return the data as a valid JSON array of objects. Each object should have two keys:',
      `1. "raw_item_description": a string containing the item's name as written on the receipt.`,
      '2. "price_paid_cents": an integer representing the total price for that line item in cents (do not include currency symbols).',
      '',
      'Example format:',
      '[',
      '  { "raw_item_description": "ORGANIC BANANAS", "price_paid_cents": 129 },',
      '  { "raw_item_description": "AVOCADO", "price_paid_cents": 299 }',
      ']',
      '',
      'Only output the JSON array. Do not include any other text, explanations, or markdown formatting.',
      '',
    ].join('\n');

    const imagePart = await this.serverFileToGenerativePart(imagePath, imageMimeType);
    logger.info('[extractItemsFromReceiptImage] Entering method.');

    try {
      logger.debug('[extractItemsFromReceiptImage] PRE-RATE-LIMITER: Preparing to call AI.');
      // Wrap the AI call with the rate limiter.
      const result = await this.rateLimiter(() =>
        this.aiClient.generateContent({
          contents: [{ parts: [{ text: prompt }, imagePart] }],
        }),
      );
      logger.debug(
        '[extractItemsFromReceiptImage] POST-RATE-LIMITER: AI call successful, parsing response.',
      );

      // The response from the SDK is structured, we need to access the text part.
      const text = result.text;
      logger.debug(
        { rawText: text?.substring(0, 100) },
        '[extractItemsFromReceiptImage] Raw text from AI.',
      );

      const parsedJson = this._parseJsonFromAiResponse<ReceiptLineItem[]>(text, logger);
      // The contract is an array; reject objects/primitives rather than returning them mistyped.
      if (!parsedJson || !Array.isArray(parsedJson)) {
        logger.error(
          { responseText: text },
          '[extractItemsFromReceiptImage] Failed to parse valid JSON from response.',
        );
        throw new Error('AI response did not contain a valid JSON array.');
      }

      logger.info('[extractItemsFromReceiptImage] Successfully extracted items. Exiting method.');
      return parsedJson;
    } catch (apiError) {
      logger.error(
        { err: apiError },
        '[extractItemsFromReceiptImage] An error occurred during the process.',
      );
      throw apiError;
    }
  }

  /**
   * Sends one or more flyer images to the AI and returns the extracted store details and items.
   *
   * The response is parsed and its items normalized, but it is not validated
   * against `AiFlyerDataSchema` here.
   *
   * @param imagePaths Files to send, each with its path and MIME type.
   * @param masterItems Known grocery items included in the prompt for matching.
   * @param submitterIp Optional submitter IP, used only to pick the location hint.
   * @param userProfileAddress Optional profile address, used as a location hint.
   * @param logger Logger for this call; defaults to the service logger.
   * @returns The parsed flyer data with `items` normalized.
   * @throws Error if a file cannot be read, the AI call fails, or no JSON could be parsed from the response.
   */
  async extractCoreDataFromFlyerImage(
    imagePaths: { path: string; mimetype: string }[],
    masterItems: MasterGroceryItem[],
    submitterIp?: string,
    userProfileAddress?: string,
    logger: Logger = this.logger,
  ): Promise<{
    store_name: string;
    valid_from: string | null;
    valid_to: string | null;
    store_address: string | null;
    items: ExtractedFlyerItem[];
  }> {
    logger.info(
      `[extractCoreDataFromFlyerImage] Entering method with ${imagePaths.length} image(s).`,
    );

    const prompt = this._buildFlyerExtractionPrompt(masterItems, submitterIp, userProfileAddress);
    const imageParts = await Promise.all(
      imagePaths.map((file) => this.serverFileToGenerativePart(file.path, file.mimetype)),
    );

    // Length of the base64 strings, i.e. the encoded payload size rather than the raw file size.
    const totalImageSize = imageParts.reduce((acc, part) => acc + part.inlineData.data.length, 0);
    logger.info(
      `[aiService.server] Total base64 image data size for Gemini: ${(totalImageSize / (1024 * 1024)).toFixed(2)} MB`,
    );

    try {
      logger.debug(
        `[extractCoreDataFromFlyerImage] PRE-RATE-LIMITER: Preparing to call Gemini API.`,
      );
      const geminiCallStartTime = process.hrtime.bigint();

      // Wrap the AI call with the rate limiter.
      const result = await this.rateLimiter(() => {
        logger.debug(
          '[extractCoreDataFromFlyerImage] INSIDE-RATE-LIMITER: Executing generateContent call.',
        );
        return this.aiClient.generateContent({
          contents: [{ parts: [{ text: prompt }, ...imageParts] }],
        });
      });
      logger.debug('[extractCoreDataFromFlyerImage] POST-RATE-LIMITER: AI call completed.');

      // Measured around the rate limiter, so this includes any time spent waiting for a slot.
      const geminiCallEndTime = process.hrtime.bigint();
      const durationMs = Number(geminiCallEndTime - geminiCallStartTime) / 1_000_000;
      logger.info(
        `[aiService.server] Gemini API call for flyer processing completed in ${durationMs.toFixed(2)} ms.`,
      );

      const text = result.text;
      logger.debug(
        `[aiService.server] Raw Gemini response text (first 500 chars): ${text?.substring(0, 500)}`,
      );

      const extractedData = this._parseJsonFromAiResponse<z.infer<typeof AiFlyerDataSchema>>(
        text,
        logger,
      );
      if (!extractedData) {
        logger.error(
          { responseText: text },
          '[extractCoreDataFromFlyerImage] AI response did not contain a valid JSON object after parsing.',
        );
        throw new Error('AI response did not contain a valid JSON object.');
      }

      // Normalize the items to create a clean data structure.
      logger.debug('[extractCoreDataFromFlyerImage] Normalizing extracted items.');
      const normalizedItems = Array.isArray(extractedData.items)
        ? this._normalizeExtractedItems(extractedData.items)
        : [];

      logger.info(
        `[extractCoreDataFromFlyerImage] Successfully processed flyer data for store: ${extractedData.store_name}. Exiting method.`,
      );
      return { ...extractedData, items: normalizedItems };
    } catch (apiError) {
      logger.error({ err: apiError }, '[extractCoreDataFromFlyerImage] The entire process failed.');
      throw apiError;
    }
  }

  /**
   * Normalizes the raw items returned by the AI, ensuring fields are in the correct format.
   * Missing `price_display`/`quantity` become `''`, a missing `category_name` becomes
   * `'Other/Miscellaneous'`, and a null `master_item_id` becomes `undefined`.
   * @param items An array of raw flyer items from the AI.
   * @returns A normalized array of flyer items.
   */
  private _normalizeExtractedItems(items: RawFlyerItem[]): ExtractedFlyerItem[] {
    return items.map((item: RawFlyerItem) => ({
      ...item,
      price_display: String(item.price_display ?? ''),
      quantity: String(item.quantity ?? ''),
      category_name: String(item.category_name ?? 'Other/Miscellaneous'),
      master_item_id: item.master_item_id ?? undefined,
    }));
  }

  /**
   * SERVER-SIDE FUNCTION
   * Extracts a specific piece of text from a cropped area of an image.
   * @param imagePath The path to the original image file on the server.
   * @param imageMimeType MIME type attached to the cropped image sent to the AI.
   * @param cropArea The coordinates and dimensions { x, y, width, height } to crop; values are rounded to integers.
   * @param extractionType The type of data to extract, which determines the AI prompt.
   * @param logger Logger for this call; defaults to the service logger.
   * @returns A promise that resolves to the extracted (trimmed) text, which may be undefined.
   * @throws Whatever the cropping step or the AI call throws.
   */
  async extractTextFromImageArea(
    imagePath: string,
    imageMimeType: string,
    cropArea: { x: number; y: number; width: number; height: number },
    extractionType: 'store_name' | 'dates' | 'item_details',
    logger: Logger = this.logger,
  ): Promise<{ text: string | undefined }> {
    logger.info(
      `[extractTextFromImageArea] Entering method for extraction type: ${extractionType}.`,
    );

    // 1. Define prompts based on the extraction type
    const prompts = {
      store_name: 'What is the store name in this image? Respond with only the name.',
      dates:
        'What are the sale dates in this image? Respond with the date range as text (e.g., "Jan 1 - Jan 7").',
      item_details:
        'Extract the item name, price, and quantity from this image. Respond with the text as seen.',
    };
    // The fallback covers callers that bypass the type system with an unknown extraction type.
    const prompt = prompts[extractionType] || 'Extract the text from this image.';

    // 2. Crop the image using sharp
    logger.debug('[extractTextFromImageArea] Cropping image with sharp.');
    // Loaded lazily so the module is only required when this method is actually used.
    const sharp = (await import('sharp')).default;
    const croppedImageBuffer = await sharp(imagePath)
      .extract({
        left: Math.round(cropArea.x),
        top: Math.round(cropArea.y),
        width: Math.round(cropArea.width),
        height: Math.round(cropArea.height),
      })
      .toBuffer();

    // 3. Convert cropped buffer to GenerativePart
    const imagePart = {
      inlineData: {
        data: croppedImageBuffer.toString('base64'),
        mimeType: imageMimeType,
      },
    };

    // 4. Call the AI model
    try {
      logger.debug(`[extractTextFromImageArea] PRE-RATE-LIMITER: Preparing to call AI.`);
      // Wrap the AI call with the rate limiter.
      const result = await this.rateLimiter(() => {
        logger.debug(`[extractTextFromImageArea] INSIDE-RATE-LIMITER: Executing generateContent.`);
        return this.aiClient.generateContent({
          contents: [{ parts: [{ text: prompt }, imagePart] }],
        });
      });
      logger.debug('[extractTextFromImageArea] POST-RATE-LIMITER: AI call completed.');

      const text = result.text?.trim();
      logger.info(
        `[extractTextFromImageArea] Gemini rescan completed. Extracted text: "${text}". Exiting method.`,
      );
      return { text };
    } catch (apiError) {
      logger.error(
        { err: apiError },
        `[extractTextFromImageArea] An error occurred for type ${extractionType}.`,
      );
      throw apiError;
    }
  }

  /**
   * SERVER-SIDE FUNCTION
   * Intended to use Google Maps grounding to find nearby stores and plan a shopping trip.
   * The feature is currently disabled: this method always throws. The previous
   * implementation is kept below in a comment for reference.
   * @param items The items from the flyer.
   * @param store The store associated with the flyer.
   * @param userLocation The user's current geographic coordinates.
   * @param logger Logger for this call; defaults to the service logger.
   * @returns Never resolves successfully while the feature is disabled.
   * @throws Error always, stating that the feature is disabled.
   */
  async planTripWithMaps(
    items: FlyerItem[],
    store: { name: string } | undefined,
    userLocation: GeolocationCoordinates,
    logger: Logger = this.logger,
  ): Promise<{ text: string; sources: { uri: string; title: string }[] }> {
    // Return a 501 Not Implemented error as this feature is disabled.
    logger.warn('[AIService] planTripWithMaps called, but feature is disabled. Throwing error.');
    throw new Error("The 'planTripWithMaps' feature is currently disabled due to API costs.");

    /*
    const topItems = items.slice(0, 5).map(i => i.item).join(', ');
    const storeName = store?.name || 'the grocery store';

    try {
      // Wrap the AI call with the rate limiter.
      const result = await this.rateLimiter(() => this.aiClient.generateContent({
        contents: [{ parts: [{ text: `My current location is latitude ${userLocation.latitude}, longitude ${userLocation.longitude}. I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route. Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`}]}],
        tools: [{ "googleSearch": {} }],
      }));

      // In a real implementation, you would render the map URLs from the sources.
      // The new SDK provides the search queries used, not a direct list of web attributions.
      // We will transform these queries into searchable links to fulfill the contract of the function.
      const searchQueries = result.candidates?.[0]?.groundingMetadata?.webSearchQueries || [];
      const sources = searchQueries.map((query: string) => ({
        uri: `https://www.google.com/search?q=${encodeURIComponent(query)}`,
        title: query
      }));

      return { text: result.text ?? '', sources };
    } catch (apiError) {
      logger.error({ err: apiError }, "Google GenAI API call failed in planTripWithMaps");
      throw apiError;
    }
    */
  }
}

// Export a singleton instance of the service for use throughout the application.
import { logger } from './logger.server';
export const aiService = new AIService(logger);