one moar time - we can do it?
All checks were successful
Deploy to Test Environment / deploy-to-test (push) Successful in 32m32s

This commit is contained in:
2025-12-17 04:49:01 -08:00
parent 1d18646818
commit d3ad50cde6
12 changed files with 482 additions and 235 deletions

View File

@@ -75,16 +75,21 @@ export class AIService {
constructor(logger: Logger, aiClient?: IAiClient, fs?: IFileSystem) {
this.logger = logger;
this.logger.info('[AIService] Initializing...');
this.logger.info('---------------- [AIService] Constructor Start ----------------');
if (aiClient) {
this.logger.info('[AIService] Using provided mock AI client.');
this.logger.info('[AIService Constructor] Using provided mock AI client. This indicates a TEST environment.');
this.aiClient = aiClient;
} else {
this.logger.info('[AIService] Initializing Google GenAI client.');
this.logger.info('[AIService Constructor] No mock client provided. Initializing Google GenAI client for PRODUCTION-LIKE environment.');
// Determine if we are in any kind of test environment.
// VITEST_POOL_ID is reliably set by Vitest during test runs.
const isTestEnvironment = process.env.NODE_ENV === 'test' || !!process.env.VITEST_POOL_ID;
this.logger.debug({ isTestEnvironment, nodeEnv: process.env.NODE_ENV, vitestPoolId: process.env.VITEST_POOL_ID }, '[AIService] Environment check');
this.logger.info({
isTestEnvironment,
nodeEnv: process.env.NODE_ENV,
vitestPoolId: process.env.VITEST_POOL_ID,
hasApiKey: !!process.env.GEMINI_API_KEY
}, '[AIService Constructor] Environment check');
const apiKey = process.env.GEMINI_API_KEY;
if (!apiKey) {
@@ -121,7 +126,7 @@ export class AIService {
// Architectural Fix: After the guard clause, assign the guaranteed-to-exist element
// to a new constant. This provides a definitive type-safe variable for the compiler.
const firstContent = request.contents[0];
this.logger.debug({ modelName, requestParts: firstContent.parts.length }, '[AIService] Calling actual generateContent via adapter.');
this.logger.debug({ modelName, requestParts: firstContent.parts?.length ?? 0 }, '[AIService] Calling actual generateContent via adapter.');
return genAI.models.generateContent({ model: modelName, ...request });
}
} : {
@@ -135,15 +140,17 @@ export class AIService {
this.fs = fs || fsPromises;
// Initialize the rate limiter based on an environment variable.
// Defaults to 5 requests per minute (60,000 ms) if not specified.
const requestsPerMinute = parseInt(process.env.GEMINI_RPM || '5', 10);
this.rateLimiter = pRateLimit({
interval: 60 * 1000, // 1 minute
rate: requestsPerMinute,
concurrency: requestsPerMinute, // Allow up to `rate` requests to be running in parallel.
});
this.logger.info(`[AIService] Rate limiter initialized to ${requestsPerMinute} requests per minute.`);
if (aiClient) {
this.logger.warn('[AIService Constructor] Mock client detected. Rate limiter is DISABLED for testing.');
this.rateLimiter = <T>(fn: () => Promise<T>) => fn(); // Pass-through function
} else {
const requestsPerMinute = parseInt(process.env.GEMINI_RPM || '5', 10);
this.logger.info(`[AIService Constructor] Initializing production rate limiter to ${requestsPerMinute} RPM.`);
this.rateLimiter = pRateLimit({
interval: 60 * 1000, rate: requestsPerMinute, concurrency: requestsPerMinute,
});
}
this.logger.info('---------------- [AIService] Constructor End ----------------');
}
private async serverFileToGenerativePart(path: string, mimeType: string) {
@@ -211,61 +218,39 @@ export class AIService {
* @returns The parsed JSON object, or null if parsing fails.
*/
private _parseJsonFromAiResponse<T>(responseText: string | undefined, logger: Logger): T | null {
  // Extraction strategy, in order:
  //   1. If the response contains a ``` / ```json fenced block, only the fence's
  //      content is considered; otherwise the whole response is.
  //   2. Everything before the first '{' or '[' is discarded (models often add a
  //      preamble such as "Here is the JSON:").
  //   3. The remainder is parsed as-is. If that fails and there is text after the
  //      last '}' or ']', that trailing text is dropped and the parse is retried once.
  // Every failure path logs and returns null; this method never throws.
  // NOTE: the result is cast to T without validation - callers are responsible for
  // checking the shape of the returned value.
  logger.debug({ responseTextLength: responseText?.length }, '[_parseJsonFromAiResponse] Starting...');

  if (!responseText) {
    logger.warn('[_parseJsonFromAiResponse] Response text is empty or undefined. Returning null.');
    return null;
  }

  // Prefer the content of a markdown code fence when one is present.
  const markdownMatch = responseText.match(/```(json)?\s*([\s\S]*?)\s*```/);
  let jsonString = responseText;
  if (markdownMatch && markdownMatch[2]) {
    logger.debug('[_parseJsonFromAiResponse] Found JSON within markdown code block.');
    jsonString = markdownMatch[2];
  }

  // Locate the earliest opening character; -1 means "not present".
  const firstBrace = jsonString.indexOf('{');
  const firstBracket = jsonString.indexOf('[');
  const startIndex = (firstBrace === -1 || (firstBracket !== -1 && firstBracket < firstBrace))
    ? firstBracket
    : firstBrace;

  if (startIndex === -1) {
    logger.error({ responseText }, "[_parseJsonFromAiResponse] Could not find starting '{' or '[' in response.");
    return null;
  }

  const jsonSlice = jsonString.substring(startIndex);

  try {
    return JSON.parse(jsonSlice) as T;
  } catch (e) {
    // The catch variable is `unknown` under strict settings; narrow it instead of asserting.
    const parseError = e instanceof Error ? e : new Error(String(e));

    // Second chance: cut the slice at the last closing character so that trailing
    // commentary after the JSON does not defeat the parse. Skipped when the slice
    // already ends on that character, because the retry would be identical.
    const endIndex = Math.max(jsonSlice.lastIndexOf('}'), jsonSlice.lastIndexOf(']'));
    if (endIndex !== -1 && endIndex < jsonSlice.length - 1) {
      const isolatedJson = jsonSlice.substring(0, endIndex + 1);
      try {
        const parsed = JSON.parse(isolatedJson) as T;
        logger.debug('[_parseJsonFromAiResponse] Parsed JSON after discarding trailing text.');
        return parsed;
      } catch (retryError) {
        logger.warn(
          { error: retryError },
          '[_parseJsonFromAiResponse] Retry without trailing text also failed.',
        );
      }
    }

    logger.error(
      { jsonSlice, error: e, errorMessage: parseError.message, stack: parseError.stack },
      "[_parseJsonFromAiResponse] Failed to parse JSON slice.",
    );
    return null;
  }
}
@@ -292,22 +277,30 @@ export class AIService {
const imagePart = await this.serverFileToGenerativePart(imagePath, imageMimeType);
logger.info('[extractItemsFromReceiptImage] Entering method.');
try {
logger.debug('[extractItemsFromReceiptImage] PRE-RATE-LIMITER: Preparing to call AI.');
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() =>
this.aiClient.generateContent({
contents: [{ parts: [{text: prompt}, imagePart] }]
}));
logger.debug('[extractItemsFromReceiptImage] POST-RATE-LIMITER: AI call successful, parsing response.');
// The response from the SDK is structured, we need to access the text part.
const text = result.text;
logger.debug({ rawText: text?.substring(0, 100) }, '[extractItemsFromReceiptImage] Raw text from AI.');
const parsedJson = this._parseJsonFromAiResponse<{ raw_item_description: string; price_paid_cents: number }[]>(text, logger);
if (!parsedJson) {
logger.error({ responseText: text }, '[extractItemsFromReceiptImage] Failed to parse valid JSON from response.');
throw new Error('AI response did not contain a valid JSON array.');
}
logger.info('[extractItemsFromReceiptImage] Successfully extracted items. Exiting method.');
return parsedJson;
} catch (apiError) {
logger.error({ err: apiError }, "Google GenAI API call failed in extractItemsFromReceiptImage");
logger.error({ err: apiError }, "[extractItemsFromReceiptImage] An error occurred during the process.");
throw apiError;
}
}
@@ -325,6 +318,7 @@ export class AIService {
store_address: string | null;
items: ExtractedFlyerItem[];
}> {
logger.info(`[extractCoreDataFromFlyerImage] Entering method with ${imagePaths.length} image(s).`);
const prompt = this._buildFlyerExtractionPrompt(masterItems, submitterIp, userProfileAddress);
const imageParts = await Promise.all(
@@ -335,42 +329,42 @@ export class AIService {
logger.info(`[aiService.server] Total base64 image data size for Gemini: ${(totalImageSize / (1024 * 1024)).toFixed(2)} MB`);
try {
logger.debug(`[aiService.server] Calling Gemini API for flyer processing with ${imageParts.length} image(s).`);
logger.debug(`[extractCoreDataFromFlyerImage] PRE-RATE-LIMITER: Preparing to call Gemini API.`);
const geminiCallStartTime = process.hrtime.bigint();
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() => {
logger.debug("Executing generateContent call within rate limiter for flyer data.");
logger.debug("[extractCoreDataFromFlyerImage] INSIDE-RATE-LIMITER: Executing generateContent call.");
return this.aiClient.generateContent({
contents: [{ parts: [{ text: prompt }, ...imageParts] }]
});
});
logger.debug('[extractCoreDataFromFlyerImage] POST-RATE-LIMITER: AI call completed.');
const geminiCallEndTime = process.hrtime.bigint();
const durationMs = Number(geminiCallEndTime - geminiCallStartTime) / 1_000_000;
logger.info(`[aiService.server] Gemini API call for flyer processing completed in ${durationMs.toFixed(2)} ms.`);
const text = result.text;
logger.debug(`[aiService.server] Raw Gemini response text (first 500 chars): ${text?.substring(0, 500)}`);
const extractedData = this._parseJsonFromAiResponse<z.infer<typeof AiFlyerDataSchema>>(text, logger);
if (!extractedData) {
logger.error({ responseText: text }, "AI response for flyer processing did not contain a valid JSON object after parsing.");
logger.error({ responseText: text }, "[extractCoreDataFromFlyerImage] AI response did not contain a valid JSON object after parsing.");
throw new Error('AI response did not contain a valid JSON object.');
}
// Normalize the items to create a clean data structure.
logger.debug('[extractCoreDataFromFlyerImage] Normalizing extracted items.');
const normalizedItems = Array.isArray(extractedData.items)
? this._normalizeExtractedItems(extractedData.items)
: [];
// Return a new, correctly typed object, rather than mutating the original.
// This makes the data flow explicit and satisfies TypeScript.
logger.info(`[extractCoreDataFromFlyerImage] Successfully processed flyer data for store: ${extractedData.store_name}. Exiting method.`);
return { ...extractedData, items: normalizedItems };
} catch (apiError) {
logger.error({ err: apiError }, "Google GenAI API call failed in extractCoreDataFromFlyerImage. The error was caught.");
logger.error({ err: apiError }, "[extractCoreDataFromFlyerImage] The entire process failed.");
throw apiError;
}
}
@@ -404,6 +398,7 @@ export class AIService {
cropArea: { x: number; y: number; width: number; height: number },
extractionType: 'store_name' | 'dates' | 'item_details',
logger: Logger = this.logger): Promise<{ text: string | undefined }> {
logger.info(`[extractTextFromImageArea] Entering method for extraction type: ${extractionType}.`);
// 1. Define prompts based on the extraction type
const prompts = {
store_name: 'What is the store name in this image? Respond with only the name.',
@@ -414,6 +409,7 @@ export class AIService {
const prompt = prompts[extractionType] || 'Extract the text from this image.';
// 2. Crop the image using sharp
logger.debug('[extractTextFromImageArea] Cropping image with sharp.');
const sharp = (await import('sharp')).default;
const croppedImageBuffer = await sharp(imagePath)
.extract({
@@ -434,20 +430,21 @@ export class AIService {
// 4. Call the AI model
try {
logger.info(`[aiService.server] Calling Gemini for targeted rescan of type: ${extractionType}`);
logger.debug(`[extractTextFromImageArea] PRE-RATE-LIMITER: Preparing to call AI.`);
// Wrap the AI call with the rate limiter.
const result = await this.rateLimiter(() => {
logger.debug(`Executing generateContent call within rate limiter for image area text extraction (type: ${extractionType}).`);
logger.debug(`[extractTextFromImageArea] INSIDE-RATE-LIMITER: Executing generateContent.`);
return this.aiClient.generateContent({
contents: [{ parts: [{ text: prompt }, imagePart] }]
});
});
logger.debug('[extractTextFromImageArea] POST-RATE-LIMITER: AI call completed.');
const text = result.text?.trim();
logger.info(`[aiService.server] Gemini rescan completed. Extracted text: "${text}"`);
logger.info(`[extractTextFromImageArea] Gemini rescan completed. Extracted text: "${text}". Exiting method.`);
return { text };
} catch (apiError) {
logger.error({ err: apiError }, `Google GenAI API call failed in extractTextFromImageArea for type ${extractionType}`);
logger.error({ err: apiError }, `[extractTextFromImageArea] An error occurred for type ${extractionType}.`);
throw apiError;
}
}