Files
flyer-crawler.projectium.com/services/geminiService.ts

429 lines
20 KiB
TypeScript

import { GoogleGenAI, Type, Modality } from "@google/genai";
import type { FlyerItem, MasterGroceryItem, UnitPrice, Store } from '../types';
import { CATEGORIES } from '../types';
import { parsePriceToCents } from '../utils/priceParser';
/*
NOTE ON THE GOOGLE AI API KEY:
This project uses a Google AI (Gemini) API key. In this environment, you do not need to manually create one.
You may see a "Choose a key" dialog. If it mentions a "free tier", you can simply close or ignore that dialog.
The environment will automatically provide a free-tier API key as `process.env.API_KEY` for the AI to work.
*/
// Fail fast at module load: every function in this module talks to Gemini
// through the shared `ai` client, which cannot be built without a key.
const apiKey = process.env.API_KEY;
if (!apiKey) {
  throw new Error("API_KEY environment variable not set");
}
const ai = new GoogleGenAI({ apiKey });
/**
 * Parses the JSON payload of a Gemini response.
 *
 * The model sometimes wraps its JSON in a markdown code fence, either tagged
 * (```json ... ```) or untagged (``` ... ```). When a fence is found its
 * contents are tried first; if they do not parse, the whole trimmed text is
 * tried as a fallback (this covers plain JSON whose string values happen to
 * contain backtick fences).
 *
 * @param responseText The raw text from the AI response. A missing value
 *   (`undefined`/`null`, i.e. the response had no text part) is reported as a
 *   parse failure instead of crashing with a TypeError.
 * @returns The parsed value, asserted (not validated) to be of type `T`.
 * @throws Error if the text is missing or no candidate is valid JSON.
 */
function parseGeminiJson<T>(responseText: string | null | undefined): T {
  if (typeof responseText !== 'string') {
    console.error("Failed to parse JSON response from AI.", {
      originalResponse: responseText,
      cleanedJSON: undefined,
      error: 'Response contained no text',
    });
    throw new Error('Failed to parse JSON response from AI. Error: Response contained no text. The AI may have returned malformed data.');
  }

  const trimmed = responseText.trim();
  // Optional language tag so both ```json and bare ``` fences are unwrapped.
  const fenced = trimmed.match(/```(?:json)?\s*([\s\S]*?)\s*```/i)?.[1];
  const candidates = fenced ? [fenced, trimmed] : [trimmed];

  let firstError: unknown;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate) as T;
    } catch (e: unknown) {
      // Report the failure of the preferred candidate, not of the fallback.
      if (firstError === undefined) {
        firstError = e;
      }
    }
  }

  const message = firstError instanceof Error ? firstError.message : String(firstError);
  console.error("Failed to parse JSON response from AI.", {
    originalResponse: responseText,
    cleanedJSON: candidates[0],
    error: message,
  });
  // Re-throw with more context.
  throw new Error(`Failed to parse JSON response from AI. Error: ${message}. The AI may have returned malformed data.`);
}
/**
 * Reads a browser `File` and converts it into an inline-data part suitable
 * for a Gemini `generateContent` request.
 *
 * @param file The file to encode.
 * @returns `{ inlineData: { data, mimeType } }` where `data` is the base64
 *   payload of the file's data URL and `mimeType` is `file.type`.
 * @throws Rejects if the read fails or is aborted, or if the reader yields a
 *   non-string result, so callers never wait on a promise that cannot settle.
 */
const fileToGenerativePart = async (file: File) => {
  const base64Data = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();
    reader.onload = () => {
      const { result } = reader;
      if (typeof result !== 'string') {
        reject(new Error('Failed to read file: FileReader did not produce a data URL.'));
        return;
      }
      // A data URL looks like "data:<mime>;base64,<payload>"; keep the payload only.
      resolve(result.split(',')[1] ?? '');
    };
    reader.onerror = () => reject(reader.error ?? new Error('Failed to read file.'));
    reader.onabort = () => reject(new Error('File read was aborted.'));
    reader.readAsDataURL(file);
  });
  return {
    inlineData: { data: base64Data, mimeType: file.type },
  };
};
/**
 * Asks the model whether the given image is a grocery store flyer or
 * advertisement.
 *
 * @param imageFile The image to classify.
 * @returns The model's `is_flyer` verdict. Any failure of the API call or of
 *   parsing its reply is logged and reported as `false`; a failure while
 *   reading the file itself happens before the guarded section and propagates.
 */
export const isImageAFlyer = async (imageFile: File): Promise<boolean> => {
  const flyerImage = await fileToGenerativePart(imageFile);
  try {
    const question = {
      text: `Is this a grocery store flyer or advertisement? Analyze the image and answer with only a JSON object: {"is_flyer": true} or {"is_flyer": false}.`,
    };
    const verdictSchema = {
      type: Type.OBJECT,
      properties: {
        is_flyer: { type: Type.BOOLEAN },
      },
      required: ['is_flyer'],
    };
    const reply = await ai.models.generateContent({
      model: 'gemini-flash-lite-latest',
      contents: { parts: [flyerImage, question] },
      config: {
        responseMimeType: "application/json",
        responseSchema: verdictSchema,
      },
    });
    const { is_flyer: isFlyer } = parseGeminiJson<{ is_flyer: boolean }>(reply.text);
    return isFlyer;
  } catch (e) {
    // Best-effort check: treat any failure as "not a flyer".
    console.error("Flyer check failed:", e);
    return false;
  }
};
/**
 * Asks the model to find a physical store address in the given image.
 *
 * @param imageFile The image to inspect.
 * @returns The `address` field of the model's JSON reply: the address text,
 *   or `null` when the model reports none.
 * @throws Errors from reading the file, calling the API, or parsing the reply
 *   are not caught here and propagate to the caller.
 */
export const extractAddressFromImage = async (imageFile: File): Promise<string | null> => {
  const storeImage = await fileToGenerativePart(imageFile);
  const instruction = {
    text: `Is there a physical store address visible in this image? If so, extract the full address. If not, return null. Return ONLY a JSON object: {"address": "123 Main St, Anytown, USA"} or {"address": null}.`,
  };
  const addressSchema = {
    type: Type.OBJECT,
    properties: {
      address: { type: Type.STRING, nullable: true, description: "The full store address found in the image, or null if not present." },
    },
    required: ['address'],
  };
  const reply = await ai.models.generateContent({
    model: 'gemini-flash-lite-latest',
    contents: { parts: [storeImage, instruction] },
    config: {
      responseMimeType: "application/json",
      responseSchema: addressSchema,
    },
  });
  const { address } = parseGeminiJson<{ address: string | null }>(reply.text);
  return address;
};
// Raw item structure as returned by the AI model.
// The field names match the per-item `responseSchema` declared in
// extractCoreDataFromImage; that function maps them onto FlyerItem fields.
interface RawFlyerItem {
// Item name as it appears in the flyer.
item: string;
// Price text, including currency symbols; later fed to parsePriceToCents.
price: string;
// Quantity, unit, or deal description (e.g. "per lb").
quantity: string;
// Category name chosen by the model from the CATEGORIES list.
category: string;
// Numeric value parsed from `quantity`, or null.
quantity_num: number | null;
// ID of the matched master item (the model is told to use a sentinel ID when unsure).
master_item_id: number | null;
// Structured per-unit price, or null when not applicable.
unit_price: UnitPrice | null;
}
// Result of extractCoreDataFromImage.
interface ExtractedCoreData {
store_name: string;
// Typed as nullable, although extractCoreDataFromImage substitutes today's
// date when the model supplies none.
valid_from: string | null;
valid_to: string | null;
// Flyer items without the `id`, `created_at` and `flyer_id` fields.
items: Omit<FlyerItem, 'id' | 'created_at' | 'flyer_id'>[];
}
// Result of extractLogoFromImage.
interface ExtractedLogoData {
// Base64-encoded logo image, or null if no logo was found.
store_logo_base_64: string | null;
}
/**
 * Extracts the store name, sale validity dates and all sale items from one or
 * more flyer page images, matching every item against the master item list.
 *
 * Post-processing applied to the model output:
 * - `price` is exposed as `price_display` and parsed into `price_in_cents`.
 * - `master_item_id` is kept only when it refers to an entry of `masterItems`;
 *   the `_UNMATCHED_` sentinel, a missing value, or an ID the model invented
 *   all become `null`.
 * - A missing date (JSON null, empty, or the literal string "null") falls back
 *   to today's date (UTC), because a date is mandatory downstream.
 *
 * @param imageFiles Flyer page images (may be several pages of one flyer).
 * @param masterItems Canonical grocery items the model may match against.
 * @returns Store name, validity dates and the processed items.
 * @throws Errors from reading the files, calling the API, or parsing the
 *   reply are not caught here and propagate to the caller.
 */
export const extractCoreDataFromImage = async (imageFiles: File[], masterItems: MasterGroceryItem[]): Promise<ExtractedCoreData> => {
  const imageParts = await Promise.all(imageFiles.map(fileToGenerativePart));

  // Create a special "unmatched" item to act as a fallback for the AI.
  // This gives the AI a valid, required choice when no other match is suitable.
  const UNMATCHED_ITEM_ID = 0;
  const unmatchedMasterItem = { id: UNMATCHED_ITEM_ID, name: '_UNMATCHED_' };
  const masterItemsForPrompt = [
    ...masterItems.map(item => ({ id: item.id, name: item.name })),
    unmatchedMasterItem
  ];

  // NOTE: the prompt lines are part of the runtime string; keep them flush left.
  const prompt = `You are an expert data extraction and matching system for grocery store flyers. Analyze the provided flyer images (which may be multiple pages of the same flyer).
1. Identify the name of the grocery store/company.
2. Identify the date range for which the flyer's deals are valid. Extract a 'valid_from' and 'valid_to' date. You MUST return dates in 'YYYY-MM-DD' format. If no date range is clearly visible, you MUST return 'null' for both date fields.
3. Extract all distinct sale items from all pages. For each item, extract its name, price, and quantity/deal description.
4. **Categorization**: For each item, determine its category from the provided list. You MUST select one of the exact category names from the list.
5. **Extract Numeric Quantity**: From the quantity string, extract the primary numeric value if one is present (e.g., for "500g bag", extract 500; for "2L bottle", extract 2; for "per lb", return null). This should be a number, not a string. If no clear number is present, you MUST return 'null'. Store this in the 'quantity_num' field.
6. **CRITICAL ITEM MATCHING (STRICTLY ENFORCED)**: For each extracted item, you MUST match it to its corresponding canonical item from the 'Master Items List'. This is the most important task.
* **RULE 1: BE EXTREMELY STRICT.** The goal is to link an item to its *exact* canonical counterpart (e.g., 'Granny Smith Apples' -> 'apples'), not a vaguely related item.
* **RULE 2: NEW FALLBACK RULE.** If you are not 100% certain of a perfect match, you MUST assign the \`master_item_id\` of the special \`_UNMATCHED_\` item (ID: ${UNMATCHED_ITEM_ID}). This is not optional. Assigning a wrong item is a critical failure.
* **RULE 3: FOCUS ON THE CORE PRODUCT.** Ignore brands unless the master item is brand-specific. For example, 'Compliments Organic Mushrooms' should match a master item named 'mushrooms'.
* **NEGATIVE EXAMPLES (WHAT NOT TO DO):**
* 'Sunrise Salmon Fillets' should NOT be matched to 'chicken thighs'.
* 'Large Avocados' should NOT be matched to 'chicken thighs'.
* If the flyer item is 'bananas' and the only master items are 'apples' and 'oranges', you MUST assign the \`_UNMATCHED_\` ID.
* **FINAL CHECK:** Before assigning an ID, ask: "Is '[flyer item name]' a type of '[master item name]'?". If the answer is no, and no other item fits, you MUST use the \`_UNMATCHED_\` ID.
7. **Unit Price Calculation**: For each item, calculate and provide a 'unit_price'. This is CRITICAL for price comparison. Your goal is to standardize the price into a common unit.
* **Multi-buy deals**: If the price is '2 for $5.00', the unit price object should reflect a value of 2.50 and a unit of 'each'.
* **Price by weight**: If the price is '$3.99/lb', the unit price object should be { "value": 3.99, "unit": "lb" }.
* The unit price MUST be a JSON object: \`{ "value": <number>, "unit": "<string>" }\`. Use standard units: 'g', 'kg', 'ml', 'l' for metric, and 'oz', 'lb', 'fl oz' for imperial. Use 'each' for items sold individually.
* If a unit price cannot be determined or is not applicable, you MUST return \`null\` for this field.
Return the result as a single JSON object, strictly following the provided schema. It is critical that all string values within the JSON are correctly escaped.
Category List (for categorization): ${JSON.stringify(CATEGORIES)}
Master Items List (for matching): ${JSON.stringify(masterItemsForPrompt)}
`;

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: {
      parts: [
        ...imageParts,
        { text: prompt }
      ]
    },
    config: {
      responseMimeType: "application/json",
      responseSchema: {
        type: Type.OBJECT,
        properties: {
          store_name: { type: Type.STRING, description: 'The name of the grocery store (e.g., "Safeway", "Kroger").' },
          // Dates are required keys but must be nullable, otherwise the model
          // cannot obey the prompt's "return null" instruction.
          valid_from: { type: Type.STRING, nullable: true, description: "The start date of the sale in YYYY-MM-DD format, or null if not found." },
          valid_to: { type: Type.STRING, nullable: true, description: "The end date of the sale in YYYY-MM-DD format, or null if not found." },
          items: {
            type: Type.ARRAY,
            items: {
              type: Type.OBJECT,
              properties: {
                item: { type: Type.STRING, description: 'The name of the grocery item as it appears in the flyer.' },
                price: { type: Type.STRING, description: 'The price of the item, including currency symbols.' },
                quantity: { type: Type.STRING, description: 'The quantity, unit, or deal description (e.g., "per lb", "5 oz").' },
                category: { type: Type.STRING, description: "The category of the item from the predefined list." },
                quantity_num: { type: Type.NUMBER, nullable: true, description: 'The parsed numeric value from the quantity string, or null.'},
                // The description now agrees with the prompt: "no match" is the sentinel ID, not null.
                master_item_id: { type: Type.INTEGER, description: `The integer ID of the matching item from the master list, or ${UNMATCHED_ITEM_ID} (the _UNMATCHED_ item) if no confident match is found.` },
                unit_price: {
                  type: Type.OBJECT,
                  nullable: true,
                  description: "A structured object for the calculated price per single standard unit, or null if not applicable.",
                  properties: {
                    value: { type: Type.NUMBER, description: "The numeric value of the unit price." },
                    unit: { type: Type.STRING, description: "The standard unit of measurement (e.g., 'g', 'ml', 'lb', 'oz', 'each')." }
                  },
                  required: ["value", "unit"]
                }
              },
              required: ['item', 'price', 'quantity', 'category', 'quantity_num', 'master_item_id', 'unit_price']
            }
          }
        },
        required: ['store_name', 'valid_from', 'valid_to', 'items']
      }
    }
  });

  const parsedJson = parseGeminiJson<{
    store_name: string;
    valid_from: string | null;
    valid_to: string | null;
    items: RawFlyerItem[];
  }>(response.text);

  // Only IDs that really exist in the caller's master list may be stored.
  const knownMasterIds = new Set<number>(masterItems.map(item => item.id));
  const resolveMasterItemId = (id: number | null | undefined): number | null =>
    id != null && id !== UNMATCHED_ITEM_ID && knownMasterIds.has(id) ? id : null;

  // Treats JSON null, blank text and the literal string "null" as "no date".
  const presentOrNull = (value: string | null | undefined): string | null => {
    const trimmed = typeof value === 'string' ? value.trim() : '';
    return trimmed !== '' && trimmed.toLowerCase() !== 'null' ? trimmed : null;
  };

  const rawItems = Array.isArray(parsedJson.items) ? parsedJson.items : [];
  const processedItems: ExtractedCoreData['items'] = rawItems.map(rawItem => ({
    item: rawItem.item,
    price_display: rawItem.price,
    price_in_cents: parsePriceToCents(rawItem.price),
    quantity: rawItem.quantity,
    category_name: rawItem.category,
    quantity_num: rawItem.quantity_num,
    // Convert the special _UNMATCHED_ ID (and any unknown ID) to null for the database.
    master_item_id: resolveMasterItemId(rawItem.master_item_id),
    unit_price: rawItem.unit_price,
  }));

  const today = new Date().toISOString().slice(0, 10);
  return {
    store_name: parsedJson.store_name,
    // Per user instruction, a date is mandatory. If the AI cannot find one,
    // we must use today's date as a fallback.
    valid_from: presentOrNull(parsedJson.valid_from) ?? today,
    valid_to: presentOrNull(parsedJson.valid_to) ?? today,
    items: processedItems,
  };
};
/**
 * Asks the model to locate the main store logo in the flyer images and return
 * it as a base64-encoded PNG string.
 *
 * The schema field is declared nullable so the model can actually follow the
 * prompt's "return null" instruction, and a blank value or the literal string
 * "null" is normalised to a real `null`.
 *
 * @param imageFiles Flyer page images to search.
 * @returns `{ store_logo_base_64 }` holding the model's string, or `null`
 *   when no logo was reported.
 * @throws Errors from reading the files, calling the API, or parsing the
 *   reply are not caught here and propagate to the caller.
 */
export const extractLogoFromImage = async (imageFiles: File[]): Promise<ExtractedLogoData> => {
  const imageParts = await Promise.all(imageFiles.map(fileToGenerativePart));
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: {
      parts: [
        ...imageParts,
        { text: `You are a specialized image analysis tool. Your only task is to identify the main store logo in the provided flyer image. Crop it from the image into a small square (approx 64x64 pixels). Return it as a base64-encoded PNG string. If no logo is found, return null. Return ONLY a JSON object matching the specified schema. It is critical that all string values within the JSON are correctly escaped.` }
      ]
    },
    config: {
      responseMimeType: "application/json",
      responseSchema: {
        type: Type.OBJECT,
        properties: {
          store_logo_base_64: { type: Type.STRING, nullable: true, description: "A small, cropped, base64-encoded PNG string of the store's logo, or null if not found." },
        },
        required: ['store_logo_base_64']
      }
    }
  });
  const parsed = parseGeminiJson<Partial<ExtractedLogoData> | null>(response.text);
  const logo = parsed?.store_logo_base_64;
  const probe = typeof logo === 'string' ? logo.trim() : '';
  const hasLogo = typeof logo === 'string' && probe !== '' && probe.toLowerCase() !== 'null';
  return { store_logo_base_64: hasLogo ? logo : null };
};
/**
 * Requests short insights, simple meal ideas or shopping tips for a list of
 * sale items.
 *
 * @param items The flyer items, sent to the model as pretty-printed JSON.
 * @returns The model's text, or an empty string when the response carries no
 *   text (the SDK reports that case as `undefined`, which would violate the
 *   declared `string` result).
 */
export const getQuickInsights = async (items: FlyerItem[]): Promise<string> => {
  const prompt = `Based on this list of grocery items on sale, provide some quick insights, simple meal ideas, or shopping tips. Keep it concise and easy to read.\n\nItems:\n${JSON.stringify(items, null, 2)}`;
  const response = await ai.models.generateContent({
    model: 'gemini-flash-lite-latest',
    contents: prompt
  });
  return response.text ?? '';
};
/**
 * Requests a detailed markdown analysis of the sale items (weekly meal plan,
 * best-value deals, purchasing traps) from `gemini-2.5-pro` with a thinking
 * budget of 32768.
 *
 * @param items The flyer items, sent to the model as pretty-printed JSON.
 * @returns The model's text, or an empty string when the response carries no
 *   text (the SDK reports that case as `undefined`, which would violate the
 *   declared `string` result).
 */
export const getDeepDiveAnalysis = async (items: FlyerItem[]): Promise<string> => {
  const prompt = `Perform a detailed analysis of these grocery sale items. Create a comprehensive weekly meal plan to maximize savings. Identify the best value-for-money deals, considering unit prices if possible. Point out any potential purchasing traps (e.g., items that seem cheap but have a high cost per unit or are near expiration). Format the output in clear, well-structured markdown.\n\nItems:\n${JSON.stringify(items, null, 2)}`;
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-pro',
    contents: prompt,
    config: {
      thinkingConfig: { thinkingBudget: 32768 }
    }
  });
  return response.text ?? '';
};
/**
 * Runs a Google-Search-grounded query about the first three flyer items
 * (recipes, nutritional information, or price comparisons).
 *
 * @param items The flyer items; only the names of the first three are used.
 * @returns `text`: the model's summary, or an empty string when the response
 *   carries no text. `sources`: the grounding chunks of the first candidate,
 *   or an empty array when there are none.
 */
export const searchWeb = async (items: FlyerItem[]): Promise<{text: string; sources: any[]}> => {
  const topItems = items.slice(0, 3).map(i => i.item).join(', ');
  const prompt = `Find recipes, nutritional information, or price comparisons for these items: ${topItems}. Provide a summary and the sources you used.`;
  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: prompt,
    config: {
      tools: [{googleSearch: {}}]
    }
  });
  const sources = response.candidates?.[0]?.groundingMetadata?.groundingChunks ?? [];
  // `response.text` is undefined when no text part was produced.
  return { text: response.text ?? '', sources };
};
// ============================================================================
// STUBS FOR FUTURE AI FEATURES
// ============================================================================
/**
 * [STUB] Uses Google Maps grounding to find nearby stores and plan a shopping trip.
 * @param items The items from the flyer; only the names of the first five are used.
 * @param store The store associated with the flyer; falls back to the generic
 *   phrase 'the grocery store' when absent or unnamed.
 * @param userLocation The user's current geographic coordinates.
 * @returns A text response with trip planning advice (an empty string when the
 *   response carries no text) and a list of map sources taken from the first
 *   candidate's grounding chunks.
 */
export const planTripWithMaps = async (items: FlyerItem[], store: Store | undefined, userLocation: GeolocationCoordinates): Promise<{text: string; sources: any[]}> => {
  console.log("Stub: planTripWithMaps called with location:", userLocation);
  const topItems = items.slice(0, 5).map(i => i.item).join(', ');
  const storeName = store?.name || 'the grocery store';
  const response = await ai.models.generateContent({
    model: "gemini-2.5-flash",
    contents: `I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route. Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`,
    config: {
      tools: [{googleMaps: {}}],
      toolConfig: {
        retrievalConfig: {
          latLng: {
            latitude: userLocation.latitude,
            longitude: userLocation.longitude
          }
        }
      }
    },
  });
  // In a real implementation, you would render the map URLs from the sources.
  const sources = response.candidates?.[0]?.groundingMetadata?.groundingChunks ?? [];
  // `response.text` is undefined when no text part was produced.
  return { text: response.text ?? '', sources };
};
/**
 * [STUB] Generates an image based on a text prompt using the Imagen model.
 * @param prompt A description of the image to generate (e.g., a meal plan).
 * @returns A base64-encoded string of the generated PNG image.
 * @throws Error if the API response contains no image bytes, mirroring the
 *   guard used by the text-to-speech helper in this module.
 */
export const generateImageFromText = async (prompt: string): Promise<string> => {
  console.log("Stub: generateImageFromText called with prompt:", prompt);
  const response = await ai.models.generateImages({
    model: 'imagen-4.0-generate-001',
    prompt: `A vibrant, appetizing flat-lay photo of a meal plan featuring: ${prompt}. Studio lighting, high detail.`,
    config: {
      numberOfImages: 1,
      outputMimeType: 'image/png',
      aspectRatio: '16:9',
    },
  });
  // Every level of this chain is optional in the response, so guard it
  // instead of letting a bare TypeError escape.
  const base64ImageBytes = response.generatedImages?.[0]?.image?.imageBytes;
  if (!base64ImageBytes) {
    throw new Error("No image data returned from image generation API.");
  }
  return base64ImageBytes;
};
/**
 * [STUB] Turns text into spoken audio via the Gemini TTS preview model, using
 * the prebuilt 'Kore' voice and a "Say cheerfully" instruction.
 * @param text The text to be spoken.
 * @returns The base64-encoded inline audio data of the first part of the
 *   first candidate.
 * @throws Error if the response carries no such audio data.
 */
export const generateSpeechFromText = async (text: string): Promise<string> => {
  console.log("Stub: generateSpeechFromText called with text:", text);
  const spokenLine = `Say cheerfully: ${text}`;
  const ttsResponse = await ai.models.generateContent({
    model: "gemini-2.5-flash-preview-tts",
    contents: [{ parts: [{ text: spokenLine }] }],
    config: {
      responseModalities: [Modality.AUDIO],
      speechConfig: {
        voiceConfig: {
          prebuiltVoiceConfig: { voiceName: 'Kore' },
        },
      },
    },
  });
  const audioPayload = ttsResponse.candidates?.[0]?.content?.parts?.[0]?.inlineData?.data;
  if (audioPayload) {
    return audioPayload;
  }
  throw new Error("No audio data returned from TTS API.");
};
/**
 * [STUB] Opens a real-time voice conversation through the Live API, with
 * audio responses in the prebuilt 'Zephyr' voice and transcription enabled
 * for both input and output audio.
 * @param callbacks An object containing onopen, onmessage, onerror, and onclose handlers.
 * @returns The value of `ai.live.connect(...)`: a promise for the live
 *   session, which the UI uses to send data once the connection is open.
 */
export const startVoiceSession = (callbacks: any) => {
  console.log("Stub: startVoiceSession called.");
  const sessionConfig = {
    responseModalities: [Modality.AUDIO],
    speechConfig: {
      voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Zephyr' } },
    },
    inputAudioTranscription: {},
    outputAudioTranscription: {},
    systemInstruction: 'You are a friendly and helpful grocery shopping assistant. Keep your answers concise.',
  };
  return ai.live.connect({
    model: 'gemini-2.5-flash-native-audio-preview-09-2025',
    callbacks,
    config: sessionConfig,
  });
};