429 lines
20 KiB
TypeScript
429 lines
20 KiB
TypeScript
|
|
|
|
import { GoogleGenAI, Type, Modality } from "@google/genai";
|
|
import type { FlyerItem, MasterGroceryItem, UnitPrice, Store } from '../types';
|
|
import { CATEGORIES } from '../types';
|
|
import { parsePriceToCents } from '../utils/priceParser';
|
|
|
|
/*
|
|
NOTE ON THE GOOGLE AI API KEY:
|
|
This project uses a Google AI (Gemini) API key. In this environment, you do not need to manually create one.
|
|
You may see a "Choose a key" dialog. If it mentions a "free tier", you can simply close or ignore that dialog.
|
|
The environment will automatically provide a free-tier API key as `process.env.API_KEY` for the AI to work.
|
|
*/
|
|
|
|
// Fail fast at module load when no key is configured; nothing below can reach the API without it.
const configuredApiKey = process.env.API_KEY;
if (!configuredApiKey) {
  throw new Error("API_KEY environment variable not set");
}

// Single Gemini client instance shared by the API calls in this module.
const ai = new GoogleGenAI({ apiKey: configuredApiKey });
|
|
|
|
/**
 * Parses the JSON payload of a Gemini response.
 *
 * The raw text is tried first; if that is not valid JSON, the contents of the
 * first markdown code fence (with or without a `json` tag, any letter case)
 * are tried instead. Trying the raw text first keeps valid JSON whose string
 * values happen to contain ``` from being cut apart.
 *
 * @param responseText The raw text from the AI response. `undefined`/`null`
 *   (a response with no text) is accepted and reported as a parse failure.
 * @returns The parsed JSON value, asserted (not validated) to be of type `T`.
 * @throws Error when the text is missing, empty, or not valid JSON.
 */
function parseGeminiJson<T>(responseText: string | null | undefined): T {
  if (typeof responseText !== 'string' || responseText.trim() === '') {
    console.error("Failed to parse JSON response from AI.", {
      originalResponse: responseText,
      cleanedJSON: '',
      error: 'The response contained no text',
    });
    throw new Error(
      'Failed to parse JSON response from AI. Error: The response contained no text. The AI may have returned malformed data.'
    );
  }

  const trimmedText = responseText.trim();

  // Optional language tag so both ```json ... ``` and bare ``` ... ``` fences are handled.
  const fencePattern = /```(?:json)?\s*([\s\S]*?)\s*```/i;
  const fencedText = trimmedText.match(fencePattern)?.[1];

  const candidates = fencedText ? [trimmedText, fencedText] : [trimmedText];

  let lastError: unknown;
  for (const candidate of candidates) {
    try {
      return JSON.parse(candidate) as T;
    } catch (e: unknown) {
      lastError = e;
    }
  }

  const errorMessage = lastError instanceof Error ? lastError.message : String(lastError);
  console.error("Failed to parse JSON response from AI.", {
    originalResponse: responseText,
    cleanedJSON: fencedText ?? trimmedText,
    error: errorMessage,
  });

  // Re-throw with more context.
  throw new Error(
    `Failed to parse JSON response from AI. Error: ${errorMessage}. The AI may have returned malformed data.`
  );
}
|
|
|
|
/**
 * Reads a file as a data URL and wraps its base64 payload in the
 * `{ inlineData: { data, mimeType } }` shape passed as a content part to the model.
 *
 * @param file The file to encode; its `type` is used as the MIME type.
 * @returns An inline-data part holding the base64 data (without the `data:...,` prefix).
 * @throws Rejects when the read fails, is aborted, or yields a non-string result,
 *   instead of leaving the returned promise pending forever.
 */
const fileToGenerativePart = async (file: File) => {
  const base64Data = await new Promise<string>((resolve, reject) => {
    const reader = new FileReader();

    reader.onload = () => {
      const result = reader.result;
      if (typeof result !== 'string') {
        reject(new Error('FileReader did not produce a data URL string.'));
        return;
      }
      // A data URL is "<header>,<payload>"; keep only the payload.
      const separatorIndex = result.indexOf(',');
      resolve(separatorIndex >= 0 ? result.slice(separatorIndex + 1) : '');
    };
    reader.onerror = () => reject(reader.error ?? new Error('Failed to read file.'));
    reader.onabort = () => reject(new Error('File read was aborted.'));

    reader.readAsDataURL(file);
  });

  return {
    inlineData: { data: base64Data, mimeType: file.type },
  };
};
|
|
|
|
/**
 * Asks the model whether the given image is a grocery store flyer or advertisement.
 *
 * Failures of the model call or of parsing its reply are logged and reported as
 * `false`. The file is encoded before the guarded section, so an error raised
 * while encoding propagates to the caller.
 *
 * @param imageFile The image to classify.
 * @returns The `is_flyer` value from the model's reply, or `false` on failure.
 */
export const isImageAFlyer = async (imageFile: File): Promise<boolean> => {
  const flyerCandidate = await fileToGenerativePart(imageFile);

  try {
    const question = {
      text: `Is this a grocery store flyer or advertisement? Analyze the image and answer with only a JSON object: {"is_flyer": true} or {"is_flyer": false}.`
    };
    const answerSchema = {
      type: Type.OBJECT,
      properties: {
        is_flyer: { type: Type.BOOLEAN }
      },
      required: ['is_flyer']
    };

    const reply = await ai.models.generateContent({
      model: 'gemini-flash-lite-latest',
      contents: { parts: [flyerCandidate, question] },
      config: {
        responseMimeType: "application/json",
        responseSchema: answerSchema
      }
    });

    const { is_flyer } = parseGeminiJson<{ is_flyer: boolean }>(reply.text);
    return is_flyer;
  } catch (e) {
    console.error("Flyer check failed:", e);
    return false;
  }
};
|
|
|
|
/**
 * Asks the model for a physical store address visible in the image.
 *
 * Errors from encoding the file, calling the model, or parsing its reply are
 * not caught here and propagate to the caller.
 *
 * @param imageFile The image to inspect.
 * @returns The `address` value from the model's reply (a string or `null`).
 */
export const extractAddressFromImage = async (imageFile: File): Promise<string | null> => {
  const storeImage = await fileToGenerativePart(imageFile);

  const instruction = {
    text: `Is there a physical store address visible in this image? If so, extract the full address. If not, return null. Return ONLY a JSON object: {"address": "123 Main St, Anytown, USA"} or {"address": null}.`
  };
  const addressSchema = {
    type: Type.OBJECT,
    properties: {
      address: { type: Type.STRING, nullable: true, description: "The full store address found in the image, or null if not present." },
    },
    required: ['address']
  };

  const reply = await ai.models.generateContent({
    model: 'gemini-flash-lite-latest',
    contents: { parts: [storeImage, instruction] },
    config: {
      responseMimeType: "application/json",
      responseSchema: addressSchema
    }
  });

  const { address } = parseGeminiJson<{ address: string | null }>(reply.text);
  return address;
};
|
|
|
|
|
|
/**
 * One flyer item in the shape requested from the AI model, before it is
 * converted into the stored item shape.
 */
interface RawFlyerItem {
  /** Item name as it appears in the flyer. */
  item: string;
  /** Price text, including currency symbols. */
  price: string;
  /** Quantity, unit, or deal description (e.g. "per lb", "5 oz"). */
  quantity: string;
  /** Category name chosen by the model from the supplied category list. */
  category: string;
  /** Numeric value parsed from the quantity text, or null when there is none. */
  quantity_num: number | null;
  /** ID of the matched master item, or null. */
  master_item_id: number | null;
  /** Price per standard unit, or null when not applicable. */
  unit_price: UnitPrice | null;
}
|
|
|
|
/** Store, validity period, and items extracted from a flyer. */
interface ExtractedCoreData {
  /** Name of the grocery store. */
  store_name: string;
  /** First day the deals are valid (YYYY-MM-DD is requested from the model); typed as nullable. */
  valid_from: string | null;
  /** Last day the deals are valid (YYYY-MM-DD is requested from the model); typed as nullable. */
  valid_to: string | null;
  /** Extracted items, without the `id`, `created_at`, and `flyer_id` fields of `FlyerItem`. */
  items: Omit<FlyerItem, 'id' | 'created_at' | 'flyer_id'>[];
}
|
|
|
|
/** Result of the logo-extraction request. */
interface ExtractedLogoData {
  /** Base64-encoded PNG of the store logo, or null when no logo was found. */
  store_logo_base_64: string | null;
}
|
|
|
|
|
|
/**
 * Returns the value when it is a `YYYY-MM-DD` string (surrounding whitespace
 * removed), otherwise `null`. This turns placeholder strings such as "null"
 * or "" into a real null so the caller's fallback applies.
 */
const normalizeIsoDate = (value: string | null | undefined): string | null => {
  if (typeof value !== 'string') {
    return null;
  }
  const trimmed = value.trim();
  return /^\d{4}-\d{2}-\d{2}$/.test(trimmed) ? trimmed : null;
};

/** Builds the instruction text sent alongside the flyer images. */
const buildFlyerExtractionPrompt = (unmatchedItemId: number, masterItemsForPrompt: readonly unknown[]): string =>
  [
    `You are an expert data extraction and matching system for grocery store flyers. Analyze the provided flyer images (which may be multiple pages of the same flyer).`,
    `1. Identify the name of the grocery store/company.`,
    `2. Identify the date range for which the flyer's deals are valid. Extract a 'valid_from' and 'valid_to' date. You MUST return dates in 'YYYY-MM-DD' format. If no date range is clearly visible, you MUST return 'null' for both date fields.`,
    `3. Extract all distinct sale items from all pages. For each item, extract its name, price, and quantity/deal description.`,
    `4. **Categorization**: For each item, determine its category from the provided list. You MUST select one of the exact category names from the list.`,
    `5. **Extract Numeric Quantity**: From the quantity string, extract the primary numeric value if one is present (e.g., for "500g bag", extract 500; for "2L bottle", extract 2; for "per lb", return null). This should be a number, not a string. If no clear number is present, you MUST return 'null'. Store this in the 'quantity_num' field.`,
    `6. **CRITICAL ITEM MATCHING (STRICTLY ENFORCED)**: For each extracted item, you MUST match it to its corresponding canonical item from the 'Master Items List'. This is the most important task.`,
    `    * **RULE 1: BE EXTREMELY STRICT.** The goal is to link an item to its *exact* canonical counterpart (e.g., 'Granny Smith Apples' -> 'apples'), not a vaguely related item.`,
    `    * **RULE 2: NEW FALLBACK RULE.** If you are not 100% certain of a perfect match, you MUST assign the \`master_item_id\` of the special \`_UNMATCHED_\` item (ID: ${unmatchedItemId}). This is not optional. Assigning a wrong item is a critical failure.`,
    `    * **RULE 3: FOCUS ON THE CORE PRODUCT.** Ignore brands unless the master item is brand-specific. For example, 'Compliments Organic Mushrooms' should match a master item named 'mushrooms'.`,
    `    * **NEGATIVE EXAMPLES (WHAT NOT TO DO):**`,
    `        * 'Sunrise Salmon Fillets' should NOT be matched to 'chicken thighs'.`,
    `        * 'Large Avocados' should NOT be matched to 'chicken thighs'.`,
    `        * If the flyer item is 'bananas' and the only master items are 'apples' and 'oranges', you MUST assign the \`_UNMATCHED_\` ID.`,
    `    * **FINAL CHECK:** Before assigning an ID, ask: "Is '[flyer item name]' a type of '[master item name]'?". If the answer is no, and no other item fits, you MUST use the \`_UNMATCHED_\` ID.`,
    `7. **Unit Price Calculation**: For each item, calculate and provide a 'unit_price'. This is CRITICAL for price comparison. Your goal is to standardize the price into a common unit.`,
    `    * **Multi-buy deals**: If the price is '2 for $5.00', the unit price object should reflect a value of 2.50 and a unit of 'each'.`,
    `    * **Price by weight**: If the price is '$3.99/lb', the unit price object should be { "value": 3.99, "unit": "lb" }.`,
    `    * The unit price MUST be a JSON object: \`{ "value": <number>, "unit": "<string>" }\`. Use standard units: 'g', 'kg', 'ml', 'l' for metric, and 'oz', 'lb', 'fl oz' for imperial. Use 'each' for items sold individually.`,
    `    * If a unit price cannot be determined or is not applicable, you MUST return \`null\` for this field.`,
    ``,
    `Return the result as a single JSON object, strictly following the provided schema. It is critical that all string values within the JSON are correctly escaped.`,
    ``,
    `Category List (for categorization): ${JSON.stringify(CATEGORIES)}`,
    `Master Items List (for matching): ${JSON.stringify(masterItemsForPrompt)}`,
    ``,
  ].join('\n');

/**
 * Sends the flyer pages to the model and returns the store name, validity
 * dates, and sale items, with each item matched against the master list.
 *
 * - Dates that are missing or not in `YYYY-MM-DD` form are replaced with
 *   today's date (UTC), because a date is mandatory downstream.
 * - `master_item_id` is `null` when the model chose the `_UNMATCHED_` fallback
 *   or returned an ID that is not in `masterItems`.
 *
 * @param imageFiles The flyer page images (one or more pages of the same flyer).
 * @param masterItems The canonical items the model may match against.
 * @returns The extracted data, ready to be stored.
 * @throws Propagates errors from file encoding, the model call, and JSON parsing.
 */
export const extractCoreDataFromImage = async (imageFiles: File[], masterItems: MasterGroceryItem[]): Promise<ExtractedCoreData> => {
  const imageParts = await Promise.all(imageFiles.map(fileToGenerativePart));

  // Create a special "unmatched" item to act as a fallback for the AI.
  // This gives the AI a valid, required choice when no other match is suitable.
  const UNMATCHED_ITEM_ID = 0;
  const unmatchedMasterItem = { id: UNMATCHED_ITEM_ID, name: '_UNMATCHED_' };

  const masterItemsForPrompt = [
    ...masterItems.map(item => ({ id: item.id, name: item.name })),
    unmatchedMasterItem
  ];

  // Used to reject IDs the model invents that do not exist in the master list.
  const knownMasterItemIds = new Set(masterItems.map(item => item.id));

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: {
      parts: [
        ...imageParts,
        { text: buildFlyerExtractionPrompt(UNMATCHED_ITEM_ID, masterItemsForPrompt) }
      ]
    },
    config: {
      responseMimeType: "application/json",
      responseSchema: {
        type: Type.OBJECT,
        properties: {
          store_name: { type: Type.STRING, description: 'The name of the grocery store (e.g., "Safeway", "Kroger").' },
          // nullable so the model can return a real null instead of the string "null".
          valid_from: { type: Type.STRING, nullable: true, description: "The start date of the sale in YYYY-MM-DD format, or null if not found." },
          valid_to: { type: Type.STRING, nullable: true, description: "The end date of the sale in YYYY-MM-DD format, or null if not found." },
          items: {
            type: Type.ARRAY,
            items: {
              type: Type.OBJECT,
              properties: {
                item: { type: Type.STRING, description: 'The name of the grocery item as it appears in the flyer.' },
                price: { type: Type.STRING, description: 'The price of the item, including currency symbols.' },
                quantity: { type: Type.STRING, description: 'The quantity, unit, or deal description (e.g., "per lb", "5 oz").' },
                category: { type: Type.STRING, description: "The category of the item from the predefined list." },
                quantity_num: { type: Type.NUMBER, nullable: true, description: 'The parsed numeric value from the quantity string, or null.'},
                master_item_id: { type: Type.INTEGER, description: `The integer ID of the matching item from the master list, or ${UNMATCHED_ITEM_ID} (the _UNMATCHED_ item) if no exact match is found.` },
                unit_price: {
                  type: Type.OBJECT,
                  nullable: true,
                  description: "A structured object for the calculated price per single standard unit, or null if not applicable.",
                  properties: {
                    value: { type: Type.NUMBER, description: "The numeric value of the unit price." },
                    unit: { type: Type.STRING, description: "The standard unit of measurement (e.g., 'g', 'ml', 'lb', 'oz', 'each')." }
                  },
                  required: ["value", "unit"]
                }
              },
              required: ['item', 'price', 'quantity', 'category', 'quantity_num', 'master_item_id', 'unit_price']
            }
          }
        },
        required: ['store_name', 'valid_from', 'valid_to', 'items']
      }
    }
  });

  const parsedJson = parseGeminiJson<{
    store_name: string;
    valid_from: string | null;
    valid_to: string | null;
    items: RawFlyerItem[];
  }>(response.text ?? '');

  const processedItems: Omit<FlyerItem, 'id' | 'created_at' | 'flyer_id'>[] = (parsedJson.items ?? []).map(rawItem => {
    const candidateId = rawItem.master_item_id;
    // The _UNMATCHED_ ID and any ID outside the master list both become null for the database.
    const isRealMatch =
      candidateId != null &&
      candidateId !== UNMATCHED_ITEM_ID &&
      knownMasterItemIds.has(candidateId);

    return {
      item: rawItem.item,
      price_display: rawItem.price,
      price_in_cents: parsePriceToCents(rawItem.price),
      quantity: rawItem.quantity,
      category_name: rawItem.category,
      quantity_num: rawItem.quantity_num,
      master_item_id: isRealMatch ? candidateId : null,
      unit_price: rawItem.unit_price,
    };
  });

  // Calendar date in UTC, since toISOString() always reports UTC.
  const today = new Date().toISOString().split('T')[0];

  const finalData: ExtractedCoreData = {
    store_name: parsedJson.store_name,
    // Per user instruction, a date is mandatory. If the AI cannot find one
    // (or returns something that is not a YYYY-MM-DD date), we must use
    // today's date as a fallback.
    valid_from: normalizeIsoDate(parsedJson.valid_from) ?? today,
    valid_to: normalizeIsoDate(parsedJson.valid_to) ?? today,
    items: processedItems,
  };

  return finalData;
};
|
|
|
|
/**
 * Asks the model to locate the main store logo in the flyer images and return
 * it as a base64-encoded PNG string.
 *
 * The schema field is nullable so the model can report "no logo" as a real
 * null; an empty string or the literal text "null" is also treated as no logo.
 *
 * @param imageFiles The flyer page images.
 * @returns An object whose `store_logo_base_64` is the logo string or `null`.
 * @throws Propagates errors from file encoding, the model call, and JSON parsing.
 */
export const extractLogoFromImage = async (imageFiles: File[]): Promise<ExtractedLogoData> => {
  const imageParts = await Promise.all(imageFiles.map(fileToGenerativePart));

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: {
      parts: [
        ...imageParts,
        { text: `You are a specialized image analysis tool. Your only task is to identify the main store logo in the provided flyer image. Crop it from the image into a small square (approx 64x64 pixels). Return it as a base64-encoded PNG string. If no logo is found, return null. Return ONLY a JSON object matching the specified schema. It is critical that all string values within the JSON are correctly escaped.` }
      ]
    },
    config: {
      responseMimeType: "application/json",
      responseSchema: {
        type: Type.OBJECT,
        properties: {
          store_logo_base_64: { type: Type.STRING, nullable: true, description: "A small, cropped, base64-encoded PNG string of the store's logo, or null if not found." },
        },
        required: ['store_logo_base_64']
      }
    }
  });

  const parsed = parseGeminiJson<ExtractedLogoData | null>(response.text ?? '');
  const rawLogo = parsed?.store_logo_base_64;
  const logo = typeof rawLogo === 'string' ? rawLogo.trim() : '';

  // Placeholder strings must not reach callers as if they were image data.
  const hasLogo = logo !== '' && logo.toLowerCase() !== 'null';
  return { store_logo_base_64: hasLogo ? logo : null };
};
|
|
|
|
/**
 * Asks the model for short insights, meal ideas, or shopping tips based on
 * the given sale items.
 *
 * @param items The flyer items, sent to the model as pretty-printed JSON.
 * @returns The model's text reply, or an empty string when the response
 *   carries no text (so the declared `string` result is always honoured).
 */
export const getQuickInsights = async (items: FlyerItem[]): Promise<string> => {
  const prompt = `Based on this list of grocery items on sale, provide some quick insights, simple meal ideas, or shopping tips. Keep it concise and easy to read.\n\nItems:\n${JSON.stringify(items, null, 2)}`;

  const response = await ai.models.generateContent({
    model: 'gemini-flash-lite-latest',
    contents: prompt
  });

  // `text` is undefined when the response contains no text parts.
  return response.text ?? '';
};
|
|
|
|
/**
 * Asks the model for a detailed markdown analysis of the sale items: a weekly
 * meal plan, best-value deals, and potential purchasing traps.
 *
 * @param items The flyer items, sent to the model as pretty-printed JSON.
 * @returns The model's text reply, or an empty string when the response
 *   carries no text (so the declared `string` result is always honoured).
 */
export const getDeepDiveAnalysis = async (items: FlyerItem[]): Promise<string> => {
  const prompt = `Perform a detailed analysis of these grocery sale items. Create a comprehensive weekly meal plan to maximize savings. Identify the best value-for-money deals, considering unit prices if possible. Point out any potential purchasing traps (e.g., items that seem cheap but have a high cost per unit or are near expiration). Format the output in clear, well-structured markdown.\n\nItems:\n${JSON.stringify(items, null, 2)}`;

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-pro',
    contents: prompt,
    config: {
      thinkingConfig: { thinkingBudget: 32768 }
    }
  });

  // `text` is undefined when the response contains no text parts.
  return response.text ?? '';
};
|
|
|
|
/**
 * Runs a Google-Search-grounded query about the first three flyer items
 * (recipes, nutritional information, or price comparisons).
 *
 * @param items The flyer items; only the names of the first three are used.
 * @returns The model's text reply (empty string when the response carries no
 *   text) and the grounding chunks of the first candidate (empty array when absent).
 */
export const searchWeb = async (items: FlyerItem[]): Promise<{text: string; sources: any[]}> => {
  const topItems = items.slice(0, 3).map(i => i.item).join(', ');
  const prompt = `Find recipes, nutritional information, or price comparisons for these items: ${topItems}. Provide a summary and the sources you used.`;

  const response = await ai.models.generateContent({
    model: 'gemini-2.5-flash',
    contents: prompt,
    config: {
      tools: [{googleSearch: {}}]
    }
  });

  const sources = response.candidates?.[0]?.groundingMetadata?.groundingChunks ?? [];
  // `text` is undefined when the response contains no text parts.
  return { text: response.text ?? '', sources };
};
|
|
|
|
// ============================================================================
|
|
// STUBS FOR FUTURE AI FEATURES
|
|
// ============================================================================
|
|
|
|
/**
 * [STUB] Uses Google Maps grounding to find nearby stores and plan a shopping trip.
 *
 * @param items The items from the flyer; only the names of the first five are used.
 * @param store The store associated with the flyer; a generic phrase is used when absent.
 * @param userLocation The user's current geographic coordinates.
 * @returns The model's text reply (empty string when the response carries no
 *   text) and the grounding chunks of the first candidate (empty array when absent).
 */
export const planTripWithMaps = async (items: FlyerItem[], store: Store | undefined, userLocation: GeolocationCoordinates): Promise<{text: string; sources: any[]}> => {
  console.log("Stub: planTripWithMaps called with location:", userLocation);
  const topItems = items.slice(0, 5).map(i => i.item).join(', ');
  const storeName = store?.name || 'the grocery store';

  const response = await ai.models.generateContent({
    model: "gemini-2.5-flash",
    contents: `I have a shopping list with items like ${topItems}. Find the nearest ${storeName} to me and suggest the best route. Also, are there any other specialty stores nearby (like a bakery or butcher) that might have good deals on related items?`,
    config: {
      tools: [{googleMaps: {}}],
      toolConfig: {
        retrievalConfig: {
          latLng: {
            latitude: userLocation.latitude,
            longitude: userLocation.longitude
          }
        }
      }
    },
  });

  // In a real implementation, you would render the map URLs from the sources.
  const sources = response.candidates?.[0]?.groundingMetadata?.groundingChunks ?? [];
  // `text` is undefined when the response contains no text parts.
  return { text: response.text ?? '', sources };
};
|
|
|
|
/**
 * [STUB] Generates an image based on a text prompt using the Imagen model.
 *
 * @param prompt A description of the image to generate (e.g., a meal plan).
 * @returns A base64-encoded string of the generated PNG image.
 * @throws Error when the response contains no image data, rather than failing
 *   with a TypeError or resolving to `undefined`.
 */
export const generateImageFromText = async (prompt: string): Promise<string> => {
  console.log("Stub: generateImageFromText called with prompt:", prompt);
  const response = await ai.models.generateImages({
    model: 'imagen-4.0-generate-001',
    prompt: `A vibrant, appetizing flat-lay photo of a meal plan featuring: ${prompt}. Studio lighting, high detail.`,
    config: {
      numberOfImages: 1,
      outputMimeType: 'image/png',
      aspectRatio: '16:9',
    },
  });

  // Every level of this chain may be absent when no image comes back.
  const base64ImageBytes = response.generatedImages?.[0]?.image?.imageBytes;
  if (!base64ImageBytes) {
    throw new Error("No image data returned from image generation API.");
  }
  return base64ImageBytes;
};
|
|
|
|
/**
 * [STUB] Turns text into speech audio via the TTS model, using the prebuilt
 * 'Kore' voice.
 *
 * @param text The text to be spoken.
 * @returns The base64-encoded audio data from the first part of the first candidate.
 * @throws Error when that part carries no inline audio data.
 */
export const generateSpeechFromText = async (text: string): Promise<string> => {
  console.log("Stub: generateSpeechFromText called with text:", text);

  const spokenContent = [{ parts: [{ text: `Say cheerfully: ${text}` }] }];
  const voice = { prebuiltVoiceConfig: { voiceName: 'Kore' } };

  const ttsResponse = await ai.models.generateContent({
    model: "gemini-2.5-flash-preview-tts",
    contents: spokenContent,
    config: {
      responseModalities: [Modality.AUDIO],
      speechConfig: { voiceConfig: voice },
    },
  });

  const firstPart = ttsResponse.candidates?.[0]?.content?.parts?.[0];
  const audioPayload = firstPart?.inlineData?.data;
  if (audioPayload) {
    return audioPayload;
  }
  throw new Error("No audio data returned from TTS API.");
};
|
|
|
|
/**
 * [STUB] Opens a real-time voice conversation through the Live API, configured
 * for audio replies with the 'Zephyr' voice and input/output transcription.
 *
 * @param callbacks An object containing onopen, onmessage, onerror, and onclose handlers.
 * @returns The result of `ai.live.connect`, which the UI uses to send data once
 *   the connection is open.
 */
export const startVoiceSession = (callbacks: any) => {
  console.log("Stub: startVoiceSession called.");

  const sessionConfig = {
    responseModalities: [Modality.AUDIO],
    speechConfig: {
      voiceConfig: { prebuiltVoiceConfig: { voiceName: 'Zephyr' } },
    },
    inputAudioTranscription: {},
    outputAudioTranscription: {},
    systemInstruction: 'You are a friendly and helpful grocery shopping assistant. Keep your answers concise.',
  };

  return ai.live.connect({
    model: 'gemini-2.5-flash-native-audio-preview-09-2025',
    callbacks,
    config: sessionConfig,
  });
};