-- sql/migrations/003_receipt_scanning_enhancements.sql -- ============================================================================ -- RECEIPT SCANNING ENHANCEMENTS MIGRATION -- ============================================================================ -- Purpose: -- This migration adds enhancements to the existing receipt scanning tables: -- 1. Enhancements to receipts table for better OCR processing -- 2. Enhancements to receipt_items for better item matching -- 3. receipt_processing_log for tracking OCR/AI processing attempts -- -- Existing tables: -- - receipts (lines 932-948 in master_schema_rollup.sql) -- - receipt_items (lines 951-966 in master_schema_rollup.sql) -- ============================================================================ -- 1. Enhancements to receipts table -- Add store detection confidence ALTER TABLE public.receipts ADD COLUMN IF NOT EXISTS store_confidence NUMERIC(5,4); COMMENT ON COLUMN public.receipts.store_confidence IS 'Confidence score for store detection (0.0-1.0).'; -- Add OCR provider used ALTER TABLE public.receipts ADD COLUMN IF NOT EXISTS ocr_provider TEXT; COMMENT ON COLUMN public.receipts.ocr_provider IS 'Which OCR service processed this receipt: tesseract, openai, anthropic.'; -- Add error details for failed processing ALTER TABLE public.receipts ADD COLUMN IF NOT EXISTS error_details JSONB; COMMENT ON COLUMN public.receipts.error_details IS 'Detailed error information if processing failed.'; -- Add retry count for failed processing ALTER TABLE public.receipts ADD COLUMN IF NOT EXISTS retry_count INTEGER DEFAULT 0; COMMENT ON COLUMN public.receipts.retry_count IS 'Number of processing retry attempts.'; -- Add extracted text confidence ALTER TABLE public.receipts ADD COLUMN IF NOT EXISTS ocr_confidence NUMERIC(5,4); COMMENT ON COLUMN public.receipts.ocr_confidence IS 'Overall OCR text extraction confidence score.'; -- Add currency detection ALTER TABLE public.receipts ADD COLUMN IF NOT EXISTS currency TEXT DEFAULT 'CAD'; COMMENT ON COLUMN public.receipts.currency IS 'Detected currency: CAD, USD, etc.'; -- New indexes for receipt processing CREATE INDEX IF NOT EXISTS idx_receipts_status_retry ON public.receipts(status, retry_count) WHERE status IN ('pending', 'failed') AND retry_count < 3; -- 2. Enhancements to receipt_items table -- Add line number from receipt for ordering ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS line_number INTEGER; COMMENT ON COLUMN public.receipt_items.line_number IS 'Original line number on the receipt for display ordering.'; -- Add match confidence score ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS match_confidence NUMERIC(5,4); COMMENT ON COLUMN public.receipt_items.match_confidence IS 'Confidence score for item matching (0.0-1.0).'; -- Add is_discount flag for discount/coupon lines ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS is_discount BOOLEAN DEFAULT FALSE; COMMENT ON COLUMN public.receipt_items.is_discount IS 'Whether this line is a discount/coupon (negative price).'; -- Add unit_price if per-unit pricing detected ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS unit_price_cents INTEGER; COMMENT ON COLUMN public.receipt_items.unit_price_cents IS 'Per-unit price if detected (e.g., price per kg).'; -- Add unit type if detected ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS unit_type TEXT; COMMENT ON COLUMN public.receipt_items.unit_type IS 'Unit type if detected: kg, lb, each, etc.'; -- Add added_to_pantry flag ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS added_to_pantry BOOLEAN DEFAULT FALSE; COMMENT ON COLUMN public.receipt_items.added_to_pantry IS 'Whether this item has been added to user pantry.'; -- Add pantry_item_id link ALTER TABLE public.receipt_items ADD COLUMN IF NOT EXISTS pantry_item_id BIGINT REFERENCES public.pantry_items(pantry_item_id) ON DELETE SET NULL; COMMENT ON COLUMN public.receipt_items.pantry_item_id IS 'Link to pantry_items if this receipt item was added to pantry.'; -- New indexes for receipt_items CREATE INDEX IF NOT EXISTS idx_receipt_items_status ON public.receipt_items(status); CREATE INDEX IF NOT EXISTS idx_receipt_items_added_to_pantry ON public.receipt_items(receipt_id, added_to_pantry) WHERE added_to_pantry = FALSE; CREATE INDEX IF NOT EXISTS idx_receipt_items_pantry_item_id ON public.receipt_items(pantry_item_id) WHERE pantry_item_id IS NOT NULL; -- 3. Receipt Processing Log - track OCR/AI processing attempts -- Useful for debugging, monitoring costs, and improving processing CREATE TABLE IF NOT EXISTS public.receipt_processing_log ( log_id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, receipt_id BIGINT NOT NULL REFERENCES public.receipts(receipt_id) ON DELETE CASCADE, processing_step TEXT NOT NULL, status TEXT NOT NULL, provider TEXT, duration_ms INTEGER, tokens_used INTEGER, cost_cents INTEGER, input_data JSONB, output_data JSONB, error_message TEXT, created_at TIMESTAMPTZ DEFAULT now() NOT NULL, -- Validate processing step CONSTRAINT receipt_processing_log_step_check CHECK (processing_step IN ( 'upload', 'ocr_extraction', 'text_parsing', 'store_detection', 'item_extraction', 'item_matching', 'price_parsing', 'finalization' )), -- Validate status CONSTRAINT receipt_processing_log_status_check CHECK (status IN ('started', 'completed', 'failed', 'skipped')), -- Validate provider if specified CONSTRAINT receipt_processing_log_provider_check CHECK (provider IS NULL OR provider IN ( 'tesseract', 'openai', 'anthropic', 'google_vision', 'aws_textract', 'internal' )) ); COMMENT ON TABLE public.receipt_processing_log IS 'Detailed log of each processing step for receipts, useful for debugging and cost tracking.'; COMMENT ON COLUMN public.receipt_processing_log.processing_step IS 'Which processing step this log entry is for.'; COMMENT ON COLUMN public.receipt_processing_log.status IS 'Status of this step: started, completed, failed, skipped.'; COMMENT ON COLUMN public.receipt_processing_log.provider IS 'External service used: tesseract, openai, anthropic, etc.'; COMMENT ON COLUMN public.receipt_processing_log.duration_ms IS 'How long this step took in milliseconds.'; COMMENT ON COLUMN public.receipt_processing_log.tokens_used IS 'Number of API tokens used (for LLM providers).'; COMMENT ON COLUMN public.receipt_processing_log.cost_cents IS 'Estimated cost in cents for this processing step.'; COMMENT ON COLUMN public.receipt_processing_log.input_data IS 'Input data sent to the processing step (for debugging).'; COMMENT ON COLUMN public.receipt_processing_log.output_data IS 'Output data received from the processing step.'; -- Indexes for receipt_processing_log CREATE INDEX IF NOT EXISTS idx_receipt_processing_log_receipt_id ON public.receipt_processing_log(receipt_id); CREATE INDEX IF NOT EXISTS idx_receipt_processing_log_step_status ON public.receipt_processing_log(processing_step, status); CREATE INDEX IF NOT EXISTS idx_receipt_processing_log_created_at ON public.receipt_processing_log(created_at DESC); -- 4. Store-specific receipt patterns - help identify stores from receipt text CREATE TABLE IF NOT EXISTS public.store_receipt_patterns ( pattern_id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY, store_id BIGINT NOT NULL REFERENCES public.stores(store_id) ON DELETE CASCADE, pattern_type TEXT NOT NULL, pattern_value TEXT NOT NULL, priority INTEGER DEFAULT 0, is_active BOOLEAN DEFAULT TRUE, created_at TIMESTAMPTZ DEFAULT now() NOT NULL, updated_at TIMESTAMPTZ DEFAULT now() NOT NULL, -- Validate pattern type CONSTRAINT store_receipt_patterns_type_check CHECK (pattern_type IN ( 'header_regex', 'footer_regex', 'phone_number', 'address_fragment', 'store_number_format' )), -- Validate pattern is not empty CONSTRAINT store_receipt_patterns_value_check CHECK (TRIM(pattern_value) <> ''), -- Unique constraint per store/type/value UNIQUE(store_id, pattern_type, pattern_value) ); COMMENT ON TABLE public.store_receipt_patterns IS 'Patterns to help identify stores from receipt text and format.'; COMMENT ON COLUMN public.store_receipt_patterns.pattern_type IS 'Type of pattern: header_regex, footer_regex, phone_number, etc.'; COMMENT ON COLUMN public.store_receipt_patterns.pattern_value IS 'The actual pattern (regex or literal text).'; COMMENT ON COLUMN public.store_receipt_patterns.priority IS 'Higher priority patterns are checked first.'; COMMENT ON COLUMN public.store_receipt_patterns.is_active IS 'Whether this pattern is currently in use.'; -- Indexes for store_receipt_patterns CREATE INDEX IF NOT EXISTS idx_store_receipt_patterns_store_id ON public.store_receipt_patterns(store_id); CREATE INDEX IF NOT EXISTS idx_store_receipt_patterns_active ON public.store_receipt_patterns(pattern_type, is_active, priority DESC) WHERE is_active = TRUE;