Some checks failed
Deploy to Test Environment / deploy-to-test (push) Failing after 1m10s
170 lines
8.8 KiB
SQL
170 lines
8.8 KiB
SQL
-- sql/migrations/003_receipt_scanning_enhancements.sql
|
|
-- ============================================================================
|
|
-- RECEIPT SCANNING ENHANCEMENTS MIGRATION
|
|
-- ============================================================================
|
|
-- Purpose:
|
|
-- This migration adds enhancements to the existing receipt scanning tables:
|
|
-- 1. Enhancements to receipts table for better OCR processing
|
|
-- 2. Enhancements to receipt_items for better item matching
|
|
-- 3. receipt_processing_log for tracking OCR/AI processing attempts
|
|
--
|
|
-- Existing tables:
|
|
-- - receipts (lines 932-948 in master_schema_rollup.sql)
|
|
-- - receipt_items (lines 951-966 in master_schema_rollup.sql)
|
|
-- ============================================================================
|
|
|
|
-- 1. Enhancements to receipts table
|
|
|
|
-- Add store detection confidence
|
|
ALTER TABLE public.receipts
|
|
ADD COLUMN IF NOT EXISTS store_confidence NUMERIC(5,4);
|
|
COMMENT ON COLUMN public.receipts.store_confidence IS 'Confidence score for store detection (0.0-1.0).';
|
|
|
|
-- Add OCR provider used
|
|
ALTER TABLE public.receipts
|
|
ADD COLUMN IF NOT EXISTS ocr_provider TEXT;
|
|
COMMENT ON COLUMN public.receipts.ocr_provider IS 'Which OCR service processed this receipt: tesseract, openai, anthropic.';
|
|
|
|
-- Add error details for failed processing
|
|
ALTER TABLE public.receipts
|
|
ADD COLUMN IF NOT EXISTS error_details JSONB;
|
|
COMMENT ON COLUMN public.receipts.error_details IS 'Detailed error information if processing failed.';
|
|
|
|
-- Add retry count for failed processing
|
|
ALTER TABLE public.receipts
|
|
ADD COLUMN IF NOT EXISTS retry_count INTEGER DEFAULT 0;
|
|
COMMENT ON COLUMN public.receipts.retry_count IS 'Number of processing retry attempts.';
|
|
|
|
-- Add extracted text confidence
|
|
ALTER TABLE public.receipts
|
|
ADD COLUMN IF NOT EXISTS ocr_confidence NUMERIC(5,4);
|
|
COMMENT ON COLUMN public.receipts.ocr_confidence IS 'Overall OCR text extraction confidence score.';
|
|
|
|
-- Add currency detection
|
|
ALTER TABLE public.receipts
|
|
ADD COLUMN IF NOT EXISTS currency TEXT DEFAULT 'CAD';
|
|
COMMENT ON COLUMN public.receipts.currency IS 'Detected currency: CAD, USD, etc.';
|
|
|
|
-- New indexes for receipt processing
|
|
CREATE INDEX IF NOT EXISTS idx_receipts_status_retry ON public.receipts(status, retry_count)
|
|
WHERE status IN ('pending', 'failed') AND retry_count < 3;
|
|
|
|
|
|
-- 2. Enhancements to receipt_items table
|
|
|
|
-- Add line number from receipt for ordering
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS line_number INTEGER;
|
|
COMMENT ON COLUMN public.receipt_items.line_number IS 'Original line number on the receipt for display ordering.';
|
|
|
|
-- Add match confidence score
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS match_confidence NUMERIC(5,4);
|
|
COMMENT ON COLUMN public.receipt_items.match_confidence IS 'Confidence score for item matching (0.0-1.0).';
|
|
|
|
-- Add is_discount flag for discount/coupon lines
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS is_discount BOOLEAN DEFAULT FALSE;
|
|
COMMENT ON COLUMN public.receipt_items.is_discount IS 'Whether this line is a discount/coupon (negative price).';
|
|
|
|
-- Add unit_price if per-unit pricing detected
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS unit_price_cents INTEGER;
|
|
COMMENT ON COLUMN public.receipt_items.unit_price_cents IS 'Per-unit price if detected (e.g., price per kg).';
|
|
|
|
-- Add unit type if detected
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS unit_type TEXT;
|
|
COMMENT ON COLUMN public.receipt_items.unit_type IS 'Unit type if detected: kg, lb, each, etc.';
|
|
|
|
-- Add added_to_pantry flag
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS added_to_pantry BOOLEAN DEFAULT FALSE;
|
|
COMMENT ON COLUMN public.receipt_items.added_to_pantry IS 'Whether this item has been added to user pantry.';
|
|
|
|
-- Add pantry_item_id link
|
|
ALTER TABLE public.receipt_items
|
|
ADD COLUMN IF NOT EXISTS pantry_item_id BIGINT REFERENCES public.pantry_items(pantry_item_id) ON DELETE SET NULL;
|
|
COMMENT ON COLUMN public.receipt_items.pantry_item_id IS 'Link to pantry_items if this receipt item was added to pantry.';
|
|
|
|
-- New indexes for receipt_items
|
|
CREATE INDEX IF NOT EXISTS idx_receipt_items_status ON public.receipt_items(status);
|
|
CREATE INDEX IF NOT EXISTS idx_receipt_items_added_to_pantry ON public.receipt_items(receipt_id, added_to_pantry)
|
|
WHERE added_to_pantry = FALSE;
|
|
CREATE INDEX IF NOT EXISTS idx_receipt_items_pantry_item_id ON public.receipt_items(pantry_item_id)
|
|
WHERE pantry_item_id IS NOT NULL;
|
|
|
|
|
|
-- 3. Receipt Processing Log - track OCR/AI processing attempts
|
|
-- Useful for debugging, monitoring costs, and improving processing
|
|
CREATE TABLE IF NOT EXISTS public.receipt_processing_log (
|
|
log_id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
|
|
receipt_id BIGINT NOT NULL REFERENCES public.receipts(receipt_id) ON DELETE CASCADE,
|
|
processing_step TEXT NOT NULL,
|
|
status TEXT NOT NULL,
|
|
provider TEXT,
|
|
duration_ms INTEGER,
|
|
tokens_used INTEGER,
|
|
cost_cents INTEGER,
|
|
input_data JSONB,
|
|
output_data JSONB,
|
|
error_message TEXT,
|
|
created_at TIMESTAMPTZ DEFAULT now() NOT NULL,
|
|
-- Validate processing step
|
|
CONSTRAINT receipt_processing_log_step_check CHECK (processing_step IN (
|
|
'upload', 'ocr_extraction', 'text_parsing', 'store_detection',
|
|
'item_extraction', 'item_matching', 'price_parsing', 'finalization'
|
|
)),
|
|
-- Validate status
|
|
CONSTRAINT receipt_processing_log_status_check CHECK (status IN ('started', 'completed', 'failed', 'skipped')),
|
|
-- Validate provider if specified
|
|
CONSTRAINT receipt_processing_log_provider_check CHECK (provider IS NULL OR provider IN (
|
|
'tesseract', 'openai', 'anthropic', 'google_vision', 'aws_textract', 'internal'
|
|
))
|
|
);
|
|
COMMENT ON TABLE public.receipt_processing_log IS 'Detailed log of each processing step for receipts, useful for debugging and cost tracking.';
|
|
COMMENT ON COLUMN public.receipt_processing_log.processing_step IS 'Which processing step this log entry is for.';
|
|
COMMENT ON COLUMN public.receipt_processing_log.status IS 'Status of this step: started, completed, failed, skipped.';
|
|
COMMENT ON COLUMN public.receipt_processing_log.provider IS 'External service used: tesseract, openai, anthropic, etc.';
|
|
COMMENT ON COLUMN public.receipt_processing_log.duration_ms IS 'How long this step took in milliseconds.';
|
|
COMMENT ON COLUMN public.receipt_processing_log.tokens_used IS 'Number of API tokens used (for LLM providers).';
|
|
COMMENT ON COLUMN public.receipt_processing_log.cost_cents IS 'Estimated cost in cents for this processing step.';
|
|
COMMENT ON COLUMN public.receipt_processing_log.input_data IS 'Input data sent to the processing step (for debugging).';
|
|
COMMENT ON COLUMN public.receipt_processing_log.output_data IS 'Output data received from the processing step.';
|
|
|
|
-- Indexes for receipt_processing_log
|
|
CREATE INDEX IF NOT EXISTS idx_receipt_processing_log_receipt_id ON public.receipt_processing_log(receipt_id);
|
|
CREATE INDEX IF NOT EXISTS idx_receipt_processing_log_step_status ON public.receipt_processing_log(processing_step, status);
|
|
CREATE INDEX IF NOT EXISTS idx_receipt_processing_log_created_at ON public.receipt_processing_log(created_at DESC);
|
|
|
|
|
|
-- 4. Store-specific receipt patterns - help identify stores from receipt text
|
|
CREATE TABLE IF NOT EXISTS public.store_receipt_patterns (
|
|
pattern_id BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY,
|
|
store_id BIGINT NOT NULL REFERENCES public.stores(store_id) ON DELETE CASCADE,
|
|
pattern_type TEXT NOT NULL,
|
|
pattern_value TEXT NOT NULL,
|
|
priority INTEGER DEFAULT 0,
|
|
is_active BOOLEAN DEFAULT TRUE,
|
|
created_at TIMESTAMPTZ DEFAULT now() NOT NULL,
|
|
updated_at TIMESTAMPTZ DEFAULT now() NOT NULL,
|
|
-- Validate pattern type
|
|
CONSTRAINT store_receipt_patterns_type_check CHECK (pattern_type IN (
|
|
'header_regex', 'footer_regex', 'phone_number', 'address_fragment', 'store_number_format'
|
|
)),
|
|
-- Validate pattern is not empty
|
|
CONSTRAINT store_receipt_patterns_value_check CHECK (TRIM(pattern_value) <> ''),
|
|
-- Unique constraint per store/type/value
|
|
UNIQUE(store_id, pattern_type, pattern_value)
|
|
);
|
|
COMMENT ON TABLE public.store_receipt_patterns IS 'Patterns to help identify stores from receipt text and format.';
|
|
COMMENT ON COLUMN public.store_receipt_patterns.pattern_type IS 'Type of pattern: header_regex, footer_regex, phone_number, etc.';
|
|
COMMENT ON COLUMN public.store_receipt_patterns.pattern_value IS 'The actual pattern (regex or literal text).';
|
|
COMMENT ON COLUMN public.store_receipt_patterns.priority IS 'Higher priority patterns are checked first.';
|
|
COMMENT ON COLUMN public.store_receipt_patterns.is_active IS 'Whether this pattern is currently in use.';
|
|
|
|
-- Indexes for store_receipt_patterns
|
|
CREATE INDEX IF NOT EXISTS idx_store_receipt_patterns_store_id ON public.store_receipt_patterns(store_id);
|
|
CREATE INDEX IF NOT EXISTS idx_store_receipt_patterns_active ON public.store_receipt_patterns(pattern_type, is_active, priority DESC)
|
|
WHERE is_active = TRUE;
|