Files
food-ai/backend/internal/adapters/openai/recognition.go
dbastrikin 180c741424 feat: dish recognition UX, background mode, and backend bug fixes
Flutter client:
- Progress dialog: redesigned with pulsing animated icon, info hint about
  background mode, full-width Minimize button; dismiss signal via ValueNotifier
  so the dialog always closes regardless of widget lifecycle
- Background recognition: when user taps Minimize, wasMinimizedByUser flag is
  set; on completion a snackbar is shown instead of opening DishResultSheet
  directly; snackbar action opens the sheet on demand
- Fix dialog spinning forever: finally block guarantees dismissSignal=true on
  all exit paths including early returns from context.mounted checks
- Fix DishResultSheet not appearing: add ValueKey to _DailyMealsSection and
  meal card Padding so Flutter reuses elements when _TodayJobsWidget is
  inserted/removed from the SliverChildListDelegate list
- todayJobsProvider refresh: added refresh() method; called after job submit
  and on DishJobDone; all ref.read() calls guarded with context.mounted checks
- food_search_sheet: scan buttons replaced with full-width stacked OutlinedButtons
- app.dart: WidgetsBindingObserver refreshes scan providers on app resume
- L10n: added dishRecognitionHint and minimize keys to all 12 locales

Backend:
- migrations/003: ALTER TYPE recipe_source ADD VALUE 'recommendation' to fix
  22P02 error in GET /home/summary -> getRecommendations()
- item_enricher: normalizeProductCategory() validates AI-returned category
  against known slugs, falls back to "other" — fixes products_category_fkey
  FK violation during receipt recognition
- recognition prompt: enumerate valid categories so AI returns correct values

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-28 00:03:17 +02:00

234 lines
7.8 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package openai
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/food-ai/backend/internal/adapters/ai"
)
// langNames translates an ISO 639-1 language code into the English name of
// that language, which is what gets interpolated into the AI prompts.
// Codes missing from this table are handled by the callers.
var langNames = map[string]string{
	"en": "English",
	"ru": "Russian",
	"es": "Spanish",
	"de": "German",
	"fr": "French",
	"it": "Italian",
	"pt": "Portuguese",
	"zh": "Chinese",
	"ja": "Japanese",
	"ko": "Korean",
	"ar": "Arabic",
	"hi": "Hindi",
}
// RecognizeReceipt uses the vision model to extract food items from a receipt photo.
//
// imageBase64 and mimeType are forwarded unchanged to generateVisionContent.
// lang is an ISO 639-1 code looked up in langNames; an unknown or empty code
// falls back to English. The resolved language name tells the model which
// language to use for item names.
//
// On success the returned result always has non-nil Items and Unrecognized
// slices. Errors from the model call and from decoding its reply are wrapped
// with context and returned with a nil result.
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType, lang string) (*ai.ReceiptResult, error) {
	langName := langNames[lang]
	if langName == "" {
		langName = "English"
	}

	// The prompt goes through fmt.Sprintf, so literal percent signs are
	// written as %% and the only real verb is the trailing %s (language).
	prompt := fmt.Sprintf(`You are an OCR system for grocery receipts.
Analyse the receipt photo and extract a list of food products.
Rules for each product:
NAME (confidence):
- Remove article codes, cashier codes (e.g. "1/72", "4607001234"), extra symbols.
- Complete obviously truncated OCR names: "Паштет шпро." → "Паштет шпротный",
"Паштет с говяжьей пече" → "Паштет с говяжьей печенью".
- Preserve meaningful product attributes: fat percentage ("3.2%%", "жирн. 9%%"),
flavour ("с гусиной печенью", "яблочный"), brand qualifiers ("ультрапастеризованное").
- confidence: your certainty that the name is correct (0.0-1.0).
QUANTITY + UNIT (quantity_confidence):
- If a weight or volume is written on the receipt line (e.g. "160г", "1л", "500 мл", "0.5кг"),
use it as quantity+unit. quantity_confidence = 0.9-1.0.
- If the count on the receipt is 1 and no weight/volume is stated, but the product is a
liquid (juice, milk, kefir, etc.) — infer 1 l and set quantity_confidence = 0.5.
- If the count is 1 and no weight is stated, but the product is a solid packaged good
(pâté, spreadable cheese, sausage, butter, hard cheese, etc.) — infer a typical
package weight in grams (e.g. pâté 100 g, spreadable cheese 180 g, butter 200 g)
and set quantity_confidence = 0.35.
- If the receipt explicitly states the quantity and unit (e.g. "2 кг", "3 шт"),
use them directly. quantity_confidence = 1.0.
- Never output quantity = 1 with unit = "g" unless the receipt explicitly says "1 г".
- unit must be one of: g, kg, ml, l, pcs, pack.
CATEGORY: dairy | meat | produce | bakery | frozen | beverages | other
Skip items that are not food (household chemicals, tobacco, alcohol, bags, services).
Items with unreadable text — add to unrecognized.
Return all text fields (name) in %s.
Return ONLY valid JSON without markdown:
{
"items": [
{"name": "...", "quantity": 160, "unit": "g", "category": "other", "confidence": 0.95, "quantity_confidence": 0.9}
],
"unrecognized": [
{"raw_text": "...", "price": 89.0}
]
}`, langName)

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize receipt: %w", err)
	}

	var result ai.ReceiptResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse receipt result: %w", err)
	}

	// Replace nil slices with empty ones so callers never see nil
	// (presumably so the result serialises as [] rather than null).
	if result.Items == nil {
		result.Items = []ai.RecognizedItem{}
	}
	if result.Unrecognized == nil {
		result.Unrecognized = []ai.UnrecognizedItem{}
	}
	return &result, nil
}
// RecognizeProducts uses the vision model to identify food items in a photo (fridge, shelf, etc.).
//
// imageBase64 and mimeType are forwarded unchanged to generateVisionContent.
// lang is an ISO 639-1 code looked up in langNames; an unknown or empty code
// falls back to English. The resolved language name tells the model which
// language to use for item names.
//
// On success the returned slice is never nil (it is empty when the model
// reports no items). Errors from the model call and from decoding its reply
// are wrapped with context and returned with a nil slice.
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType, lang string) ([]ai.RecognizedItem, error) {
	langName := langNames[lang]
	if langName == "" {
		langName = "English"
	}

	prompt := fmt.Sprintf(`You are a food product recognition system.
Look at the photo and identify all visible food products.
For each product estimate:
- name: product name
- quantity: approximate amount (number)
- unit: unit (g, kg, ml, l, pcs)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Food products only. Skip empty packaging and inedible objects.
Return all text fields (name) in %s.
Return ONLY valid JSON without markdown:
{
"items": [
{"name": "...", "quantity": 10, "unit": "pcs", "category": "dairy", "confidence": 0.9}
]
}`, langName)

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize products: %w", err)
	}

	// The model replies with an {"items": [...]} envelope; only the list
	// itself is handed back to the caller.
	var result struct {
		Items []ai.RecognizedItem `json:"items"`
	}
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse products result: %w", err)
	}
	if result.Items == nil {
		// Return an empty, non-nil slice when "items" is absent or null.
		return []ai.RecognizedItem{}, nil
	}
	return result.Items, nil
}
// RecognizeDish uses the vision model to identify a dish and estimate its nutritional content.
// The prompt asks for 3-5 candidates ranked by descending confidence so the
// user can correct mis-identifications.
//
// imageBase64 and mimeType are forwarded unchanged to generateVisionContent.
// lang is accepted but not used by this method: the prompt always requests
// dish names in English.
//
// On success the returned result always has a non-nil Candidates slice.
// Errors from the model call and from decoding its reply are wrapped with
// context and returned with a nil result.
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType, lang string) (*ai.DishResult, error) {
	prompt := `You are a dietitian and culinary expert.
Look at the dish photo and suggest 3 to 5 possible dishes it could be.
Even if the first option is obvious, add 2-4 alternative dishes with lower confidence.
For each candidate specify:
- dish_name: dish name
- weight_grams: approximate portion weight in grams (estimate from photo)
- calories: calories for this portion (kcal)
- protein_g, fat_g, carbs_g: macros for this portion (grams)
- confidence: certainty 0.0-1.0
Sort candidates by descending confidence. First — most likely.
Return dish_name values in English.
Return ONLY valid JSON without markdown:
{
"candidates": [
{
"dish_name": "...",
"weight_grams": 350,
"calories": 520,
"protein_g": 22,
"fat_g": 26,
"carbs_g": 48,
"confidence": 0.88
},
{
"dish_name": "...",
"weight_grams": 350,
"calories": 540,
"protein_g": 20,
"fat_g": 28,
"carbs_g": 49,
"confidence": 0.65
}
]
}`

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize dish: %w", err)
	}

	var result ai.DishResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse dish result: %w", err)
	}

	// Replace a nil slice with an empty one so callers never see nil
	// (presumably so the result serialises as [] rather than null).
	if result.Candidates == nil {
		result.Candidates = []ai.DishCandidate{}
	}
	return &result, nil
}
// ClassifyIngredient uses the text model to classify an unknown food item
// and build an ingredient_mappings record for it.
//
// name is embedded in the prompt with %q, so quotes, backslashes and control
// characters inside it are escaped and cannot terminate the quoted product
// name early. Ordinary names render exactly as they would between plain
// double quotes.
//
// Errors from the model call and from decoding its reply are wrapped with
// context and returned with a nil result.
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*ai.IngredientClassification, error) {
	prompt := fmt.Sprintf(`Classify the food product: %q.
Return ONLY valid JSON without markdown:
{
"canonical_name": "turkey_breast",
"aliases": ["turkey breast"],
"translations": [
{"lang": "ru", "name": "грудка индейки", "aliases": ["грудка индейки", "филе индейки"]}
],
"category": "meat",
"default_unit": "g",
"calories_per_100g": 135,
"protein_per_100g": 29,
"fat_per_100g": 1,
"carbs_per_100g": 0,
"storage_days": 3
}
"category" must be exactly one of: dairy, meat, produce, bakery, frozen, beverages, other`, name)

	// The whole request is sent as a single user message.
	messages := []map[string]string{
		{"role": "user", "content": prompt},
	}
	text, err := c.generateContent(ctx, messages)
	if err != nil {
		return nil, fmt.Errorf("classify ingredient: %w", err)
	}

	var result ai.IngredientClassification
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse classification: %w", err)
	}
	return &result, nil
}
// parseJSON unmarshals a model reply into dst, tolerating an optional
// Markdown code fence around the JSON payload.
//
// Surrounding whitespace is always trimmed. When the reply starts with a
// ``` fence:
//   - an info string on the opening fence line ("json", "JSON", "jsonc", ...)
//     is discarded, whatever its spelling or case;
//   - the payload ends at the closing fence, so commentary the model adds
//     after it is ignored.
//
// Unfenced text is handed to json.Unmarshal unchanged, and the returned error
// is whatever json.Unmarshal reports.
func parseJSON(text string, dst any) error {
	text = strings.TrimSpace(text)
	if strings.HasPrefix(text, "```") {
		body := strings.TrimPrefix(text, "```")

		// Opening fence: drop the whole first line when it is just a
		// language tag; otherwise the payload starts on the fence line
		// itself and only a literal "json" tag glued to it is removed.
		if nl := strings.IndexByte(body, '\n'); nl >= 0 && isFenceTag(strings.TrimSpace(body[:nl])) {
			body = body[nl:]
		} else {
			body = strings.TrimPrefix(body, "json")
		}

		// Closing fence: a raw newline can never occur inside a JSON
		// string, so "\n```" cannot be part of a valid payload and is safe
		// to cut at. Fall back to a fence glued to the end of the payload.
		if end := strings.Index(body, "\n```"); end >= 0 {
			body = body[:end]
		} else {
			body = strings.TrimSuffix(body, "```")
		}
		text = strings.TrimSpace(body)
	}
	return json.Unmarshal([]byte(text), dst)
}

// isFenceTag reports whether s looks like a code-fence info string (a bare
// language tag) rather than the start of the JSON payload.
func isFenceTag(s string) bool {
	// A line that is itself valid JSON (true, null, 42, ...) is payload.
	if s == "" || json.Valid([]byte(s)) {
		return false
	}
	for i := 0; i < len(s); i++ {
		ch := s[i]
		switch {
		case 'a' <= ch && ch <= 'z', 'A' <= ch && ch <= 'Z', '0' <= ch && ch <= '9':
		case ch == '-', ch == '_', ch == '+', ch == '.', ch == '#':
		default:
			// Braces, quotes, spaces etc. mean JSON shares the line.
			return false
		}
	}
	return true
}