Files
food-ai/backend/internal/adapters/openai/recognition.go
dbastrikin 0f533ccaeb fix: enforce English canonical text in AI-generated dish and recipe data
RecognizeDish() and buildRecipePrompt() were generating text in the
user's language and storing it in base tables, violating the project
rule that base tables always hold English canonical text.

- RecognizeDish(): hardcode English for dish_name; enrichDishInBackground()
  now correctly translates FROM English into all other languages
- buildRecipePrompt(): remove langName lookup, hardcode English for all
  text fields; drop unused locale import

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-18 16:57:34 +02:00

213 lines
6.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package openai
import (
"context"
"encoding/json"
"fmt"
"strings"
"github.com/food-ai/backend/internal/adapters/ai"
)
// langNames translates an ISO 639-1 language code into the English name of
// that language, which is the form interpolated into AI prompts. Looking up a
// code that is not listed yields the empty string.
var langNames = map[string]string{
	"en": "English",
	"ru": "Russian",
	"es": "Spanish",
	"de": "German",
	"fr": "French",
	"it": "Italian",
	"pt": "Portuguese",
	"zh": "Chinese",
	"ja": "Japanese",
	"ko": "Korean",
	"ar": "Arabic",
	"hi": "Hindi",
}
// RecognizeReceipt uses the vision model to extract food items from a receipt photo.
//
// imageBase64 and mimeType are passed through to generateVisionContent
// unchanged. lang is looked up in langNames; a code that is not in the table
// falls back to English, and the model is asked to return product names in
// that language.
//
// On success the result always carries non-nil Items and Unrecognized slices,
// even when the model omitted them. Errors from the model call and from JSON
// decoding are wrapped with a short context prefix.
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType, lang string) (*ai.ReceiptResult, error) {
	langName := langNames[lang]
	if langName == "" {
		langName = "English"
	}

	prompt := fmt.Sprintf(`You are an OCR system for grocery receipts.
Analyse the receipt photo and extract a list of food products.
For each product determine:
- name: product name (remove article codes, extra symbols)
- quantity: amount (number)
- unit: unit (g, kg, ml, l, pcs, pack)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Skip items that are not food (household chemicals, tobacco, alcohol).
Items with unreadable text — add to unrecognized.
Return all text fields (name) in %s.
Return ONLY valid JSON without markdown:
{
"items": [
{"name": "...", "quantity": 1, "unit": "l", "category": "dairy", "confidence": 0.95}
],
"unrecognized": [
{"raw_text": "...", "price": 89.0}
]
}`, langName)

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize receipt: %w", err)
	}

	var result ai.ReceiptResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse receipt result: %w", err)
	}

	// Normalize missing lists to empty slices so callers never see nil
	// (presumably so they serialize as [] rather than null — confirm with callers).
	if result.Items == nil {
		result.Items = []ai.RecognizedItem{}
	}
	if result.Unrecognized == nil {
		result.Unrecognized = []ai.UnrecognizedItem{}
	}
	return &result, nil
}
// RecognizeProducts uses the vision model to identify food items in a photo (fridge, shelf, etc.).
//
// imageBase64 and mimeType are passed through to generateVisionContent
// unchanged. lang is looked up in langNames; a code that is not in the table
// falls back to English, and the model is asked to return product names in
// that language.
//
// On success the returned slice is never nil: a reply without an "items" list
// yields an empty slice. Errors from the model call and from JSON decoding are
// wrapped with a short context prefix.
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType, lang string) ([]ai.RecognizedItem, error) {
	langName := langNames[lang]
	if langName == "" {
		langName = "English"
	}

	prompt := fmt.Sprintf(`You are a food product recognition system.
Look at the photo and identify all visible food products.
For each product estimate:
- name: product name
- quantity: approximate amount (number)
- unit: unit (g, kg, ml, l, pcs)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Food products only. Skip empty packaging and inedible objects.
Return all text fields (name) in %s.
Return ONLY valid JSON without markdown:
{
"items": [
{"name": "...", "quantity": 10, "unit": "pcs", "category": "dairy", "confidence": 0.9}
]
}`, langName)

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize products: %w", err)
	}

	// The model wraps the list in an object; only the "items" field is used.
	var result struct {
		Items []ai.RecognizedItem `json:"items"`
	}
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse products result: %w", err)
	}
	if result.Items == nil {
		return []ai.RecognizedItem{}, nil
	}
	return result.Items, nil
}
// RecognizeDish uses the vision model to identify a dish and estimate its nutritional content.
// Returns 3-5 ranked candidates so the user can correct mis-identifications.
//
// imageBase64 and mimeType are passed through to generateVisionContent
// unchanged. lang is accepted for signature compatibility but is not used:
// the prompt always asks for dish names in English.
//
// On success the result always carries a non-nil Candidates slice. Errors
// from the model call and from JSON decoding are wrapped with a short context
// prefix.
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType, lang string) (*ai.DishResult, error) {
	prompt := `You are a dietitian and culinary expert.
Look at the dish photo and suggest 3 to 5 possible dishes it could be.
Even if the first option is obvious, add 2-4 alternative dishes with lower confidence.
For each candidate specify:
- dish_name: dish name
- weight_grams: approximate portion weight in grams (estimate from photo)
- calories: calories for this portion (kcal)
- protein_g, fat_g, carbs_g: macros for this portion (grams)
- confidence: certainty 0.0-1.0
Sort candidates by descending confidence. First — most likely.
Return dish_name values in English.
Return ONLY valid JSON without markdown:
{
"candidates": [
{
"dish_name": "...",
"weight_grams": 350,
"calories": 520,
"protein_g": 22,
"fat_g": 26,
"carbs_g": 48,
"confidence": 0.88
},
{
"dish_name": "...",
"weight_grams": 350,
"calories": 540,
"protein_g": 20,
"fat_g": 28,
"carbs_g": 49,
"confidence": 0.65
}
]
}`

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize dish: %w", err)
	}

	var result ai.DishResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse dish result: %w", err)
	}

	// Normalize a missing list to an empty slice so callers never see nil.
	if result.Candidates == nil {
		result.Candidates = []ai.DishCandidate{}
	}
	return &result, nil
}
// ClassifyIngredient uses the text model to classify an unknown food item
// and build an ingredient_mappings record for it.
//
// name is the product name to classify. It is embedded in the prompt with %q
// so that quotes, backslashes and control characters inside the name are
// escaped and cannot terminate the quoted span early; ordinary names render
// exactly as a plain double-quoted string. The JSON object in the prompt is a
// filled-in example of the reply shape the model is asked to produce.
//
// Errors from the model call and from JSON decoding are wrapped with a short
// context prefix.
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*ai.IngredientClassification, error) {
	prompt := fmt.Sprintf(`Classify the food product: %q.
Return ONLY valid JSON without markdown:
{
"canonical_name": "turkey_breast",
"aliases": ["turkey breast"],
"translations": [
{"lang": "ru", "name": "грудка индейки", "aliases": ["грудка индейки", "филе индейки"]}
],
"category": "meat",
"default_unit": "g",
"calories_per_100g": 135,
"protein_per_100g": 29,
"fat_per_100g": 1,
"carbs_per_100g": 0,
"storage_days": 3
}`, name)

	// Single-turn conversation: the whole request is one user message.
	messages := []map[string]string{
		{"role": "user", "content": prompt},
	}

	text, err := c.generateContent(ctx, messages)
	if err != nil {
		return nil, fmt.Errorf("classify ingredient: %w", err)
	}

	var result ai.IngredientClassification
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse classification: %w", err)
	}
	return &result, nil
}
// parseJSON strips optional markdown fences and unmarshals JSON.
//
// text is a raw model reply; dst is the pointer json.Unmarshal decodes into.
// Surrounding whitespace and a leading "```json" / "```" fence with its
// trailing "```" are removed first. If what remains is still not valid JSON
// (for example the fence carried a different info string such as "JSON", or
// the model added prose around the payload), the outermost {...} or [...]
// span is extracted and used instead, provided that span is itself valid
// JSON. Replies that were already parseable are decoded exactly as before.
//
// The returned error is whatever json.Unmarshal reports for the final text.
func parseJSON(text string, dst any) error {
	text = strings.TrimSpace(text)
	if strings.HasPrefix(text, "```") {
		text = strings.TrimPrefix(text, "```json")
		text = strings.TrimPrefix(text, "```")
		text = strings.TrimSuffix(text, "```")
		text = strings.TrimSpace(text)
	}

	// Best-effort recovery: only kicks in when the direct parse would fail
	// with a syntax error, so well-formed replies are never altered.
	if !json.Valid([]byte(text)) {
		if start := strings.IndexAny(text, "{["); start >= 0 {
			closer := "}"
			if text[start] == '[' {
				closer = "]"
			}
			if end := strings.LastIndex(text, closer); end > start {
				if candidate := text[start : end+1]; json.Valid([]byte(candidate)) {
					text = candidate
				}
			}
		}
	}

	return json.Unmarshal([]byte(text), dst)
}