RecognizeDish() and buildRecipePrompt() were generating text in the user's language and storing it in base tables, violating the project rule that base tables always hold English canonical text. - RecognizeDish(): hardcode English for dish_name; enrichDishInBackground() now correctly translates FROM English into all other languages - buildRecipePrompt(): remove langName lookup, hardcode English for all text fields; drop unused locale import Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
213 lines
6.3 KiB
Go
213 lines
6.3 KiB
Go
package openai
|
||
|
||
import (
|
||
"context"
|
||
"encoding/json"
|
||
"fmt"
|
||
"strings"
|
||
|
||
"github.com/food-ai/backend/internal/adapters/ai"
|
||
)
|
||
|
||
// langNames translates an ISO 639-1 language code into the English name of
// that language, which is the form interpolated into AI prompts.
var langNames = map[string]string{
	"en": "English",
	"ru": "Russian",
	"es": "Spanish",
	"de": "German",
	"fr": "French",
	"it": "Italian",
	"pt": "Portuguese",
	"zh": "Chinese",
	"ja": "Japanese",
	"ko": "Korean",
	"ar": "Arabic",
	"hi": "Hindi",
}
|
||
|
||
// RecognizeReceipt uses the vision model to extract food items from a receipt photo.
|
||
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType, lang string) (*ai.ReceiptResult, error) {
|
||
langName := langNames[lang]
|
||
if langName == "" {
|
||
langName = "English"
|
||
}
|
||
prompt := fmt.Sprintf(`You are an OCR system for grocery receipts.
|
||
|
||
Analyse the receipt photo and extract a list of food products.
|
||
For each product determine:
|
||
- name: product name (remove article codes, extra symbols)
|
||
- quantity: amount (number)
|
||
- unit: unit (g, kg, ml, l, pcs, pack)
|
||
- category: dairy | meat | produce | bakery | frozen | beverages | other
|
||
- confidence: 0.0–1.0
|
||
|
||
Skip items that are not food (household chemicals, tobacco, alcohol).
|
||
Items with unreadable text — add to unrecognized.
|
||
|
||
Return all text fields (name) in %s.
|
||
Return ONLY valid JSON without markdown:
|
||
{
|
||
"items": [
|
||
{"name": "...", "quantity": 1, "unit": "l", "category": "dairy", "confidence": 0.95}
|
||
],
|
||
"unrecognized": [
|
||
{"raw_text": "...", "price": 89.0}
|
||
]
|
||
}`, langName)
|
||
|
||
text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("recognize receipt: %w", err)
|
||
}
|
||
|
||
var result ai.ReceiptResult
|
||
if err := parseJSON(text, &result); err != nil {
|
||
return nil, fmt.Errorf("parse receipt result: %w", err)
|
||
}
|
||
if result.Items == nil {
|
||
result.Items = []ai.RecognizedItem{}
|
||
}
|
||
if result.Unrecognized == nil {
|
||
result.Unrecognized = []ai.UnrecognizedItem{}
|
||
}
|
||
return &result, nil
|
||
}
|
||
|
||
// RecognizeProducts uses the vision model to identify food items in a photo (fridge, shelf, etc.).
|
||
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType, lang string) ([]ai.RecognizedItem, error) {
|
||
langName := langNames[lang]
|
||
if langName == "" {
|
||
langName = "English"
|
||
}
|
||
prompt := fmt.Sprintf(`You are a food product recognition system.
|
||
|
||
Look at the photo and identify all visible food products.
|
||
For each product estimate:
|
||
- name: product name
|
||
- quantity: approximate amount (number)
|
||
- unit: unit (g, kg, ml, l, pcs)
|
||
- category: dairy | meat | produce | bakery | frozen | beverages | other
|
||
- confidence: 0.0–1.0
|
||
|
||
Food products only. Skip empty packaging and inedible objects.
|
||
|
||
Return all text fields (name) in %s.
|
||
Return ONLY valid JSON without markdown:
|
||
{
|
||
"items": [
|
||
{"name": "...", "quantity": 10, "unit": "pcs", "category": "dairy", "confidence": 0.9}
|
||
]
|
||
}`, langName)
|
||
|
||
text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("recognize products: %w", err)
|
||
}
|
||
|
||
var result struct {
|
||
Items []ai.RecognizedItem `json:"items"`
|
||
}
|
||
if err := parseJSON(text, &result); err != nil {
|
||
return nil, fmt.Errorf("parse products result: %w", err)
|
||
}
|
||
if result.Items == nil {
|
||
return []ai.RecognizedItem{}, nil
|
||
}
|
||
return result.Items, nil
|
||
}
|
||
|
||
// RecognizeDish uses the vision model to identify a dish and estimate its nutritional content.
|
||
// Returns 3–5 ranked candidates so the user can correct mis-identifications.
|
||
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType, lang string) (*ai.DishResult, error) {
|
||
prompt := `You are a dietitian and culinary expert.
|
||
|
||
Look at the dish photo and suggest 3 to 5 possible dishes it could be.
|
||
Even if the first option is obvious, add 2–4 alternative dishes with lower confidence.
|
||
For each candidate specify:
|
||
- dish_name: dish name
|
||
- weight_grams: approximate portion weight in grams (estimate from photo)
|
||
- calories: calories for this portion (kcal)
|
||
- protein_g, fat_g, carbs_g: macros for this portion (grams)
|
||
- confidence: certainty 0.0–1.0
|
||
|
||
Sort candidates by descending confidence. First — most likely.
|
||
|
||
Return dish_name values in English.
|
||
Return ONLY valid JSON without markdown:
|
||
{
|
||
"candidates": [
|
||
{
|
||
"dish_name": "...",
|
||
"weight_grams": 350,
|
||
"calories": 520,
|
||
"protein_g": 22,
|
||
"fat_g": 26,
|
||
"carbs_g": 48,
|
||
"confidence": 0.88
|
||
},
|
||
{
|
||
"dish_name": "...",
|
||
"weight_grams": 350,
|
||
"calories": 540,
|
||
"protein_g": 20,
|
||
"fat_g": 28,
|
||
"carbs_g": 49,
|
||
"confidence": 0.65
|
||
}
|
||
]
|
||
}`
|
||
|
||
text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("recognize dish: %w", err)
|
||
}
|
||
|
||
var result ai.DishResult
|
||
if parseError := parseJSON(text, &result); parseError != nil {
|
||
return nil, fmt.Errorf("parse dish result: %w", parseError)
|
||
}
|
||
if result.Candidates == nil {
|
||
result.Candidates = []ai.DishCandidate{}
|
||
}
|
||
return &result, nil
|
||
}
|
||
|
||
// ClassifyIngredient uses the text model to classify an unknown food item
|
||
// and build an ingredient_mappings record for it.
|
||
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*ai.IngredientClassification, error) {
|
||
prompt := fmt.Sprintf(`Classify the food product: "%s".
|
||
Return ONLY valid JSON without markdown:
|
||
{
|
||
"canonical_name": "turkey_breast",
|
||
"aliases": ["turkey breast"],
|
||
"translations": [
|
||
{"lang": "ru", "name": "грудка индейки", "aliases": ["грудка индейки", "филе индейки"]}
|
||
],
|
||
"category": "meat",
|
||
"default_unit": "g",
|
||
"calories_per_100g": 135,
|
||
"protein_per_100g": 29,
|
||
"fat_per_100g": 1,
|
||
"carbs_per_100g": 0,
|
||
"storage_days": 3
|
||
}`, name)
|
||
|
||
messages := []map[string]string{
|
||
{"role": "user", "content": prompt},
|
||
}
|
||
text, err := c.generateContent(ctx, messages)
|
||
if err != nil {
|
||
return nil, fmt.Errorf("classify ingredient: %w", err)
|
||
}
|
||
|
||
var result ai.IngredientClassification
|
||
if err := parseJSON(text, &result); err != nil {
|
||
return nil, fmt.Errorf("parse classification: %w", err)
|
||
}
|
||
return &result, nil
|
||
}
|
||
|
||
// parseJSON decodes a model reply into dst, tolerating the markdown code
// fence that models frequently wrap JSON in.
//
// A reply that is already valid JSON once surrounding whitespace is trimmed is
// decoded as is. Otherwise the content of the first ``` fence is extracted:
// text before the opening fence and after the closing fence is discarded and
// the fence's language tag ("json", "JSON", ...) is dropped. The closing fence
// is optional.
//
// dst must be a pointer accepted by json.Unmarshal. The returned error is the
// one json.Unmarshal reports for the final payload.
func parseJSON(text string, dst any) error {
	payload := strings.TrimSpace(text)

	// Look for a fence only when the reply is not valid JSON by itself, so
	// JSON whose string values happen to contain ``` is left untouched.
	if !json.Valid([]byte(payload)) {
		if fenced, ok := fencedPayload(payload); ok {
			payload = fenced
		}
	}
	return json.Unmarshal([]byte(payload), dst)
}

// fencedPayload returns the content of the first markdown code fence in text
// with its language tag removed. The boolean is false when text has no fence.
func fencedPayload(text string) (string, bool) {
	_, body, found := strings.Cut(text, "```")
	if !found {
		return "", false
	}

	// The last fence marker closes the block; searching from the end keeps
	// any ``` inside JSON string values intact.
	if end := strings.LastIndex(body, "```"); end >= 0 {
		body = body[:end]
	}

	body = strings.TrimSpace(body)
	if json.Valid([]byte(body)) {
		return body, true
	}

	// Whatever still precedes the JSON is the fence's language tag: a run of
	// ASCII letters directly after the opening marker.
	body = strings.TrimLeftFunc(body, func(r rune) bool {
		return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z')
	})
	return strings.TrimSpace(body), true
}
|