package openai

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/food-ai/backend/internal/adapters/ai"
)

// langNames translates ISO 639-1 language codes into the English language
// names that are substituted into AI prompts.
var langNames = map[string]string{
	"en": "English",
	"ru": "Russian",
	"es": "Spanish",
	"de": "German",
	"fr": "French",
	"it": "Italian",
	"pt": "Portuguese",
	"zh": "Chinese",
	"ja": "Japanese",
	"ko": "Korean",
	"ar": "Arabic",
	"hi": "Hindi",
}

// RecognizeReceipt sends a receipt photo to the vision model and decodes the
// food items the model extracted from it.
//
// imageBase64 and mimeType are handed to generateVisionContent unchanged.
// lang is looked up in langNames; a code with no entry falls back to English.
// The returned result always has non-nil Items and Unrecognized slices.
// Errors from the model call and from JSON decoding are wrapped with context.
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType, lang string) (*ai.ReceiptResult, error) {
	// The template text is runtime behaviour: it is sent to the model after
	// the single %s verb is replaced with the language name ("%%" is a
	// literal percent sign).
	const promptTemplate = `You are an OCR system for grocery receipts. Analyse the receipt photo and extract a list of food products. Rules for each product: NAME (confidence): - Remove article codes, cashier codes (e.g. "1/72", "4607001234"), extra symbols. - Complete obviously truncated OCR names: "Паштет шпро." → "Паштет шпротный", "Паштет с говяжьей пече" → "Паштет с говяжьей печенью". - Preserve meaningful product attributes: fat percentage ("3.2%%", "жирн. 9%%"), flavour ("с гусиной печенью", "яблочный"), brand qualifiers ("ультрапастеризованное"). - confidence: your certainty that the name is correct (0.0–1.0). QUANTITY + UNIT (quantity_confidence): - If a weight or volume is written on the receipt line (e.g. "160г", "1л", "500 мл", "0.5кг"), use it as quantity+unit. quantity_confidence = 0.9–1.0. - If the count on the receipt is 1 and no weight/volume is stated, but the product is a liquid (juice, milk, kefir, etc.) — infer 1 l and set quantity_confidence = 0.5. - If the count is 1 and no weight is stated, but the product is a solid packaged good (pâté, spreadable cheese, sausage, butter, hard cheese, etc.) — infer a typical package weight in grams (e.g. pâté 100 g, spreadable cheese 180 g, butter 200 g) and set quantity_confidence = 0.35. 
- If the receipt explicitly states the quantity and unit (e.g. "2 кг", "3 шт"), use them directly. quantity_confidence = 1.0. - Never output quantity = 1 with unit = "g" unless the receipt explicitly says "1 г". - unit must be one of: g, kg, ml, l, pcs, pack. CATEGORY: dairy | meat | produce | bakery | frozen | beverages | other Skip items that are not food (household chemicals, tobacco, alcohol, bags, services). Items with unreadable text — add to unrecognized. Return all text fields (name) in %s. Return ONLY valid JSON without markdown: { "items": [ {"name": "...", "quantity": 160, "unit": "g", "category": "other", "confidence": 0.95, "quantity_confidence": 0.9} ], "unrecognized": [ {"raw_text": "...", "price": 89.0} ] }`

	// Start from the default and override it only for a known language code.
	language := "English"
	if known := langNames[lang]; known != "" {
		language = known
	}

	raw, err := c.generateVisionContent(ctx, fmt.Sprintf(promptTemplate, language), imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize receipt: %w", err)
	}

	receipt := new(ai.ReceiptResult)
	if err = parseJSON(raw, receipt); err != nil {
		return nil, fmt.Errorf("parse receipt result: %w", err)
	}

	// Replace nil slices with empty ones so the result never carries nil lists.
	if receipt.Items == nil {
		receipt.Items = []ai.RecognizedItem{}
	}
	if receipt.Unrecognized == nil {
		receipt.Unrecognized = []ai.UnrecognizedItem{}
	}
	return receipt, nil
}

// RecognizeProducts sends a photo (fridge, shelf, etc.) to the vision model
// and decodes the food items the model identified in it.
//
// lang is looked up in langNames; a code with no entry falls back to English.
// The returned slice is never nil when the error is nil. Errors from the model
// call and from JSON decoding are wrapped with context.
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType, lang string) ([]ai.RecognizedItem, error) {
	// The template text is runtime behaviour: it is sent to the model after
	// the single %s verb is replaced with the language name.
	const promptTemplate = `You are a food product recognition system. Look at the photo and identify all visible food products. For each product estimate: - name: product name - quantity: approximate amount (number) - unit: unit (g, kg, ml, l, pcs) - category: dairy | meat | produce | bakery | frozen | beverages | other - confidence: 0.0–1.0 Food products only. Skip empty packaging and inedible objects. Return all text fields (name) in %s. 
Return ONLY valid JSON without markdown: { "items": [ {"name": "...", "quantity": 10, "unit": "pcs", "category": "dairy", "confidence": 0.9} ] }`

	// Start from the default and override it only for a known language code.
	language := "English"
	if known := langNames[lang]; known != "" {
		language = known
	}

	raw, err := c.generateVisionContent(ctx, fmt.Sprintf(promptTemplate, language), imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize products: %w", err)
	}

	// envelope mirrors the {"items": [...]} object requested by the prompt.
	var envelope struct {
		Items []ai.RecognizedItem `json:"items"`
	}
	if err = parseJSON(raw, &envelope); err != nil {
		return nil, fmt.Errorf("parse products result: %w", err)
	}

	// Hand back an empty slice rather than nil when no items were decoded.
	items := envelope.Items
	if items == nil {
		items = []ai.RecognizedItem{}
	}
	return items, nil
}

// RecognizeDish sends a dish photo to the vision model and decodes the
// candidate dishes, with nutritional estimates, that the model proposed.
// The prompt asks for 3–5 candidates sorted by descending confidence so the
// user can correct mis-identifications.
//
// lang is accepted but not used by this method: the prompt asks for dish names
// in English. The returned result always has a non-nil Candidates slice.
// Errors from the model call and from JSON decoding are wrapped with context.
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType, lang string) (*ai.DishResult, error) {
	// The prompt text is runtime behaviour: it is sent to the model as written.
	const prompt = `You are a dietitian and culinary expert. Look at the dish photo and suggest 3 to 5 possible dishes it could be. Even if the first option is obvious, add 2–4 alternative dishes with lower confidence. For each candidate specify: - dish_name: dish name - weight_grams: approximate portion weight in grams (estimate from photo) - calories: calories for this portion (kcal) - protein_g, fat_g, carbs_g: macros for this portion (grams) - confidence: certainty 0.0–1.0 Sort candidates by descending confidence. First — most likely. Return dish_name values in English. 
Return ONLY valid JSON without markdown: { "candidates": [ { "dish_name": "...", "weight_grams": 350, "calories": 520, "protein_g": 22, "fat_g": 26, "carbs_g": 48, "confidence": 0.88 }, { "dish_name": "...", "weight_grams": 350, "calories": 540, "protein_g": 20, "fat_g": 28, "carbs_g": 49, "confidence": 0.65 } ] }`

	raw, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize dish: %w", err)
	}

	dish := new(ai.DishResult)
	if err = parseJSON(raw, dish); err != nil {
		return nil, fmt.Errorf("parse dish result: %w", err)
	}

	// Replace a nil slice with an empty one so the result never carries a nil list.
	if dish.Candidates == nil {
		dish.Candidates = []ai.DishCandidate{}
	}
	return dish, nil
}

// ClassifyIngredient asks the text model to classify an unknown food item and
// decodes the reply into an ingredient_mappings record.
//
// name is inserted into the prompt between double quotes without escaping.
// Errors from the model call and from JSON decoding are wrapped with context.
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*ai.IngredientClassification, error) {
	// The template text is runtime behaviour: it is sent to the model after
	// the single %s verb is replaced with the product name.
	const promptTemplate = `Classify the food product: "%s". Return ONLY valid JSON without markdown: { "canonical_name": "turkey_breast", "aliases": ["turkey breast"], "translations": [ {"lang": "ru", "name": "грудка индейки", "aliases": ["грудка индейки", "филе индейки"]} ], "category": "meat", "default_unit": "g", "calories_per_100g": 135, "protein_per_100g": 29, "fat_per_100g": 1, "carbs_per_100g": 0, "storage_days": 3 }`

	// The whole request is a single user message.
	userMessage := map[string]string{
		"role":    "user",
		"content": fmt.Sprintf(promptTemplate, name),
	}

	raw, err := c.generateContent(ctx, []map[string]string{userMessage})
	if err != nil {
		return nil, fmt.Errorf("classify ingredient: %w", err)
	}

	classification := new(ai.IngredientClassification)
	if err = parseJSON(raw, classification); err != nil {
		return nil, fmt.Errorf("parse classification: %w", err)
	}
	return classification, nil
}

// parseJSON strips optional markdown fences and unmarshals JSON.
// It trims surrounding whitespace and, when the text opens with a "```"
// fence, removes that fence together with an optional "json" info string.
// The info string is matched case-insensitively, so "```JSON" and "```Json"
// are accepted as well as "```json". A closing "```" fence is removed too,
// and what remains is decoded into dst, which must be a non-nil pointer as
// required by json.Unmarshal. Text that does not start with a fence is
// decoded as-is after trimming.
func parseJSON(text string, dst any) error {
	const (
		fence = "```"
		tag   = "json"
	)

	text = strings.TrimSpace(text)
	if strings.HasPrefix(text, fence) {
		text = text[len(fence):]
		// Models do not always emit the info string in lower case; a
		// case-sensitive match would leave "JSON" in front of the payload
		// and make decoding fail.
		if len(text) >= len(tag) && strings.EqualFold(text[:len(tag)], tag) {
			text = text[len(tag):]
		}
		text = strings.TrimSuffix(text, fence)
		text = strings.TrimSpace(text)
	}
	return json.Unmarshal([]byte(text), dst)
}