// food-ai/backend/internal/adapters/openai/recognition.go

package openai

import (
	"context"
	"encoding/json"
	"fmt"
	"strings"

	"github.com/food-ai/backend/internal/adapters/ai"
)

// langNames translates an ISO 639-1 language code into the English name of
// that language, which is the form interpolated into the AI prompts below.
// Codes that are absent yield the empty string on lookup.
var langNames = map[string]string{
	"en": "English",
	"ru": "Russian",
	"es": "Spanish",
	"de": "German",
	"fr": "French",
	"it": "Italian",
	"pt": "Portuguese",
	"zh": "Chinese",
	"ja": "Japanese",
	"ko": "Korean",
	"ar": "Arabic",
	"hi": "Hindi",
}

// RecognizeReceipt sends a receipt photo to the vision model and decodes the
// food items the model reports.
//
// lang is looked up in langNames to choose the language requested for item
// names; a code that is missing from the table falls back to English.
// imageBase64 and mimeType are passed through unchanged to
// generateVisionContent.
//
// On success the returned result never carries nil slices: a nil Items or
// Unrecognized field is replaced with an empty slice. Errors from the model
// call and from JSON decoding are wrapped with a short context prefix.
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType, lang string) (*ai.ReceiptResult, error) {
	// The single %s verb receives the English name of the target language.
	const promptTemplate = `You are an OCR system for grocery receipts.

Analyse the receipt photo and extract a list of food products.
For each product determine:
- name: product name (remove article codes, extra symbols)
- quantity: amount (number)
- unit: unit (g, kg, ml, l, pcs, pack)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0–1.0

Skip items that are not food (household chemicals, tobacco, alcohol).
Items with unreadable text — add to unrecognized.

Return all text fields (name) in %s.
Return ONLY valid JSON without markdown:
{
  "items": [
    {"name": "...", "quantity": 1, "unit": "l", "category": "dairy", "confidence": 0.95}
  ],
  "unrecognized": [
    {"raw_text": "...", "price": 89.0}
  ]
}`

	targetLang := "English"
	if known := langNames[lang]; known != "" {
		targetLang = known
	}

	reply, err := c.generateVisionContent(ctx, fmt.Sprintf(promptTemplate, targetLang), imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize receipt: %w", err)
	}

	receipt := new(ai.ReceiptResult)
	if err = parseJSON(reply, receipt); err != nil {
		return nil, fmt.Errorf("parse receipt result: %w", err)
	}

	// Swap nil slices for empty ones so both fields are always non-nil.
	if receipt.Items == nil {
		receipt.Items = []ai.RecognizedItem{}
	}
	if receipt.Unrecognized == nil {
		receipt.Unrecognized = []ai.UnrecognizedItem{}
	}
	return receipt, nil
}

// RecognizeProducts sends a photo (fridge, shelf, etc.) to the vision model
// and decodes the list of food items the model reports.
//
// lang is looked up in langNames to choose the language requested for item
// names; a code that is missing from the table falls back to English.
// imageBase64 and mimeType are passed through unchanged to
// generateVisionContent.
//
// On success the returned slice is never nil: when the reply has no "items"
// an empty slice is returned. Errors from the model call and from JSON
// decoding are wrapped with a short context prefix.
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType, lang string) ([]ai.RecognizedItem, error) {
	// The single %s verb receives the English name of the target language.
	const promptTemplate = `You are a food product recognition system.

Look at the photo and identify all visible food products.
For each product estimate:
- name: product name
- quantity: approximate amount (number)
- unit: unit (g, kg, ml, l, pcs)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0–1.0

Food products only. Skip empty packaging and inedible objects.

Return all text fields (name) in %s.
Return ONLY valid JSON without markdown:
{
  "items": [
    {"name": "...", "quantity": 10, "unit": "pcs", "category": "dairy", "confidence": 0.9}
  ]
}`

	targetLang := "English"
	if known := langNames[lang]; known != "" {
		targetLang = known
	}

	reply, err := c.generateVisionContent(ctx, fmt.Sprintf(promptTemplate, targetLang), imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize products: %w", err)
	}

	// Only the "items" array of the reply is of interest here.
	var payload struct {
		Items []ai.RecognizedItem `json:"items"`
	}
	if err = parseJSON(reply, &payload); err != nil {
		return nil, fmt.Errorf("parse products result: %w", err)
	}

	items := payload.Items
	if items == nil {
		items = []ai.RecognizedItem{}
	}
	return items, nil
}

// RecognizeDish sends a dish photo to the vision model and decodes the
// candidate dishes, each with an estimated portion weight, calories and
// macros.
//
// The prompt asks the model for 3 to 5 candidates sorted by descending
// confidence; neither the count nor the order is validated here.
//
// lang is accepted but not read by this method: the prompt always requests
// dish names in English. imageBase64 and mimeType are passed through
// unchanged to generateVisionContent.
//
// On success a nil Candidates field is replaced with an empty slice. Errors
// from the model call and from JSON decoding are wrapped with a short context
// prefix.
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType, lang string) (*ai.DishResult, error) {
	const dishPrompt = `You are a dietitian and culinary expert.

Look at the dish photo and suggest 3 to 5 possible dishes it could be.
Even if the first option is obvious, add 2–4 alternative dishes with lower confidence.
For each candidate specify:
- dish_name: dish name
- weight_grams: approximate portion weight in grams (estimate from photo)
- calories: calories for this portion (kcal)
- protein_g, fat_g, carbs_g: macros for this portion (grams)
- confidence: certainty 0.0–1.0

Sort candidates by descending confidence. First — most likely.

Return dish_name values in English.
Return ONLY valid JSON without markdown:
{
  "candidates": [
    {
      "dish_name": "...",
      "weight_grams": 350,
      "calories": 520,
      "protein_g": 22,
      "fat_g": 26,
      "carbs_g": 48,
      "confidence": 0.88
    },
    {
      "dish_name": "...",
      "weight_grams": 350,
      "calories": 540,
      "protein_g": 20,
      "fat_g": 28,
      "carbs_g": 49,
      "confidence": 0.65
    }
  ]
}`

	reply, err := c.generateVisionContent(ctx, dishPrompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize dish: %w", err)
	}

	dish := new(ai.DishResult)
	if err = parseJSON(reply, dish); err != nil {
		return nil, fmt.Errorf("parse dish result: %w", err)
	}

	// Swap a nil slice for an empty one so Candidates is always non-nil.
	if dish.Candidates == nil {
		dish.Candidates = []ai.DishCandidate{}
	}
	return dish, nil
}

// ClassifyIngredient asks the text model to classify a food item by name and
// returns the decoded classification (canonical name, aliases, translations,
// category, default unit, per-100g nutrition and storage days, as laid out in
// the example JSON embedded in the prompt).
//
// name is supplied by the caller and is embedded in the prompt with %q, so
// quotes, backslashes and control characters in it are escaped and cannot
// terminate the quoted span or inject extra prompt lines. For ordinary names
// the resulting prompt is identical to plain double-quoting.
//
// Errors from the model call and from JSON decoding are wrapped with a short
// context prefix.
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*ai.IngredientClassification, error) {
	prompt := fmt.Sprintf(`Classify the food product: %q.
Return ONLY valid JSON without markdown:
{
  "canonical_name": "turkey_breast",
  "aliases": ["turkey breast"],
  "translations": [
    {"lang": "ru", "name": "грудка индейки", "aliases": ["грудка индейки", "филе индейки"]}
  ],
  "category": "meat",
  "default_unit": "g",
  "calories_per_100g": 135,
  "protein_per_100g": 29,
  "fat_per_100g": 1,
  "carbs_per_100g": 0,
  "storage_days": 3
}`, name)

	// The whole request is a single user message.
	messages := []map[string]string{
		{"role": "user", "content": prompt},
	}
	text, err := c.generateContent(ctx, messages)
	if err != nil {
		return nil, fmt.Errorf("classify ingredient: %w", err)
	}

	var result ai.IngredientClassification
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse classification: %w", err)
	}
	return &result, nil
}

// parseJSON decodes a model reply into dst, tolerating the wrappers models
// commonly add around JSON.
//
// It first applies the strict path: surrounding whitespace is trimmed and, if
// the text starts with a markdown fence, a leading "```json" or "```" and a
// trailing "```" are removed. If the result is valid JSON it is decoded as is.
//
// Only when that result is not valid JSON does it fall back to the span from
// the first '{' or '[' to the last '}' or ']'. This recovers replies that use
// a differently tagged fence (e.g. "```JSON"), put prose before the fence, or
// add commentary after it. The fallback is used only if that span is itself
// valid JSON; otherwise the strict text is passed to json.Unmarshal so the
// caller gets the decoder's error for it.
//
// dst must be a non-nil pointer, as required by json.Unmarshal.
func parseJSON(text string, dst any) error {
	text = strings.TrimSpace(text)
	if strings.HasPrefix(text, "```") {
		text = strings.TrimPrefix(text, "```json")
		text = strings.TrimPrefix(text, "```")
		text = strings.TrimSuffix(text, "```")
		text = strings.TrimSpace(text)
	}

	payload := []byte(text)
	if !json.Valid(payload) {
		// Lenient fallback: carve out the outermost object/array.
		start := strings.IndexAny(text, "{[")
		end := strings.LastIndexAny(text, "}]")
		if start >= 0 && end > start {
			if candidate := []byte(text[start : end+1]); json.Valid(candidate) {
				payload = candidate
			}
		}
	}
	return json.Unmarshal(payload, dst)
}