Files
food-ai/backend/internal/gemini/recognition.go
dbastrikin a225f6c47a refactor: migrate ingredient aliases/categories to dedicated tables, drop spoonacular_id
- migration 012: create ingredient_categories + ingredient_category_translations
  tables (7 slugs, Russian names); add ingredient_aliases (ingredient_id, lang,
  alias) with GIN trigram index; migrate aliases JSONB from ingredient_mappings
  and ingredient_translations; drop aliases columns and spoonacular_id; add
  UNIQUE (canonical_name) as conflict key
- ingredient/model: remove SpoonacularID, add CategoryName for localized display
- ingredient/repository: conflict on canonical_name; GetByID/Search join category
  translations and aliases lateral; new UpsertAliases (pgx batch),
  UpsertCategoryTranslation; remove GetBySpoonacularID; split scan helpers into
  scanMappingWrite / scanMappingRead
- gemini/recognition: add IngredientTranslation type; IngredientClassification
  now carries Translations []IngredientTranslation instead of CanonicalNameRu;
  update ClassifyIngredient prompt to English with structured translations array
- recognition/handler: update ingredientRepo interface; saveClassification uses
  UpsertAliases and iterates Translations
- recipe/model: remove SpoonacularID from RecipeIngredient
- integration tests: remove SpoonacularID fixtures, replace GetBySpoonacularID
  tests with GetByID, add UpsertAliases and UpsertCategoryTranslation tests
- Flutter: remove canonicalNameRu from IngredientMapping, add categoryName;
  displayName returns server-resolved canonicalName; regenerate .g.dart

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-10 14:40:07 +02:00

230 lines
8.1 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package gemini
import (
"encoding/json"
"fmt"
"strings"
"context"
)
// RecognizedItem is a food item identified in an image.
type RecognizedItem struct {
Name string `json:"name"`
Quantity float64 `json:"quantity"`
Unit string `json:"unit"`
Category string `json:"category"`
Confidence float64 `json:"confidence"`
}
// UnrecognizedItem is text from a receipt that could not be identified as food.
type UnrecognizedItem struct {
RawText string `json:"raw_text"`
Price float64 `json:"price,omitempty"`
}
// ReceiptResult is the full result of receipt OCR.
type ReceiptResult struct {
Items []RecognizedItem `json:"items"`
Unrecognized []UnrecognizedItem `json:"unrecognized"`
}
// DishResult is the result of dish recognition.
type DishResult struct {
DishName string `json:"dish_name"`
WeightGrams int `json:"weight_grams"`
Calories float64 `json:"calories"`
ProteinG float64 `json:"protein_g"`
FatG float64 `json:"fat_g"`
CarbsG float64 `json:"carbs_g"`
Confidence float64 `json:"confidence"`
SimilarDishes []string `json:"similar_dishes"`
}
// IngredientTranslation holds the localized name and aliases for one language.
type IngredientTranslation struct {
Lang string `json:"lang"`
Name string `json:"name"`
Aliases []string `json:"aliases"`
}
// IngredientClassification is the AI-produced classification of an unknown food item.
type IngredientClassification struct {
CanonicalName string `json:"canonical_name"`
Aliases []string `json:"aliases"` // English aliases
Translations []IngredientTranslation `json:"translations"` // other languages
Category string `json:"category"`
DefaultUnit string `json:"default_unit"`
CaloriesPer100g *float64 `json:"calories_per_100g"`
ProteinPer100g *float64 `json:"protein_per_100g"`
FatPer100g *float64 `json:"fat_per_100g"`
CarbsPer100g *float64 `json:"carbs_per_100g"`
StorageDays int `json:"storage_days"`
}
// RecognizeReceipt uses the vision model to extract food items from a receipt photo.
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType string) (*ReceiptResult, error) {
prompt := `Ты — OCR-система для чеков из продуктовых магазинов.
Проанализируй фото чека и извлеки список продуктов питания.
Для каждого продукта определи:
- name: название на русском языке (убери артикулы, коды, лишние символы)
- quantity: количество (число)
- unit: единица (г, кг, мл, л, шт, уп)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.01.0
Позиции, которые не являются едой (бытовая химия, табак, алкоголь) — пропусти.
Позиции с нечитаемым текстом — добавь в unrecognized.
Верни ТОЛЬКО валидный JSON без markdown:
{
"items": [
{"name": "Молоко 2.5%", "quantity": 1, "unit": "л", "category": "dairy", "confidence": 0.95}
],
"unrecognized": [
{"raw_text": "ТОВ АРТИК 1ШТ", "price": 89.0}
]
}`
text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
if err != nil {
return nil, fmt.Errorf("recognize receipt: %w", err)
}
var result ReceiptResult
if err := parseJSON(text, &result); err != nil {
return nil, fmt.Errorf("parse receipt result: %w", err)
}
if result.Items == nil {
result.Items = []RecognizedItem{}
}
if result.Unrecognized == nil {
result.Unrecognized = []UnrecognizedItem{}
}
return &result, nil
}
// RecognizeProducts uses the vision model to identify food items in a photo (fridge, shelf, etc.).
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType string) ([]RecognizedItem, error) {
prompt := `Ты — система распознавания продуктов питания.
Посмотри на фото и определи все видимые продукты питания.
Для каждого продукта оцени:
- name: название на русском языке
- quantity: приблизительное количество (число)
- unit: единица (г, кг, мл, л, шт)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.01.0
Только продукты питания. Пустые упаковки и несъедобные предметы — пропусти.
Верни ТОЛЬКО валидный JSON без markdown:
{
"items": [
{"name": "Яйца", "quantity": 10, "unit": "шт", "category": "dairy", "confidence": 0.9}
]
}`
text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
if err != nil {
return nil, fmt.Errorf("recognize products: %w", err)
}
var result struct {
Items []RecognizedItem `json:"items"`
}
if err := parseJSON(text, &result); err != nil {
return nil, fmt.Errorf("parse products result: %w", err)
}
if result.Items == nil {
return []RecognizedItem{}, nil
}
return result.Items, nil
}
// RecognizeDish uses the vision model to identify a dish and estimate its nutritional content.
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType string) (*DishResult, error) {
prompt := `Ты — диетолог и кулинарный эксперт.
Посмотри на фото блюда и определи:
- dish_name: название блюда на русском языке
- weight_grams: приблизительный вес порции в граммах
- calories: калорийность порции (приблизительно)
- protein_g, fat_g, carbs_g: БЖУ на порцию
- confidence: 0.01.0
- similar_dishes: до 3 похожих блюд (для поиска рецептов)
Верни ТОЛЬКО валидный JSON без markdown:
{
"dish_name": "Паста Карбонара",
"weight_grams": 350,
"calories": 520,
"protein_g": 22,
"fat_g": 26,
"carbs_g": 48,
"confidence": 0.85,
"similar_dishes": ["Паста с беконом", "Спагетти"]
}`
text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
if err != nil {
return nil, fmt.Errorf("recognize dish: %w", err)
}
var result DishResult
if err := parseJSON(text, &result); err != nil {
return nil, fmt.Errorf("parse dish result: %w", err)
}
if result.SimilarDishes == nil {
result.SimilarDishes = []string{}
}
return &result, nil
}
// ClassifyIngredient uses the text model to classify an unknown food item
// and build an ingredient_mappings record for it.
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*IngredientClassification, error) {
prompt := fmt.Sprintf(`Classify the food product: "%s".
Return ONLY valid JSON without markdown:
{
"canonical_name": "turkey_breast",
"aliases": ["turkey breast"],
"translations": [
{"lang": "ru", "name": "грудка индейки", "aliases": ["грудка индейки", "филе индейки"]}
],
"category": "meat",
"default_unit": "g",
"calories_per_100g": 135,
"protein_per_100g": 29,
"fat_per_100g": 1,
"carbs_per_100g": 0,
"storage_days": 3
}`, name)
messages := []map[string]string{
{"role": "user", "content": prompt},
}
text, err := c.generateContent(ctx, messages)
if err != nil {
return nil, fmt.Errorf("classify ingredient: %w", err)
}
var result IngredientClassification
if err := parseJSON(text, &result); err != nil {
return nil, fmt.Errorf("parse classification: %w", err)
}
return &result, nil
}
// parseJSON strips optional markdown fences and unmarshals JSON.
func parseJSON(text string, dst any) error {
text = strings.TrimSpace(text)
if strings.HasPrefix(text, "```") {
text = strings.TrimPrefix(text, "```json")
text = strings.TrimPrefix(text, "```")
text = strings.TrimSuffix(text, "```")
text = strings.TrimSpace(text)
}
return json.Unmarshal([]byte(text), dst)
}