Files
food-ai/backend/internal/gemini/recognition.go
dbastrikin deceedd4a7 feat: implement Iteration 3 — product/receipt/dish recognition
Backend:
- gemini/client.go: refactor to shared callGroq transport; add
  generateVisionContent using llama-3.2-11b-vision-preview model
- gemini/recognition.go: RecognizeReceipt, RecognizeProducts,
  RecognizeDish (vision), ClassifyIngredient (text); shared parseJSON helper
- ingredient/repository.go: add FuzzyMatch (wraps Search, returns best hit)
- recognition/handler.go: POST /ai/recognize-receipt, /ai/recognize-products,
  /ai/recognize-dish; enrichItems with fuzzy match + AI classify fallback;
  parallel multi-image processing with deduplication
- server.go + main.go: wire recognition handler under /ai routes

Flutter:
- pubspec.yaml: add image_picker ^1.1.0
- AndroidManifest.xml: add CAMERA and READ_EXTERNAL_STORAGE permissions
- Info.plist: add NSCameraUsageDescription and NSPhotoLibraryUsageDescription
- recognition_service.dart: RecognitionService wrapping /ai/* endpoints;
  RecognizedItem, ReceiptResult, DishResult models
- scan_screen.dart: mode selector (receipt / products / dish / manual);
  image source picker; loading overlay; navigates to confirm or dish screen
- recognition_confirm_screen.dart: editable list of recognized items;
  inline qty/unit editing; swipe-to-delete; batch-add to pantry
- dish_result_screen.dart: dish name, KBZHU (calories/protein/fat/carbs) breakdown, similar dishes chips
- app_router.dart: /scan, /scan/confirm, /scan/dish routes (no bottom nav)
- products_screen.dart: FAB now shows bottom sheet with Manual / Scan options

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-22 10:54:03 +02:00

222 lines
7.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package gemini
import (
"encoding/json"
"fmt"
"strings"
"context"
)
// RecognizedItem describes one food product that the model reported for an
// image, along with its estimated amount and the model's confidence.
type RecognizedItem struct {
	// Name is the product name as returned by the model.
	Name string `json:"name"`
	// Quantity is the numeric amount, expressed in Unit.
	Quantity float64 `json:"quantity"`
	// Unit is the measurement unit label returned by the model.
	Unit string `json:"unit"`
	// Category is the category label returned by the model; it is not
	// validated in this file.
	Category string `json:"category"`
	// Confidence is the model's self-reported certainty score.
	Confidence float64 `json:"confidence"`
}
// UnrecognizedItem holds a receipt line that could not be identified as a
// food product.
type UnrecognizedItem struct {
	// RawText is the receipt text as returned by the model.
	RawText string `json:"raw_text"`
	// Price is the price attached to the line; a zero value is omitted when
	// the struct is encoded to JSON.
	Price float64 `json:"price,omitempty"`
}
// ReceiptResult bundles everything extracted from one receipt photo.
type ReceiptResult struct {
	// Items are the receipt positions recognised as food products.
	Items []RecognizedItem `json:"items"`
	// Unrecognized are the receipt lines the model reported as unidentified.
	Unrecognized []UnrecognizedItem `json:"unrecognized"`
}
// DishResult is the model's estimate for a photographed dish: its name,
// portion weight, nutritional values and a few similar dishes.
type DishResult struct {
	// DishName is the dish name as returned by the model.
	DishName string `json:"dish_name"`
	// WeightGrams is the estimated portion weight in grams.
	WeightGrams int `json:"weight_grams"`
	// Calories is the estimated calorie content.
	Calories float64 `json:"calories"`
	// ProteinG is the estimated protein amount in grams.
	ProteinG float64 `json:"protein_g"`
	// FatG is the estimated fat amount in grams.
	FatG float64 `json:"fat_g"`
	// CarbsG is the estimated carbohydrate amount in grams.
	CarbsG float64 `json:"carbs_g"`
	// Confidence is the model's self-reported certainty score.
	Confidence float64 `json:"confidence"`
	// SimilarDishes lists names of dishes the model considers similar.
	SimilarDishes []string `json:"similar_dishes"`
}
// IngredientClassification is the model-generated description of a food item
// that was not known beforehand.
type IngredientClassification struct {
	// CanonicalName is the canonical identifier returned by the model.
	CanonicalName string `json:"canonical_name"`
	// CanonicalNameRu is the canonical name in Russian.
	CanonicalNameRu string `json:"canonical_name_ru"`
	// Category is the category label returned by the model.
	Category string `json:"category"`
	// DefaultUnit is the default measurement unit returned by the model.
	DefaultUnit string `json:"default_unit"`
	// CaloriesPer100g stays nil when the value is absent or null in the JSON.
	CaloriesPer100g *float64 `json:"calories_per_100g"`
	// ProteinPer100g stays nil when the value is absent or null in the JSON.
	ProteinPer100g *float64 `json:"protein_per_100g"`
	// FatPer100g stays nil when the value is absent or null in the JSON.
	FatPer100g *float64 `json:"fat_per_100g"`
	// CarbsPer100g stays nil when the value is absent or null in the JSON.
	CarbsPer100g *float64 `json:"carbs_per_100g"`
	// StorageDays is the storage duration in days as returned by the model.
	StorageDays int `json:"storage_days"`
	// Aliases are alternative names for the same product.
	Aliases []string `json:"aliases"`
}
// RecognizeReceipt asks the vision model to extract food items from a receipt
// photo and decodes the reply into a ReceiptResult.
//
// imageBase64 and mimeType are passed through to generateVisionContent
// unchanged. On success both slices in the result are non-nil, so they encode
// as JSON arrays rather than null. A failed model call or an undecodable
// reply is returned as a wrapped error with a nil result.
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType string) (*ReceiptResult, error) {
	// The prompt (in Russian) instructs the model to act as a grocery-receipt
	// OCR system, skip non-food positions, and answer with bare JSON matching
	// ReceiptResult. The confidence range is spelled out as 0.0-1.0.
	prompt := `Ты — OCR-система для чеков из продуктовых магазинов.
Проанализируй фото чека и извлеки список продуктов питания.
Для каждого продукта определи:
- name: название на русском языке (убери артикулы, коды, лишние символы)
- quantity: количество (число)
- unit: единица (г, кг, мл, л, шт, уп)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Позиции, которые не являются едой (бытовая химия, табак, алкоголь) — пропусти.
Позиции с нечитаемым текстом — добавь в unrecognized.
Верни ТОЛЬКО валидный JSON без markdown:
{
"items": [
{"name": "Молоко 2.5%", "quantity": 1, "unit": "л", "category": "dairy", "confidence": 0.95}
],
"unrecognized": [
{"raw_text": "ТОВ АРТИК 1ШТ", "price": 89.0}
]
}`

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize receipt: %w", err)
	}

	var result ReceiptResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse receipt result: %w", err)
	}

	// Replace nil slices so the result serialises as [] instead of null.
	if result.Items == nil {
		result.Items = []RecognizedItem{}
	}
	if result.Unrecognized == nil {
		result.Unrecognized = []UnrecognizedItem{}
	}
	return &result, nil
}
// RecognizeProducts asks the vision model to list the food products visible
// in a photo (fridge, shelf, etc.) and returns them as RecognizedItem values.
//
// imageBase64 and mimeType are passed through to generateVisionContent
// unchanged. On success the returned slice is never nil; it is empty when the
// reply carries no items. A failed model call or an undecodable reply is
// returned as a wrapped error with a nil slice.
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType string) ([]RecognizedItem, error) {
	// The prompt (in Russian) asks for food products only and for a bare JSON
	// object with an "items" array. The confidence range is spelled out as
	// 0.0-1.0.
	prompt := `Ты — система распознавания продуктов питания.
Посмотри на фото и определи все видимые продукты питания.
Для каждого продукта оцени:
- name: название на русском языке
- quantity: приблизительное количество (число)
- unit: единица (г, кг, мл, л, шт)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Только продукты питания. Пустые упаковки и несъедобные предметы — пропусти.
Верни ТОЛЬКО валидный JSON без markdown:
{
"items": [
{"name": "Яйца", "quantity": 10, "unit": "шт", "category": "dairy", "confidence": 0.9}
]
}`

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize products: %w", err)
	}

	// The reply wraps the list in an object, so decode through an envelope.
	var result struct {
		Items []RecognizedItem `json:"items"`
	}
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse products result: %w", err)
	}

	// Return an empty slice rather than nil so it serialises as [] not null.
	if result.Items == nil {
		return []RecognizedItem{}, nil
	}
	return result.Items, nil
}
// RecognizeDish asks the vision model to identify a dish in a photo and to
// estimate its portion weight and nutritional content.
//
// imageBase64 and mimeType are passed through to generateVisionContent
// unchanged. On success SimilarDishes is never nil, so it encodes as a JSON
// array rather than null. A failed model call or an undecodable reply is
// returned as a wrapped error with a nil result.
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType string) (*DishResult, error) {
	// The prompt (in Russian) asks for the dish name, portion weight, calories,
	// protein/fat/carbs, a confidence score and up to three similar dishes, as
	// bare JSON matching DishResult. The confidence range is spelled out as
	// 0.0-1.0.
	prompt := `Ты — диетолог и кулинарный эксперт.
Посмотри на фото блюда и определи:
- dish_name: название блюда на русском языке
- weight_grams: приблизительный вес порции в граммах
- calories: калорийность порции (приблизительно)
- protein_g, fat_g, carbs_g: БЖУ на порцию
- confidence: 0.0-1.0
- similar_dishes: до 3 похожих блюд (для поиска рецептов)
Верни ТОЛЬКО валидный JSON без markdown:
{
"dish_name": "Паста Карбонара",
"weight_grams": 350,
"calories": 520,
"protein_g": 22,
"fat_g": 26,
"carbs_g": 48,
"confidence": 0.85,
"similar_dishes": ["Паста с беконом", "Спагетти"]
}`

	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize dish: %w", err)
	}

	var result DishResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse dish result: %w", err)
	}

	// Replace a nil slice so the result serialises as [] instead of null.
	if result.SimilarDishes == nil {
		result.SimilarDishes = []string{}
	}
	return &result, nil
}
// ClassifyIngredient asks the text model to classify an unknown food item and
// decodes the reply into an IngredientClassification, which is intended to
// back an ingredient_mappings record.
//
// name is embedded in the prompt as a quoted, escaped string. On success
// Aliases is never nil, so it encodes as a JSON array rather than null. A
// failed model call or an undecodable reply is returned as a wrapped error
// with a nil result.
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*IngredientClassification, error) {
	// %q keeps the quotes around the name but escapes embedded quotes and
	// control characters, so OCR or user text cannot break out of the quoted
	// span and alter the instructions. Printable names render exactly as with
	// "%s".
	prompt := fmt.Sprintf(`Классифицируй продукт питания: %q.
Ответь ТОЛЬКО валидным JSON без markdown:
{
"canonical_name": "turkey_breast",
"canonical_name_ru": "грудка индейки",
"category": "meat",
"default_unit": "g",
"calories_per_100g": 135,
"protein_per_100g": 29,
"fat_per_100g": 1,
"carbs_per_100g": 0,
"storage_days": 3,
"aliases": ["грудка индейки", "филе индейки", "turkey breast"]
}`, name)

	messages := []map[string]string{
		{"role": "user", "content": prompt},
	}
	text, err := c.generateContent(ctx, messages)
	if err != nil {
		return nil, fmt.Errorf("classify ingredient: %w", err)
	}

	var result IngredientClassification
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse classification: %w", err)
	}

	// Match the other recognition methods: never hand back a nil slice, so the
	// field serialises as [] instead of null.
	if result.Aliases == nil {
		result.Aliases = []string{}
	}
	return &result, nil
}
// parseJSON decodes a model reply into dst, tolerating the wrappers models
// commonly add around a JSON payload.
//
// The text is trimmed and, if it starts with a markdown code fence, the
// opening fence (with an optional "json" tag) and the closing fence are
// removed. If the remainder is still not valid JSON, the span from the first
// '{' or '[' to the last matching closing bracket is used instead, provided
// that span is itself valid JSON. This covers other fence tags and prose
// before or after the payload. When no valid span is found, the error from
// decoding the cleaned text is returned.
//
// dst must be a pointer accepted by json.Unmarshal.
func parseJSON(text string, dst any) error {
	text = strings.TrimSpace(text)
	if strings.HasPrefix(text, "```") {
		text = strings.TrimPrefix(text, "```json")
		text = strings.TrimPrefix(text, "```")
		text = strings.TrimSuffix(text, "```")
		text = strings.TrimSpace(text)
	}

	// Fallback only for replies that are not valid JSON as-is, so input that
	// already parses keeps taking exactly the same path.
	if !json.Valid([]byte(text)) {
		if start := strings.IndexAny(text, "{["); start >= 0 {
			closer := "}"
			if text[start] == '[' {
				closer = "]"
			}
			if end := strings.LastIndex(text, closer); end > start {
				if candidate := text[start : end+1]; json.Valid([]byte(candidate)) {
					text = candidate
				}
			}
		}
	}
	return json.Unmarshal([]byte(text), dst)
}