feat: implement Iteration 3 — product/receipt/dish recognition

Backend:
- gemini/client.go: refactor to shared callGroq transport; add
  generateVisionContent using llama-3.2-11b-vision-preview model
- gemini/recognition.go: RecognizeReceipt, RecognizeProducts,
  RecognizeDish (vision), ClassifyIngredient (text); shared parseJSON helper
- ingredient/repository.go: add FuzzyMatch (wraps Search, returns best hit)
- recognition/handler.go: POST /ai/recognize-receipt, /ai/recognize-products,
  /ai/recognize-dish; enrichItems with fuzzy match + AI classify fallback;
  parallel multi-image processing with deduplication
- server.go + main.go: wire recognition handler under /ai routes

Flutter:
- pubspec.yaml: add image_picker ^1.1.0
- AndroidManifest.xml: add CAMERA and READ_EXTERNAL_STORAGE permissions
- Info.plist: add NSCameraUsageDescription and NSPhotoLibraryUsageDescription
- recognition_service.dart: RecognitionService wrapping /ai/* endpoints;
  RecognizedItem, ReceiptResult, DishResult models
- scan_screen.dart: mode selector (receipt / products / dish / manual);
  image source picker; loading overlay; navigates to confirm or dish screen
- recognition_confirm_screen.dart: editable list of recognized items;
  inline qty/unit editing; swipe-to-delete; batch-add to pantry
- dish_result_screen.dart: dish name, KBZHU (calories/protein/fat/carbs) breakdown, similar dishes chips
- app_router.dart: /scan, /scan/confirm, /scan/dish routes (no bottom nav)
- products_screen.dart: FAB now shows bottom sheet with Manual / Scan options

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
dbastrikin
2026-02-22 10:54:03 +02:00
parent 288bb1c375
commit deceedd4a7
16 changed files with 1623 additions and 8 deletions

View File

@@ -0,0 +1,221 @@
package gemini
import (
"encoding/json"
"fmt"
"strings"
"context"
)
// RecognizedItem is a food item identified in an image.
//
// It is the element type of ReceiptResult.Items and of the slice returned by
// RecognizeProducts. Values are taken as reported by the model; the field
// notes below reflect what the prompts in this file ask the model to produce.
type RecognizedItem struct {
// Name is the product name; the prompts ask for it in Russian.
Name string `json:"name"`
// Quantity is the numeric amount (presumably expressed in Unit).
Quantity float64 `json:"quantity"`
// Unit is the unit of measure; the prompts suggest г, кг, мл, л, шт
// (and additionally уп for receipts).
Unit string `json:"unit"`
// Category is one of the labels offered by the prompts:
// dairy, meat, produce, bakery, frozen, beverages, other.
Category string `json:"category"`
// Confidence is the model's self-reported certainty.
// NOTE(review): presumably in the 0-1 range; nothing here enforces it.
Confidence float64 `json:"confidence"`
}
// UnrecognizedItem is text from a receipt that could not be identified as food.
//
// The receipt prompt asks the model to place lines with unreadable text here.
type UnrecognizedItem struct {
// RawText is the receipt line as reported by the model.
RawText string `json:"raw_text"`
// Price is the price attached to the line; because of omitempty it is left
// out of marshalled JSON when it is zero.
// NOTE(review): currency/units are not specified anywhere in this file.
Price float64 `json:"price,omitempty"`
}
// ReceiptResult is the full result of receipt OCR.
//
// RecognizeReceipt returns it with both slices non-nil, so they marshal to
// JSON as [] rather than null.
type ReceiptResult struct {
// Items holds the food products the model extracted from the receipt.
Items []RecognizedItem `json:"items"`
// Unrecognized holds receipt lines the model could not read or identify.
Unrecognized []UnrecognizedItem `json:"unrecognized"`
}
// DishResult is the result of dish recognition.
//
// All numeric values are the model's estimates for one portion, as requested
// by the prompt in RecognizeDish.
type DishResult struct {
// DishName is the dish name; the prompt asks for it in Russian.
DishName string `json:"dish_name"`
// WeightGrams is the approximate portion weight in grams.
WeightGrams int `json:"weight_grams"`
// Calories is the approximate energy content of the portion.
Calories float64 `json:"calories"`
// ProteinG, FatG and CarbsG are the protein, fat and carbohydrate amounts
// for the portion (grams, judging by the _g suffix).
ProteinG float64 `json:"protein_g"`
FatG float64 `json:"fat_g"`
CarbsG float64 `json:"carbs_g"`
// Confidence is the model's self-reported certainty.
// NOTE(review): presumably in the 0-1 range; nothing here enforces it.
Confidence float64 `json:"confidence"`
// SimilarDishes lists up to three similar dishes (the prompt requests them
// for recipe search); RecognizeDish guarantees the slice is non-nil.
SimilarDishes []string `json:"similar_dishes"`
}
// IngredientClassification is the AI-produced classification of an unknown food item.
//
// It mirrors the JSON object that the ClassifyIngredient prompt asks the model
// to return.
type IngredientClassification struct {
// CanonicalName is the canonical identifier (the prompt example uses an
// English snake_case name such as "turkey_breast").
CanonicalName string `json:"canonical_name"`
// CanonicalNameRu is the canonical name in Russian.
CanonicalNameRu string `json:"canonical_name_ru"`
// Category is the product category (e.g. "meat" in the prompt example).
Category string `json:"category"`
// DefaultUnit is the default unit of measure (e.g. "g" in the prompt example).
DefaultUnit string `json:"default_unit"`
// The per-100g nutrition values are pointers, so they stay nil when the
// model omits the key or sends null instead of a number.
CaloriesPer100g *float64 `json:"calories_per_100g"`
ProteinPer100g *float64 `json:"protein_per_100g"`
FatPer100g *float64 `json:"fat_per_100g"`
CarbsPer100g *float64 `json:"carbs_per_100g"`
// StorageDays is the storage duration reported by the model
// (presumably shelf life in days — confirm with the consumer of this field).
StorageDays int `json:"storage_days"`
// Aliases lists alternative names for the product; it is nil when the model
// returns none, since nothing in this file normalises it.
Aliases []string `json:"aliases"`
}
// RecognizeReceipt uses the vision model to extract food items from a receipt photo.
//
// imageBase64 and mimeType are forwarded unchanged to generateVisionContent
// together with a Russian-language OCR prompt. The prompt asks the model to
// skip non-food positions and to report unreadable lines under "unrecognized".
//
// On success the result always carries non-nil Items and Unrecognized slices.
// A transport failure is wrapped as "recognize receipt: ..." and an unparsable
// reply as "parse receipt result: ...".
func (c *Client) RecognizeReceipt(ctx context.Context, imageBase64, mimeType string) (*ReceiptResult, error) {
	// The confidence scale is spelled out as an explicit range so the model
	// does not read it as a single malformed number.
	prompt := `Ты — OCR-система для чеков из продуктовых магазинов.
Проанализируй фото чека и извлеки список продуктов питания.
Для каждого продукта определи:
- name: название на русском языке (убери артикулы, коды, лишние символы)
- quantity: количество (число)
- unit: единица (г, кг, мл, л, шт, уп)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Позиции, которые не являются едой (бытовая химия, табак, алкоголь) — пропусти.
Позиции с нечитаемым текстом — добавь в unrecognized.
Верни ТОЛЬКО валидный JSON без markdown:
{
"items": [
{"name": "Молоко 2.5%", "quantity": 1, "unit": "л", "category": "dairy", "confidence": 0.95}
],
"unrecognized": [
{"raw_text": "ТОВ АРТИК 1ШТ", "price": 89.0}
]
}`
	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize receipt: %w", err)
	}

	var result ReceiptResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse receipt result: %w", err)
	}

	// Normalise nil slices to empty ones so they encode as [] instead of null
	// when the result is marshalled back to JSON.
	if result.Items == nil {
		result.Items = []RecognizedItem{}
	}
	if result.Unrecognized == nil {
		result.Unrecognized = []UnrecognizedItem{}
	}
	return &result, nil
}
// RecognizeProducts uses the vision model to identify food items in a photo (fridge, shelf, etc.).
//
// imageBase64 and mimeType are forwarded unchanged to generateVisionContent
// together with a Russian-language prompt that asks for visible food products
// only (empty packaging and inedible objects are to be skipped).
//
// On success the returned slice is never nil; it is empty when the model
// reports no items. A transport failure is wrapped as "recognize products: ..."
// and an unparsable reply as "parse products result: ...".
func (c *Client) RecognizeProducts(ctx context.Context, imageBase64, mimeType string) ([]RecognizedItem, error) {
	// The confidence scale is spelled out as an explicit range so the model
	// does not read it as a single malformed number.
	prompt := `Ты — система распознавания продуктов питания.
Посмотри на фото и определи все видимые продукты питания.
Для каждого продукта оцени:
- name: название на русском языке
- quantity: приблизительное количество (число)
- unit: единица (г, кг, мл, л, шт)
- category: dairy | meat | produce | bakery | frozen | beverages | other
- confidence: 0.0-1.0
Только продукты питания. Пустые упаковки и несъедобные предметы — пропусти.
Верни ТОЛЬКО валидный JSON без markdown:
{
"items": [
{"name": "Яйца", "quantity": 10, "unit": "шт", "category": "dairy", "confidence": 0.9}
]
}`
	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize products: %w", err)
	}

	// The model wraps the list in an object, so decode through an envelope.
	var result struct {
		Items []RecognizedItem `json:"items"`
	}
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse products result: %w", err)
	}

	// Return an empty, non-nil slice so it encodes as [] instead of null.
	if result.Items == nil {
		return []RecognizedItem{}, nil
	}
	return result.Items, nil
}
// RecognizeDish uses the vision model to identify a dish and estimate its nutritional content.
//
// imageBase64 and mimeType are forwarded unchanged to generateVisionContent
// together with a Russian-language prompt requesting the dish name, portion
// weight, calories, protein/fat/carbs per portion, a confidence value and up
// to three similar dishes.
//
// On success SimilarDishes is always non-nil. A transport failure is wrapped
// as "recognize dish: ..." and an unparsable reply as "parse dish result: ...".
func (c *Client) RecognizeDish(ctx context.Context, imageBase64, mimeType string) (*DishResult, error) {
	// The confidence scale is spelled out as an explicit range so the model
	// does not read it as a single malformed number.
	prompt := `Ты — диетолог и кулинарный эксперт.
Посмотри на фото блюда и определи:
- dish_name: название блюда на русском языке
- weight_grams: приблизительный вес порции в граммах
- calories: калорийность порции (приблизительно)
- protein_g, fat_g, carbs_g: БЖУ на порцию
- confidence: 0.0-1.0
- similar_dishes: до 3 похожих блюд (для поиска рецептов)
Верни ТОЛЬКО валидный JSON без markdown:
{
"dish_name": "Паста Карбонара",
"weight_grams": 350,
"calories": 520,
"protein_g": 22,
"fat_g": 26,
"carbs_g": 48,
"confidence": 0.85,
"similar_dishes": ["Паста с беконом", "Спагетти"]
}`
	text, err := c.generateVisionContent(ctx, prompt, imageBase64, mimeType)
	if err != nil {
		return nil, fmt.Errorf("recognize dish: %w", err)
	}

	var result DishResult
	if err := parseJSON(text, &result); err != nil {
		return nil, fmt.Errorf("parse dish result: %w", err)
	}

	// Normalise a nil slice to an empty one so it encodes as [] instead of null.
	if result.SimilarDishes == nil {
		result.SimilarDishes = []string{}
	}
	return &result, nil
}
// ClassifyIngredient asks the text model to classify a food item that is not
// yet known, producing the data needed for an ingredient_mappings record.
//
// name is interpolated into a single user message; the reply is expected to be
// a JSON object matching IngredientClassification. A transport failure is
// wrapped as "classify ingredient: ..." and an unparsable reply as
// "parse classification: ...".
func (c *Client) ClassifyIngredient(ctx context.Context, name string) (*IngredientClassification, error) {
	// The embedded JSON is only an example showing the model the expected shape.
	const promptFormat = `Классифицируй продукт питания: "%s".
Ответь ТОЛЬКО валидным JSON без markdown:
{
"canonical_name": "turkey_breast",
"canonical_name_ru": "грудка индейки",
"category": "meat",
"default_unit": "g",
"calories_per_100g": 135,
"protein_per_100g": 29,
"fat_per_100g": 1,
"carbs_per_100g": 0,
"storage_days": 3,
"aliases": ["грудка индейки", "филе индейки", "turkey breast"]
}`

	reply, err := c.generateContent(ctx, []map[string]string{
		{"role": "user", "content": fmt.Sprintf(promptFormat, name)},
	})
	if err != nil {
		return nil, fmt.Errorf("classify ingredient: %w", err)
	}

	classification := new(IngredientClassification)
	if parseErr := parseJSON(reply, classification); parseErr != nil {
		return nil, fmt.Errorf("parse classification: %w", parseErr)
	}
	return classification, nil
}
// parseJSON decodes a model reply into dst.
//
// The reply is trimmed and, when it starts with a markdown code fence, the
// opening "```json" / "```" and a trailing "```" are stripped before a strict
// json.Unmarshal. If that strict attempt fails, the function falls back to
// decoding the first JSON object or array embedded in the text. This tolerates
// replies the strict path rejects: a fence with a different tag (e.g.
// "```JSON"), explanatory prose before the JSON, or commentary after it.
//
// Every input the strict path accepts is decoded exactly as before. When both
// attempts fail, the error from the strict attempt is returned.
func parseJSON(text string, dst any) error {
	text = strings.TrimSpace(text)
	if strings.HasPrefix(text, "```") {
		text = strings.TrimPrefix(text, "```json")
		text = strings.TrimPrefix(text, "```")
		text = strings.TrimSuffix(text, "```")
		text = strings.TrimSpace(text)
	}

	strictErr := json.Unmarshal([]byte(text), dst)
	if strictErr == nil {
		return nil
	}

	// Fallback: locate the first object/array opener and let a Decoder read
	// exactly one JSON value from there, ignoring whatever follows it.
	start := strings.IndexAny(text, "{[")
	if start < 0 {
		return strictErr
	}
	if err := json.NewDecoder(strings.NewReader(text[start:])).Decode(dst); err != nil {
		return strictErr
	}
	return nil
}