Backend:
- gemini/client.go: refactor to shared callGroq transport; add generateVisionContent using llama-3.2-11b-vision-preview model
- gemini/recognition.go: RecognizeReceipt, RecognizeProducts, RecognizeDish (vision), ClassifyIngredient (text); shared parseJSON helper
- ingredient/repository.go: add FuzzyMatch (wraps Search, returns best hit)
- recognition/handler.go: POST /ai/recognize-receipt, /ai/recognize-products, /ai/recognize-dish; enrichItems with fuzzy match + AI classify fallback; parallel multi-image processing with deduplication
- server.go + main.go: wire recognition handler under /ai routes

Flutter:
- pubspec.yaml: add image_picker ^1.1.0
- AndroidManifest.xml: add CAMERA and READ_EXTERNAL_STORAGE permissions
- Info.plist: add NSCameraUsageDescription and NSPhotoLibraryUsageDescription
- recognition_service.dart: RecognitionService wrapping /ai/* endpoints; RecognizedItem, ReceiptResult, DishResult models
- scan_screen.dart: mode selector (receipt / products / dish / manual); image source picker; loading overlay; navigates to confirm or dish screen
- recognition_confirm_screen.dart: editable list of recognized items; inline qty/unit editing; swipe-to-delete; batch-add to pantry
- dish_result_screen.dart: dish name, KBZHU (calories, protein, fat, carbs) breakdown, similar dishes chips
- app_router.dart: /scan, /scan/confirm, /scan/dish routes (no bottom nav)
- products_screen.dart: FAB now shows bottom sheet with Manual / Scan options

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
121 lines
3.2 KiB
Go
121 lines
3.2 KiB
Go
package gemini
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"time"
|
|
)
|
|
|
|
// Groq endpoint and model settings.
const (
	// groqAPIURL points at Groq's OpenAI-compatible chat-completions endpoint
	// (free tier, no billing required).
	groqAPIURL = "https://api.groq.com/openai/v1/chat/completions"

	// groqModel names the model used by default for text generation.
	groqModel = "llama-3.3-70b-versatile"

	// groqVisionModel names the model used for requests that carry images in
	// the OpenAI vision message format.
	// NOTE(review): this is a "preview" model ID — confirm Groq still serves it.
	groqVisionModel = "llama-3.2-11b-vision-preview"

	// maxRetries is not referenced in this part of the file; presumably it caps
	// retry attempts elsewhere in the package — verify against its users.
	maxRetries = 3
)
|
|
|
|
// Client talks to the Groq LLM API over its OpenAI-compatible HTTP interface.
type Client struct {
	// apiKey is the credential sent as the bearer token on requests.
	apiKey string
	// httpClient performs the outgoing HTTP calls.
	httpClient *http.Client
}
|
|
|
|
// NewClient creates a new Client.
|
|
func NewClient(apiKey string) *Client {
|
|
return &Client{
|
|
apiKey: apiKey,
|
|
httpClient: &http.Client{
|
|
Timeout: 90 * time.Second,
|
|
},
|
|
}
|
|
}
|
|
|
|
// generateContent sends text messages to the text-only model.
|
|
func (c *Client) generateContent(ctx context.Context, messages []map[string]string) (string, error) {
|
|
return c.callGroq(ctx, groqModel, 0.7, messages)
|
|
}
|
|
|
|
// generateVisionContent sends an image + text prompt to the vision model.
|
|
// imageBase64 must be the raw base64-encoded image data (no data URI prefix).
|
|
// mimeType defaults to "image/jpeg" if empty.
|
|
func (c *Client) generateVisionContent(ctx context.Context, prompt, imageBase64, mimeType string) (string, error) {
|
|
if mimeType == "" {
|
|
mimeType = "image/jpeg"
|
|
}
|
|
dataURL := fmt.Sprintf("data:%s;base64,%s", mimeType, imageBase64)
|
|
|
|
messages := []any{
|
|
map[string]any{
|
|
"role": "user",
|
|
"content": []any{
|
|
map[string]any{
|
|
"type": "image_url",
|
|
"image_url": map[string]string{"url": dataURL},
|
|
},
|
|
map[string]any{
|
|
"type": "text",
|
|
"text": prompt,
|
|
},
|
|
},
|
|
},
|
|
}
|
|
return c.callGroq(ctx, groqVisionModel, 0.1, messages)
|
|
}
|
|
|
|
// callGroq is the shared HTTP transport for all Groq requests.
|
|
// messages can be []map[string]string (text) or []any (vision with image content).
|
|
func (c *Client) callGroq(ctx context.Context, model string, temperature float64, messages any) (string, error) {
|
|
body := map[string]any{
|
|
"model": model,
|
|
"temperature": temperature,
|
|
"messages": messages,
|
|
}
|
|
|
|
bodyBytes, err := json.Marshal(body)
|
|
if err != nil {
|
|
return "", fmt.Errorf("marshal request: %w", err)
|
|
}
|
|
|
|
req, err := http.NewRequestWithContext(ctx, http.MethodPost, groqAPIURL, bytes.NewReader(bodyBytes))
|
|
if err != nil {
|
|
return "", fmt.Errorf("create request: %w", err)
|
|
}
|
|
req.Header.Set("Content-Type", "application/json")
|
|
req.Header.Set("Authorization", "Bearer "+c.apiKey)
|
|
|
|
resp, err := c.httpClient.Do(req)
|
|
if err != nil {
|
|
return "", fmt.Errorf("send request: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
raw, _ := io.ReadAll(resp.Body)
|
|
return "", fmt.Errorf("groq API error %d: %s", resp.StatusCode, string(raw))
|
|
}
|
|
|
|
var result struct {
|
|
Choices []struct {
|
|
Message struct {
|
|
Content string `json:"content"`
|
|
} `json:"message"`
|
|
} `json:"choices"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
|
|
return "", fmt.Errorf("decode response: %w", err)
|
|
}
|
|
if len(result.Choices) == 0 {
|
|
return "", fmt.Errorf("empty response from Groq")
|
|
}
|
|
return result.Choices[0].Message.Content, nil
|
|
}
|