// food-ai/backend/internal/gemini/client.go

package gemini

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

// Endpoint and model identifiers used for requests to the OpenAI API.
const (
	// openaiAPIURL is the chat completions endpoint that requests are POSTed to.
	openaiAPIURL = "https://api.openai.com/v1/chat/completions"
	// openaiModel is the model used for text-only prompts.
	openaiModel = "gpt-4o-mini"
	// openaiVisionModel is the model used for prompts that include an image.
	openaiVisionModel = "gpt-4o"
)

// maxRetries is not referenced by any code visible in this part of the file.
// NOTE(review): presumably a retry budget for API calls — confirm where (or
// whether) it is used.
const maxRetries = 3

// Client is an HTTP client for the OpenAI API.
// It bundles the credential and the underlying *http.Client that the
// request methods on this type use.
type Client struct {
	// apiKey is sent as the bearer token in the Authorization header.
	apiKey     string
	// httpClient performs the outbound HTTP requests.
	httpClient *http.Client
}

// NewClient builds a Client that authenticates with apiKey and sends its
// requests through a dedicated http.Client with a 90-second timeout.
func NewClient(apiKey string) *Client {
	const requestTimeout = 90 * time.Second

	client := new(Client)
	client.apiKey = apiKey
	client.httpClient = &http.Client{Timeout: requestTimeout}
	return client
}

// generateContent submits the given chat messages to the default text model
// (openaiModel) at temperature 0.7 and returns whatever callOpenAI yields.
func (c *Client) generateContent(ctx context.Context, messages []map[string]string) (string, error) {
	const textTemperature = 0.7

	reply, err := c.callOpenAI(ctx, openaiModel, textTemperature, messages)
	return reply, err
}

// generateVisionContent asks the vision model (openaiVisionModel) about a
// single image, using temperature 0.1.
//
// imageBase64 must be the raw base64-encoded image data without a data URI
// prefix; the prefix is added here. An empty mimeType is treated as
// "image/jpeg". The request carries one user message whose content is the
// image part followed by the text prompt.
func (c *Client) generateVisionContent(ctx context.Context, prompt, imageBase64, mimeType string) (string, error) {
	const visionTemperature = 0.1

	mediaType := mimeType
	if mediaType == "" {
		mediaType = "image/jpeg"
	}

	// The image is embedded inline as a data URL.
	imagePart := map[string]any{
		"type": "image_url",
		"image_url": map[string]string{
			"url": fmt.Sprintf("data:%s;base64,%s", mediaType, imageBase64),
		},
	}
	textPart := map[string]any{
		"type": "text",
		"text": prompt,
	}
	userMessage := map[string]any{
		"role":    "user",
		"content": []any{imagePart, textPart},
	}

	return c.callOpenAI(ctx, openaiVisionModel, visionTemperature, []any{userMessage})
}

// callOpenAI is the shared HTTP transport for all OpenAI requests.
//
// It POSTs a JSON body holding model, temperature and messages to
// openaiAPIURL, authenticating with the client's API key as a bearer token,
// and returns the message content of the first choice in the response.
//
// messages can be []map[string]string (text) or []any (vision with image
// content); it is marshalled as given.
//
// An error is returned when the request cannot be marshalled, built or sent,
// when the API answers with a status other than 200 (the error carries the
// status code and a bounded prefix of the response body), when the response
// body cannot be decoded, or when it contains no choices.
func (c *Client) callOpenAI(ctx context.Context, model string, temperature float64, messages any) (string, error) {
	// maxErrorBody caps how much of a failed response is buffered and echoed
	// into the returned error, so an unexpectedly large error payload cannot
	// balloon memory use or error messages.
	const maxErrorBody = 8 << 10

	body := map[string]any{
		"model":       model,
		"temperature": temperature,
		"messages":    messages,
	}

	bodyBytes, err := json.Marshal(body)
	if err != nil {
		return "", fmt.Errorf("marshal request: %w", err)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodPost, openaiAPIURL, bytes.NewReader(bodyBytes))
	if err != nil {
		return "", fmt.Errorf("create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer "+c.apiKey)

	resp, err := c.httpClient.Do(req)
	if err != nil {
		return "", fmt.Errorf("send request: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		raw, readErr := io.ReadAll(io.LimitReader(resp.Body, maxErrorBody))
		if readErr != nil {
			// Keep whatever was read, but surface the read failure instead of
			// silently dropping it.
			return "", fmt.Errorf("openai API error %d: %s (reading error body: %w)", resp.StatusCode, raw, readErr)
		}
		return "", fmt.Errorf("openai API error %d: %s", resp.StatusCode, raw)
	}

	// Only the fields needed to extract the reply text are decoded.
	var result struct {
		Choices []struct {
			Message struct {
				Content string `json:"content"`
			} `json:"message"`
		} `json:"choices"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return "", fmt.Errorf("decode response: %w", err)
	}
	if len(result.Choices) == 0 {
		return "", fmt.Errorf("empty response from OpenAI")
	}
	return result.Choices[0].Message.Content, nil
}