package gemini

import (
	"bytes"
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"time"
)

const (
	// openaiAPIURL is the OpenAI chat completions endpoint.
	openaiAPIURL = "https://api.openai.com/v1/chat/completions"
	// openaiModel is the default text generation model.
	openaiModel = "gpt-4o-mini"
	// openaiVisionModel supports image inputs.
	openaiVisionModel = "gpt-4o"

	maxRetries = 3
)

// Client is an HTTP client for the OpenAI API.
type Client struct {
	apiKey     string
	httpClient *http.Client
}

// NewClient creates a new Client authenticated with the given API key.
// The underlying HTTP client enforces a 90-second request timeout.
func NewClient(apiKey string) *Client {
	return &Client{
		apiKey: apiKey,
		httpClient: &http.Client{
			Timeout: 90 * time.Second,
		},
	}
}

// generateContent sends text messages to the text model.
func (c *Client) generateContent(ctx context.Context, messages []map[string]string) (string, error) {
	return c.callOpenAI(ctx, openaiModel, 0.7, messages)
}

// generateVisionContent sends an image + text prompt to the vision model.
// imageBase64 must be the raw base64-encoded image data (no data URI prefix).
// mimeType defaults to "image/jpeg" if empty.
func (c *Client) generateVisionContent(ctx context.Context, prompt, imageBase64, mimeType string) (string, error) {
	if mimeType == "" {
		mimeType = "image/jpeg"
	}

	// The vision API expects the image embedded as a data URL inside a
	// multi-part content array: the image part first, then the text prompt.
	dataURL := fmt.Sprintf("data:%s;base64,%s", mimeType, imageBase64)
	imagePart := map[string]any{
		"type":      "image_url",
		"image_url": map[string]string{"url": dataURL},
	}
	textPart := map[string]any{
		"type": "text",
		"text": prompt,
	}
	messages := []any{
		map[string]any{
			"role":    "user",
			"content": []any{imagePart, textPart},
		},
	}

	// A low temperature keeps image descriptions deterministic.
	return c.callOpenAI(ctx, openaiVisionModel, 0.1, messages)
}
func (c *Client) callOpenAI(ctx context.Context, model string, temperature float64, messages any) (string, error) { body := map[string]any{ "model": model, "temperature": temperature, "messages": messages, } bodyBytes, err := json.Marshal(body) if err != nil { return "", fmt.Errorf("marshal request: %w", err) } req, err := http.NewRequestWithContext(ctx, http.MethodPost, openaiAPIURL, bytes.NewReader(bodyBytes)) if err != nil { return "", fmt.Errorf("create request: %w", err) } req.Header.Set("Content-Type", "application/json") req.Header.Set("Authorization", "Bearer "+c.apiKey) resp, err := c.httpClient.Do(req) if err != nil { return "", fmt.Errorf("send request: %w", err) } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { raw, _ := io.ReadAll(resp.Body) return "", fmt.Errorf("openai API error %d: %s", resp.StatusCode, string(raw)) } var result struct { Choices []struct { Message struct { Content string `json:"content"` } `json:"message"` } `json:"choices"` } if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { return "", fmt.Errorf("decode response: %w", err) } if len(result.Choices) == 0 { return "", fmt.Errorf("empty response from OpenAI") } return result.Choices[0].Message.Content, nil }