From 1eb5e759724a10fea90a2f8e9ab7c292e7287191 Mon Sep 17 00:00:00 2001 From: Devon Rifkin Date: Thu, 11 Dec 2025 15:37:10 -0800 Subject: [PATCH] openai: add v1/responses support (#13351) Only supporting the stateless part of the API. Doc updates to come once this is shipped. Closes: #9659 --- middleware/openai.go | 108 +++ openai/openai.go | 53 +- openai/responses.go | 1004 +++++++++++++++++++++++++ openai/responses_test.go | 1543 ++++++++++++++++++++++++++++++++++++++ server/routes.go | 2 + 5 files changed, 2688 insertions(+), 22 deletions(-) create mode 100644 openai/responses.go create mode 100644 openai/responses_test.go diff --git a/middleware/openai.go b/middleware/openai.go index b2e43f165..5e526416e 100644 --- a/middleware/openai.go +++ b/middleware/openai.go @@ -433,3 +433,111 @@ func ChatMiddleware() gin.HandlerFunc { c.Next() } } + +type ResponsesWriter struct { + BaseWriter + converter *openai.ResponsesStreamConverter + model string + stream bool + responseID string + itemID string +} + +func (w *ResponsesWriter) writeEvent(eventType string, data any) error { + d, err := json.Marshal(data) + if err != nil { + return err + } + _, err = w.ResponseWriter.Write([]byte(fmt.Sprintf("event: %s\ndata: %s\n\n", eventType, d))) + if err != nil { + return err + } + if f, ok := w.ResponseWriter.(http.Flusher); ok { + f.Flush() + } + return nil +} + +func (w *ResponsesWriter) writeResponse(data []byte) (int, error) { + var chatResponse api.ChatResponse + if err := json.Unmarshal(data, &chatResponse); err != nil { + return 0, err + } + + if w.stream { + w.ResponseWriter.Header().Set("Content-Type", "text/event-stream") + + events := w.converter.Process(chatResponse) + for _, event := range events { + if err := w.writeEvent(event.Event, event.Data); err != nil { + return 0, err + } + } + return len(data), nil + } + + // Non-streaming response + w.ResponseWriter.Header().Set("Content-Type", "application/json") + response := openai.ToResponse(w.model, w.responseID, w.itemID, chatResponse) + return len(data), json.NewEncoder(w.ResponseWriter).Encode(response) +} + +func (w *ResponsesWriter) Write(data []byte) (int, error) { + code := w.ResponseWriter.Status() + if code != http.StatusOK { + return w.writeError(data) + } + return w.writeResponse(data) +} + +func ResponsesMiddleware() gin.HandlerFunc { + return func(c *gin.Context) { + var req openai.ResponsesRequest + if err := c.ShouldBindJSON(&req); err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error())) + return + } + + chatReq, err := openai.FromResponsesRequest(req) + if err != nil { + c.AbortWithStatusJSON(http.StatusBadRequest, openai.NewError(http.StatusBadRequest, err.Error())) + return + } + + // Check if client requested streaming (defaults to false) + streamRequested := req.Stream != nil && *req.Stream + + // Pass streaming preference to the underlying chat request + chatReq.Stream = &streamRequested + + var b bytes.Buffer + if err := json.NewEncoder(&b).Encode(chatReq); err != nil { + c.AbortWithStatusJSON(http.StatusInternalServerError, openai.NewError(http.StatusInternalServerError, err.Error())) + return + } + + c.Request.Body = io.NopCloser(&b) + + responseID := fmt.Sprintf("resp_%d", rand.Intn(999999)) + itemID := fmt.Sprintf("msg_%d", rand.Intn(999999)) + + w := &ResponsesWriter{ + BaseWriter: BaseWriter{ResponseWriter: c.Writer}, + converter: openai.NewResponsesStreamConverter(responseID, itemID, req.Model), + model: req.Model, + stream: streamRequested, + responseID: 
responseID, + itemID: itemID, + } + + // Set headers based on streaming mode + if streamRequested { + c.Writer.Header().Set("Content-Type", "text/event-stream") + c.Writer.Header().Set("Cache-Control", "no-cache") + c.Writer.Header().Set("Connection", "keep-alive") + } + + c.Writer = w + c.Next() + } +} diff --git a/openai/openai.go b/openai/openai.go index 4713d481b..9dcba3000 100644 --- a/openai/openai.go +++ b/openai/openai.go @@ -487,29 +487,9 @@ func FromChatRequest(r ChatCompletionRequest) (*api.ChatRequest, error) { } } - types := []string{"jpeg", "jpg", "png", "webp"} - valid := false - // support blank mime type to match api/chat taking just unadorned base64 - if strings.HasPrefix(url, "data:;base64,") { - url = strings.TrimPrefix(url, "data:;base64,") - valid = true - } - for _, t := range types { - prefix := "data:image/" + t + ";base64," - if strings.HasPrefix(url, prefix) { - url = strings.TrimPrefix(url, prefix) - valid = true - break - } - } - - if !valid { - return nil, errors.New("invalid image input") - } - - img, err := base64.StdEncoding.DecodeString(url) + img, err := decodeImageURL(url) if err != nil { - return nil, errors.New("invalid message format") + return nil, err } messages = append(messages, api.Message{Role: msg.Role, Images: []api.ImageData{img}}) @@ -648,6 +628,35 @@ func nameFromToolCallID(messages []Message, toolCallID string) string { return "" } +// decodeImageURL decodes a base64 data URI into raw image bytes. +func decodeImageURL(url string) (api.ImageData, error) { + types := []string{"jpeg", "jpg", "png", "webp"} + + // Support blank mime type to match /api/chat's behavior of taking just unadorned base64 + if strings.HasPrefix(url, "data:;base64,") { + url = strings.TrimPrefix(url, "data:;base64,") + } else { + valid := false + for _, t := range types { + prefix := "data:image/" + t + ";base64," + if strings.HasPrefix(url, prefix) { + url = strings.TrimPrefix(url, prefix) + valid = true + break + } + } + if !valid { + return nil, errors.New("invalid image input") + } + } + + img, err := base64.StdEncoding.DecodeString(url) + if err != nil { + return nil, errors.New("invalid image input") + } + return img, nil +} + // FromCompletionToolCall converts OpenAI ToolCall format to api.ToolCall func FromCompletionToolCall(toolCalls []ToolCall) ([]api.ToolCall, error) { apiToolCalls := make([]api.ToolCall, len(toolCalls)) diff --git a/openai/responses.go b/openai/responses.go new file mode 100644 index 000000000..8f6b1d94c --- /dev/null +++ b/openai/responses.go @@ -0,0 +1,1004 @@ +package openai + +import ( + "encoding/json" + "fmt" + "math/rand" + + "github.com/ollama/ollama/api" +) + +// ResponsesContent is a discriminated union for input content types. +// Concrete types: ResponsesTextContent, ResponsesImageContent +type ResponsesContent interface { + responsesContent() // unexported marker method +} + +type ResponsesTextContent struct { + Type string `json:"type"` // always "input_text" + Text string `json:"text"` +} + +func (ResponsesTextContent) responsesContent() {} + +type ResponsesImageContent struct { + Type string `json:"type"` // always "input_image" + // TODO(drifkin): is this really required? 
that seems verbose and a default is specified in the docs + Detail string `json:"detail"` // required + FileID string `json:"file_id,omitempty"` // optional + ImageURL string `json:"image_url,omitempty"` // optional +} + +func (ResponsesImageContent) responsesContent() {} + +// ResponsesOutputTextContent represents output text from a previous assistant response +// that is being passed back as part of the conversation history. +type ResponsesOutputTextContent struct { + Type string `json:"type"` // always "output_text" + Text string `json:"text"` +} + +func (ResponsesOutputTextContent) responsesContent() {} + +type ResponsesInputMessage struct { + Type string `json:"type"` // always "message" + Role string `json:"role"` // one of `user`, `system`, `developer` + Content []ResponsesContent `json:"content,omitempty"` +} + +func (m *ResponsesInputMessage) UnmarshalJSON(data []byte) error { + var aux struct { + Type string `json:"type"` + Role string `json:"role"` + Content json.RawMessage `json:"content"` + } + + if err := json.Unmarshal(data, &aux); err != nil { + return err + } + + m.Type = aux.Type + m.Role = aux.Role + + if len(aux.Content) == 0 { + return nil + } + + // Try to parse content as a string first (shorthand format) + var contentStr string + if err := json.Unmarshal(aux.Content, &contentStr); err == nil { + m.Content = []ResponsesContent{ + ResponsesTextContent{Type: "input_text", Text: contentStr}, + } + return nil + } + + // Otherwise, parse as an array of content items + var rawItems []json.RawMessage + if err := json.Unmarshal(aux.Content, &rawItems); err != nil { + return fmt.Errorf("content must be a string or array: %w", err) + } + + m.Content = make([]ResponsesContent, 0, len(rawItems)) + for i, raw := range rawItems { + // Peek at the type field to determine which concrete type to use + var typeField struct { + Type string `json:"type"` + } + if err := json.Unmarshal(raw, &typeField); err != nil { + return fmt.Errorf("content[%d]: %w", i, err) + } + + switch typeField.Type { + case "input_text": + var content ResponsesTextContent + if err := json.Unmarshal(raw, &content); err != nil { + return fmt.Errorf("content[%d]: %w", i, err) + } + m.Content = append(m.Content, content) + case "input_image": + var content ResponsesImageContent + if err := json.Unmarshal(raw, &content); err != nil { + return fmt.Errorf("content[%d]: %w", i, err) + } + m.Content = append(m.Content, content) + case "output_text": + var content ResponsesOutputTextContent + if err := json.Unmarshal(raw, &content); err != nil { + return fmt.Errorf("content[%d]: %w", i, err) + } + m.Content = append(m.Content, content) + default: + return fmt.Errorf("content[%d]: unknown content type: %s", i, typeField.Type) + } + } + + return nil +} + +type ResponsesOutputMessage struct{} + +// ResponsesInputItem is a discriminated union for input items. +// Concrete types: ResponsesInputMessage (more to come) +type ResponsesInputItem interface { + responsesInputItem() // unexported marker method +} + +func (ResponsesInputMessage) responsesInputItem() {} + +// ResponsesFunctionCall represents an assistant's function call in conversation history. 
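+// An illustrative wire-format example (the same shape exercised in
+// responses_test.go):
+//
+//	{"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}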
+type ResponsesFunctionCall struct { + ID string `json:"id,omitempty"` // item ID + Type string `json:"type"` // always "function_call" + CallID string `json:"call_id"` // the tool call ID + Name string `json:"name"` // function name + Arguments string `json:"arguments"` // JSON arguments string +} + +func (ResponsesFunctionCall) responsesInputItem() {} + +// ResponsesFunctionCallOutput represents a function call result from the client. +type ResponsesFunctionCallOutput struct { + Type string `json:"type"` // always "function_call_output" + CallID string `json:"call_id"` // links to the original function call + Output string `json:"output"` // the function result +} + +func (ResponsesFunctionCallOutput) responsesInputItem() {} + +// ResponsesReasoningInput represents a reasoning item passed back as input. +// This is used when the client sends previous reasoning back for context. +type ResponsesReasoningInput struct { + ID string `json:"id,omitempty"` + Type string `json:"type"` // always "reasoning" + Summary []ResponsesReasoningSummary `json:"summary,omitempty"` + EncryptedContent string `json:"encrypted_content,omitempty"` +} + +func (ResponsesReasoningInput) responsesInputItem() {} + +// unmarshalResponsesInputItem unmarshals a single input item from JSON. +func unmarshalResponsesInputItem(data []byte) (ResponsesInputItem, error) { + var typeField struct { + Type string `json:"type"` + Role string `json:"role"` + } + if err := json.Unmarshal(data, &typeField); err != nil { + return nil, err + } + + // Handle shorthand message format: {"role": "...", "content": "..."} + // When type is empty but role is present, treat as a message + itemType := typeField.Type + if itemType == "" && typeField.Role != "" { + itemType = "message" + } + + switch itemType { + case "message": + var msg ResponsesInputMessage + if err := json.Unmarshal(data, &msg); err != nil { + return nil, err + } + return msg, nil + case "function_call": + var fc ResponsesFunctionCall + if err := json.Unmarshal(data, &fc); err != nil { + return nil, err + } + return fc, nil + case "function_call_output": + var output ResponsesFunctionCallOutput + if err := json.Unmarshal(data, &output); err != nil { + return nil, err + } + return output, nil + case "reasoning": + var reasoning ResponsesReasoningInput + if err := json.Unmarshal(data, &reasoning); err != nil { + return nil, err + } + return reasoning, nil + default: + return nil, fmt.Errorf("unknown input item type: %s", typeField.Type) + } +} + +// ResponsesInput can be either: +// - a string (equivalent to a text input with the user role) +// - an array of input items (see ResponsesInputItem) +type ResponsesInput struct { + Text string // set if input was a plain string + Items []ResponsesInputItem // set if input was an array +} + +func (r *ResponsesInput) UnmarshalJSON(data []byte) error { + // Try string first + var s string + if err := json.Unmarshal(data, &s); err == nil { + r.Text = s + return nil + } + + // Otherwise, try array of input items + var rawItems []json.RawMessage + if err := json.Unmarshal(data, &rawItems); err != nil { + return fmt.Errorf("input must be a string or array: %w", err) + } + + r.Items = make([]ResponsesInputItem, 0, len(rawItems)) + for i, raw := range rawItems { + item, err := unmarshalResponsesInputItem(raw) + if err != nil { + return fmt.Errorf("input[%d]: %w", i, err) + } + r.Items = append(r.Items, item) + } + + return nil +} + +type ResponsesReasoning struct { + // originally: optional, default is per-model + Effort string 
`json:"effort,omitempty"` + + // originally: deprecated, use `summary` instead. One of `auto`, `concise`, `detailed` + GenerateSummary string `json:"generate_summary,omitempty"` + + // originally: optional, one of `auto`, `concise`, `detailed` + Summary string `json:"summary,omitempty"` +} + +type ResponsesTextFormat struct { + Type string `json:"type"` // "text", "json_schema" + Name string `json:"name,omitempty"` // for json_schema + Schema json.RawMessage `json:"schema,omitempty"` // for json_schema + Strict *bool `json:"strict,omitempty"` // for json_schema +} + +type ResponsesText struct { + Format *ResponsesTextFormat `json:"format,omitempty"` +} + +// ResponsesTool represents a tool in the Responses API format. +// Note: This differs from api.Tool which nests fields under "function". +type ResponsesTool struct { + Type string `json:"type"` // "function" + Name string `json:"name"` + Description string `json:"description,omitempty"` + Strict bool `json:"strict,omitempty"` + Parameters map[string]any `json:"parameters,omitempty"` +} + +type ResponsesRequest struct { + Model string `json:"model"` + + // originally: optional, default is false + // for us: not supported + Background bool `json:"background"` + + // originally: optional `string | {id: string}` + // for us: not supported + Conversation json.RawMessage `json:"conversation"` + + // originally: string[] + // for us: ignored + Include []string `json:"include"` + + Input ResponsesInput `json:"input"` + + // optional, inserts a system message at the start of the conversation + Instructions string `json:"instructions,omitempty"` + + // optional, maps to num_predict + MaxOutputTokens *int `json:"max_output_tokens,omitempty"` + + Reasoning ResponsesReasoning `json:"reasoning"` + + // optional, default is 1.0 + Temperature *float64 `json:"temperature"` + + // optional, controls output format (e.g. json_schema) + Text *ResponsesText `json:"text,omitempty"` + + // optional, default is 1.0 + TopP *float64 `json:"top_p"` + + // optional, default is `"disabled"` + Truncation *string `json:"truncation"` + + Tools []ResponsesTool `json:"tools,omitempty"` + + // TODO(drifkin): tool_choice is not supported. We could support "none" by not + // passing tools, but the other controls like `"required"` cannot be generally + // supported. 
+ + // optional, default is false + Stream *bool `json:"stream,omitempty"` +} + +// FromResponsesRequest converts a ResponsesRequest to api.ChatRequest +func FromResponsesRequest(r ResponsesRequest) (*api.ChatRequest, error) { + var messages []api.Message + + // Add instructions as system message if present + if r.Instructions != "" { + messages = append(messages, api.Message{ + Role: "system", + Content: r.Instructions, + }) + } + + // Handle simple string input + if r.Input.Text != "" { + messages = append(messages, api.Message{ + Role: "user", + Content: r.Input.Text, + }) + } + + // Handle array of input items + // Track pending reasoning to merge with the next assistant message + var pendingThinking string + + for _, item := range r.Input.Items { + switch v := item.(type) { + case ResponsesReasoningInput: + // Store thinking to merge with the next assistant message + pendingThinking = v.EncryptedContent + case ResponsesInputMessage: + msg, err := convertInputMessage(v) + if err != nil { + return nil, err + } + // If this is an assistant message, attach pending thinking + if msg.Role == "assistant" && pendingThinking != "" { + msg.Thinking = pendingThinking + pendingThinking = "" + } + messages = append(messages, msg) + case ResponsesFunctionCall: + // Convert function call to assistant message with tool calls + var args api.ToolCallFunctionArguments + if v.Arguments != "" { + if err := json.Unmarshal([]byte(v.Arguments), &args); err != nil { + return nil, fmt.Errorf("failed to parse function call arguments: %w", err) + } + } + msg := api.Message{ + Role: "assistant", + ToolCalls: []api.ToolCall{{ + ID: v.CallID, + Function: api.ToolCallFunction{ + Name: v.Name, + Arguments: args, + }, + }}, + } + // Attach pending thinking + if pendingThinking != "" { + msg.Thinking = pendingThinking + pendingThinking = "" + } + messages = append(messages, msg) + case ResponsesFunctionCallOutput: + messages = append(messages, api.Message{ + Role: "tool", + Content: v.Output, + ToolCallID: v.CallID, + }) + } + } + + // If there's trailing reasoning without a following message, emit it + if pendingThinking != "" { + messages = append(messages, api.Message{ + Role: "assistant", + Thinking: pendingThinking, + }) + } + + options := make(map[string]any) + + if r.Temperature != nil { + options["temperature"] = *r.Temperature + } else { + options["temperature"] = 1.0 + } + + if r.TopP != nil { + options["top_p"] = *r.TopP + } else { //nolint:staticcheck // SA9003: empty branch + // TODO(drifkin): OpenAI defaults to 1.0 here, but we don't follow that here + // in case the model has a different default. It would be best if we + // understood whether there was a model-specific default and if not, we + // should also default to 1.0, but that will require some additional + // plumbing + } + + if r.MaxOutputTokens != nil { + options["num_predict"] = *r.MaxOutputTokens + } + + // Convert tools from Responses API format to api.Tool format + var tools []api.Tool + for _, t := range r.Tools { + tool, err := convertTool(t) + if err != nil { + return nil, err + } + tools = append(tools, tool) + } + + // Handle text format (e.g. 
json_schema)
+	var format json.RawMessage
+	if r.Text != nil && r.Text.Format != nil {
+		switch r.Text.Format.Type {
+		case "json_schema":
+			if r.Text.Format.Schema != nil {
+				format = r.Text.Format.Schema
+			}
+		}
+	}
+
+	return &api.ChatRequest{
+		Model:    r.Model,
+		Messages: messages,
+		Options:  options,
+		Tools:    tools,
+		Format:   format,
+	}, nil
+}
+
+func convertTool(t ResponsesTool) (api.Tool, error) {
+	// Convert parameters from map[string]any to api.ToolFunctionParameters
+	var params api.ToolFunctionParameters
+	if t.Parameters != nil {
+		// Marshal and unmarshal to convert
+		b, err := json.Marshal(t.Parameters)
+		if err != nil {
+			return api.Tool{}, fmt.Errorf("failed to marshal tool parameters: %w", err)
+		}
+		if err := json.Unmarshal(b, &params); err != nil {
+			return api.Tool{}, fmt.Errorf("failed to unmarshal tool parameters: %w", err)
+		}
+	}
+
+	return api.Tool{
+		Type: t.Type,
+		Function: api.ToolFunction{
+			Name:        t.Name,
+			Description: t.Description,
+			Parameters:  params,
+		},
+	}, nil
+}
+
+func convertInputMessage(m ResponsesInputMessage) (api.Message, error) {
+	var content string
+	var images []api.ImageData
+
+	for _, c := range m.Content {
+		switch v := c.(type) {
+		case ResponsesTextContent:
+			content += v.Text
+		case ResponsesOutputTextContent:
+			content += v.Text
+		case ResponsesImageContent:
+			if v.ImageURL == "" {
+				continue // Skip if no URL (FileID not supported)
+			}
+			img, err := decodeImageURL(v.ImageURL)
+			if err != nil {
+				return api.Message{}, err
+			}
+			images = append(images, img)
+		}
+	}
+
+	return api.Message{
+		Role:    m.Role,
+		Content: content,
+		Images:  images,
+	}, nil
+}
+
+// Response types for the Responses API
+
+type ResponsesResponse struct {
+	ID        string                `json:"id"`
+	Object    string                `json:"object"`
+	CreatedAt int64                 `json:"created_at"`
+	Status    string                `json:"status"`
+	Model     string                `json:"model"`
+	Output    []ResponsesOutputItem `json:"output"`
+	Usage     *ResponsesUsage       `json:"usage,omitempty"`
+	// TODO(drifkin): add `temperature` and `top_p` to the response, but this
+	// requires additional plumbing to find the effective values since the
+	// defaults can come from the model or the request
+}
+
+type ResponsesOutputItem struct {
+	ID        string                   `json:"id"`
+	Type      string                   `json:"type"` // "message", "function_call", or "reasoning"
+	Status    string                   `json:"status,omitempty"`
+	Role      string                   `json:"role,omitempty"`      // for message
+	Content   []ResponsesOutputContent `json:"content,omitempty"`   // for message
+	CallID    string                   `json:"call_id,omitempty"`   // for function_call
+	Name      string                   `json:"name,omitempty"`      // for function_call
+	Arguments string                   `json:"arguments,omitempty"` // for function_call
+
+	// Reasoning fields
+	Summary          []ResponsesReasoningSummary `json:"summary,omitempty"`           // for reasoning
+	EncryptedContent string                      `json:"encrypted_content,omitempty"` // for reasoning
+}
+
+type ResponsesReasoningSummary struct {
+	Type string `json:"type"` // "summary_text"
+	Text string `json:"text"`
+}
+
+type ResponsesOutputContent struct {
+	Type string `json:"type"` // "output_text"
+	Text string `json:"text"`
+}
+
+type ResponsesUsage struct {
+	InputTokens  int `json:"input_tokens"`
+	OutputTokens int `json:"output_tokens"`
+	TotalTokens  int `json:"total_tokens"`
+}
+
+// ToResponse converts an api.ChatResponse to a Responses API response
+func ToResponse(model, responseID, itemID string, chatResponse api.ChatResponse) ResponsesResponse {
+	var output []ResponsesOutputItem
+
+	// Add reasoning item if thinking is present
+	if chatResponse.Message.Thinking != "" {
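+		// Thinking is emitted both as a human-readable summary and as
+		// encrypted_content; FromResponsesRequest reads encrypted_content back
+		// into Message.Thinking when a client replays the conversation.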
+ output = append(output, ResponsesOutputItem{ + ID: fmt.Sprintf("rs_%s", responseID), + Type: "reasoning", + Summary: []ResponsesReasoningSummary{ + { + Type: "summary_text", + Text: chatResponse.Message.Thinking, + }, + }, + EncryptedContent: chatResponse.Message.Thinking, // Plain text for now + }) + } + + if len(chatResponse.Message.ToolCalls) > 0 { + toolCalls := ToToolCalls(chatResponse.Message.ToolCalls) + for i, tc := range toolCalls { + output = append(output, ResponsesOutputItem{ + ID: fmt.Sprintf("fc_%s_%d", responseID, i), + Type: "function_call", + CallID: tc.ID, + Name: tc.Function.Name, + Arguments: tc.Function.Arguments, + }) + } + } else { + output = append(output, ResponsesOutputItem{ + ID: itemID, + Type: "message", + Status: "completed", + Role: "assistant", + Content: []ResponsesOutputContent{ + { + Type: "output_text", + Text: chatResponse.Message.Content, + }, + }, + }) + } + + return ResponsesResponse{ + ID: responseID, + Object: "response", + CreatedAt: chatResponse.CreatedAt.Unix(), + Status: "completed", + Model: model, + Output: output, + Usage: &ResponsesUsage{ + InputTokens: chatResponse.PromptEvalCount, + OutputTokens: chatResponse.EvalCount, + TotalTokens: chatResponse.PromptEvalCount + chatResponse.EvalCount, + }, + } +} + +// Streaming events: + +// ResponsesStreamEvent represents a single Server-Sent Event for the Responses API. +type ResponsesStreamEvent struct { + Event string // The event type (e.g., "response.created") + Data any // The event payload (will be JSON-marshaled) +} + +// ResponsesStreamConverter converts api.ChatResponse objects to Responses API +// streaming events. It maintains state across multiple calls to handle the +// streaming event sequence correctly. +type ResponsesStreamConverter struct { + // Configuration (immutable after creation) + responseID string + itemID string + model string + + // State tracking (mutated across Process calls) + firstWrite bool + outputIndex int + contentIndex int + contentStarted bool + toolCallsSent bool + accumulatedText string + sequenceNumber int + + // Reasoning/thinking state + accumulatedThinking string + reasoningItemID string + reasoningStarted bool + reasoningDone bool + + // Tool calls state (for final output) + toolCallItems []map[string]any +} + +// newEvent creates a ResponsesStreamEvent with the sequence number included in the data. +func (c *ResponsesStreamConverter) newEvent(eventType string, data map[string]any) ResponsesStreamEvent { + data["type"] = eventType + data["sequence_number"] = c.sequenceNumber + c.sequenceNumber++ + return ResponsesStreamEvent{ + Event: eventType, + Data: data, + } +} + +// NewResponsesStreamConverter creates a new converter with the given configuration. +func NewResponsesStreamConverter(responseID, itemID, model string) *ResponsesStreamConverter { + return &ResponsesStreamConverter{ + responseID: responseID, + itemID: itemID, + model: model, + firstWrite: true, + } +} + +// Process takes a ChatResponse and returns the events that should be emitted. +// Events are returned in order. The caller is responsible for serializing +// and sending these events. 
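+//
+// A minimal usage sketch (writeSSE stands in for the caller's serializer;
+// ResponsesWriter.writeEvent in middleware/openai.go plays this role):
+//
+//	conv := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
+//	for _, ev := range conv.Process(chunk) {
+//		writeSSE(ev.Event, ev.Data)
+//	}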
+func (c *ResponsesStreamConverter) Process(r api.ChatResponse) []ResponsesStreamEvent { + var events []ResponsesStreamEvent + + hasToolCalls := len(r.Message.ToolCalls) > 0 + hasThinking := r.Message.Thinking != "" + + // First chunk - emit initial events + if c.firstWrite { + c.firstWrite = false + events = append(events, c.createResponseCreatedEvent()) + events = append(events, c.createResponseInProgressEvent()) + } + + // Handle reasoning/thinking (before other content) + if hasThinking { + events = append(events, c.processThinking(r.Message.Thinking)...) + } + + // Handle tool calls + if hasToolCalls { + events = append(events, c.processToolCalls(r.Message.ToolCalls)...) + c.toolCallsSent = true + } + + // Handle text content (only if no tool calls) + if !hasToolCalls && !c.toolCallsSent && r.Message.Content != "" { + events = append(events, c.processTextContent(r.Message.Content)...) + } + + // Done - emit closing events + if r.Done { + events = append(events, c.processCompletion(r)...) + } + + return events +} + +func (c *ResponsesStreamConverter) createResponseCreatedEvent() ResponsesStreamEvent { + return c.newEvent("response.created", map[string]any{ + "response": map[string]any{ + "id": c.responseID, + "object": "response", + "status": "in_progress", + "output": []any{}, + }, + }) +} + +func (c *ResponsesStreamConverter) createResponseInProgressEvent() ResponsesStreamEvent { + return c.newEvent("response.in_progress", map[string]any{ + "response": map[string]any{ + "id": c.responseID, + "object": "response", + "status": "in_progress", + "output": []any{}, + }, + }) +} + +func (c *ResponsesStreamConverter) processThinking(thinking string) []ResponsesStreamEvent { + var events []ResponsesStreamEvent + + // Start reasoning item if not started + if !c.reasoningStarted { + c.reasoningStarted = true + c.reasoningItemID = fmt.Sprintf("rs_%d", rand.Intn(999999)) + + events = append(events, c.newEvent("response.output_item.added", map[string]any{ + "output_index": c.outputIndex, + "item": map[string]any{ + "id": c.reasoningItemID, + "type": "reasoning", + "summary": []any{}, + }, + })) + } + + // Accumulate thinking + c.accumulatedThinking += thinking + + // Emit delta + events = append(events, c.newEvent("response.reasoning_summary_text.delta", map[string]any{ + "item_id": c.reasoningItemID, + "output_index": c.outputIndex, + "delta": thinking, + })) + + // TODO(drifkin): consider adding + // [`response.reasoning_text.delta`](https://platform.openai.com/docs/api-reference/responses-streaming/response/reasoning_text/delta), + // but need to do additional research to understand how it's used and how + // widely supported it is + + return events +} + +func (c *ResponsesStreamConverter) finishReasoning() []ResponsesStreamEvent { + if !c.reasoningStarted || c.reasoningDone { + return nil + } + c.reasoningDone = true + + events := []ResponsesStreamEvent{ + c.newEvent("response.reasoning_summary_text.done", map[string]any{ + "item_id": c.reasoningItemID, + "output_index": c.outputIndex, + "text": c.accumulatedThinking, + }), + c.newEvent("response.output_item.done", map[string]any{ + "output_index": c.outputIndex, + "item": map[string]any{ + "id": c.reasoningItemID, + "type": "reasoning", + "summary": []map[string]any{{"type": "summary_text", "text": c.accumulatedThinking}}, + "encrypted_content": c.accumulatedThinking, // Plain text for now + }, + }), + } + + c.outputIndex++ + return events +} + +func (c *ResponsesStreamConverter) processToolCalls(toolCalls []api.ToolCall) 
[]ResponsesStreamEvent { + var events []ResponsesStreamEvent + + // Finish reasoning first if it was started + events = append(events, c.finishReasoning()...) + + converted := ToToolCalls(toolCalls) + + for i, tc := range converted { + fcItemID := fmt.Sprintf("fc_%d_%d", rand.Intn(999999), i) + + // Store for final output (with status: completed) + toolCallItem := map[string]any{ + "id": fcItemID, + "type": "function_call", + "status": "completed", + "call_id": tc.ID, + "name": tc.Function.Name, + "arguments": tc.Function.Arguments, + } + c.toolCallItems = append(c.toolCallItems, toolCallItem) + + // response.output_item.added for function call + events = append(events, c.newEvent("response.output_item.added", map[string]any{ + "output_index": c.outputIndex + i, + "item": map[string]any{ + "id": fcItemID, + "type": "function_call", + "status": "in_progress", + "call_id": tc.ID, + "name": tc.Function.Name, + "arguments": "", + }, + })) + + // response.function_call_arguments.delta + if tc.Function.Arguments != "" { + events = append(events, c.newEvent("response.function_call_arguments.delta", map[string]any{ + "item_id": fcItemID, + "output_index": c.outputIndex + i, + "delta": tc.Function.Arguments, + })) + } + + // response.function_call_arguments.done + events = append(events, c.newEvent("response.function_call_arguments.done", map[string]any{ + "item_id": fcItemID, + "output_index": c.outputIndex + i, + "arguments": tc.Function.Arguments, + })) + + // response.output_item.done for function call + events = append(events, c.newEvent("response.output_item.done", map[string]any{ + "output_index": c.outputIndex + i, + "item": map[string]any{ + "id": fcItemID, + "type": "function_call", + "status": "completed", + "call_id": tc.ID, + "name": tc.Function.Name, + "arguments": tc.Function.Arguments, + }, + })) + } + + return events +} + +func (c *ResponsesStreamConverter) processTextContent(content string) []ResponsesStreamEvent { + var events []ResponsesStreamEvent + + // Finish reasoning first if it was started + events = append(events, c.finishReasoning()...) 
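+	// (finishReasoning advances c.outputIndex, so the message item emitted
+	// below lands at the slot after any reasoning item.)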
+ + // Emit output item and content part for first text content + if !c.contentStarted { + c.contentStarted = true + + // response.output_item.added + events = append(events, c.newEvent("response.output_item.added", map[string]any{ + "output_index": c.outputIndex, + "item": map[string]any{ + "id": c.itemID, + "type": "message", + "status": "in_progress", + "role": "assistant", + "content": []any{}, + }, + })) + + // response.content_part.added + events = append(events, c.newEvent("response.content_part.added", map[string]any{ + "item_id": c.itemID, + "output_index": c.outputIndex, + "content_index": c.contentIndex, + "part": map[string]any{ + "type": "output_text", + "text": "", + }, + })) + } + + // Accumulate text + c.accumulatedText += content + + // Emit content delta + events = append(events, c.newEvent("response.output_text.delta", map[string]any{ + "item_id": c.itemID, + "output_index": c.outputIndex, + "content_index": 0, + "delta": content, + })) + + return events +} + +func (c *ResponsesStreamConverter) buildFinalOutput() []any { + var output []any + + // Add reasoning item if present + if c.reasoningStarted { + output = append(output, map[string]any{ + "id": c.reasoningItemID, + "type": "reasoning", + "summary": []map[string]any{{"type": "summary_text", "text": c.accumulatedThinking}}, + "encrypted_content": c.accumulatedThinking, + }) + } + + // Add tool calls if present + if len(c.toolCallItems) > 0 { + for _, item := range c.toolCallItems { + output = append(output, item) + } + } else if c.contentStarted { + // Add message item if we had text content + output = append(output, map[string]any{ + "id": c.itemID, + "type": "message", + "status": "completed", + "role": "assistant", + "content": []map[string]any{{ + "type": "output_text", + "text": c.accumulatedText, + }}, + }) + } + + return output +} + +func (c *ResponsesStreamConverter) processCompletion(r api.ChatResponse) []ResponsesStreamEvent { + var events []ResponsesStreamEvent + + // Finish reasoning if not done + events = append(events, c.finishReasoning()...) 
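+	// Tool-call items already emitted their *.done events in processToolCalls,
+	// so only a trailing text message (if any) still needs to be closed here
+	// before buildFinalOutput assembles the response.completed payload.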
+ + // Emit text completion events if we had text content + if !c.toolCallsSent && c.contentStarted { + // response.output_text.done + events = append(events, c.newEvent("response.output_text.done", map[string]any{ + "item_id": c.itemID, + "output_index": c.outputIndex, + "content_index": 0, + "text": c.accumulatedText, + })) + + // response.content_part.done + events = append(events, c.newEvent("response.content_part.done", map[string]any{ + "item_id": c.itemID, + "output_index": c.outputIndex, + "content_index": 0, + "part": map[string]any{ + "type": "output_text", + "text": c.accumulatedText, + }, + })) + + // response.output_item.done + events = append(events, c.newEvent("response.output_item.done", map[string]any{ + "output_index": c.outputIndex, + "item": map[string]any{ + "id": c.itemID, + "type": "message", + "status": "completed", + "role": "assistant", + "content": []map[string]any{{ + "type": "output_text", + "text": c.accumulatedText, + }}, + }, + })) + } + + // response.completed + events = append(events, c.newEvent("response.completed", map[string]any{ + "response": map[string]any{ + "id": c.responseID, + "object": "response", + "status": "completed", + "output": c.buildFinalOutput(), + "usage": map[string]any{ + "input_tokens": r.PromptEvalCount, + "output_tokens": r.EvalCount, + "total_tokens": r.PromptEvalCount + r.EvalCount, + }, + }, + })) + + return events +} diff --git a/openai/responses_test.go b/openai/responses_test.go new file mode 100644 index 000000000..50fbfdc57 --- /dev/null +++ b/openai/responses_test.go @@ -0,0 +1,1543 @@ +package openai + +import ( + "encoding/json" + "testing" + "time" + + "github.com/ollama/ollama/api" +) + +func TestResponsesInputMessage_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + json string + want ResponsesInputMessage + wantErr bool + }{ + { + name: "text content", + json: `{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}`, + want: ResponsesInputMessage{ + Type: "message", + Role: "user", + Content: []ResponsesContent{ResponsesTextContent{Type: "input_text", Text: "hello"}}, + }, + }, + { + name: "image content", + json: `{"type": "message", "role": "user", "content": [{"type": "input_image", "detail": "auto", "image_url": "https://example.com/img.png"}]}`, + want: ResponsesInputMessage{ + Type: "message", + Role: "user", + Content: []ResponsesContent{ResponsesImageContent{ + Type: "input_image", + Detail: "auto", + ImageURL: "https://example.com/img.png", + }}, + }, + }, + { + name: "multiple content items", + json: `{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}, {"type": "input_text", "text": "world"}]}`, + want: ResponsesInputMessage{ + Type: "message", + Role: "user", + Content: []ResponsesContent{ + ResponsesTextContent{Type: "input_text", Text: "hello"}, + ResponsesTextContent{Type: "input_text", Text: "world"}, + }, + }, + }, + { + name: "unknown content type", + json: `{"type": "message", "role": "user", "content": [{"type": "unknown"}]}`, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var got ResponsesInputMessage + err := json.Unmarshal([]byte(tt.json), &got) + + if tt.wantErr { + if err == nil { + t.Error("expected error, got nil") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got.Type != tt.want.Type { + t.Errorf("Type = %q, want %q", got.Type, tt.want.Type) + } + + if got.Role != tt.want.Role { + t.Errorf("Role = %q, want %q", 
got.Role, tt.want.Role) + } + + if len(got.Content) != len(tt.want.Content) { + t.Fatalf("len(Content) = %d, want %d", len(got.Content), len(tt.want.Content)) + } + + for i := range tt.want.Content { + switch wantContent := tt.want.Content[i].(type) { + case ResponsesTextContent: + gotContent, ok := got.Content[i].(ResponsesTextContent) + if !ok { + t.Fatalf("Content[%d] type = %T, want ResponsesTextContent", i, got.Content[i]) + } + if gotContent != wantContent { + t.Errorf("Content[%d] = %+v, want %+v", i, gotContent, wantContent) + } + case ResponsesImageContent: + gotContent, ok := got.Content[i].(ResponsesImageContent) + if !ok { + t.Fatalf("Content[%d] type = %T, want ResponsesImageContent", i, got.Content[i]) + } + if gotContent != wantContent { + t.Errorf("Content[%d] = %+v, want %+v", i, gotContent, wantContent) + } + } + } + }) + } +} + +func TestResponsesInput_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + json string + wantText string + wantItems int + wantErr bool + }{ + { + name: "plain string", + json: `"hello world"`, + wantText: "hello world", + }, + { + name: "array with one message", + json: `[{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}]`, + wantItems: 1, + }, + { + name: "array with multiple messages", + json: `[{"type": "message", "role": "system", "content": [{"type": "input_text", "text": "you are helpful"}]}, {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}]`, + wantItems: 2, + }, + { + name: "invalid input", + json: `123`, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var got ResponsesInput + err := json.Unmarshal([]byte(tt.json), &got) + + if tt.wantErr { + if err == nil { + t.Error("expected error, got nil") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if got.Text != tt.wantText { + t.Errorf("Text = %q, want %q", got.Text, tt.wantText) + } + + if len(got.Items) != tt.wantItems { + t.Errorf("len(Items) = %d, want %d", len(got.Items), tt.wantItems) + } + }) + } +} + +func TestUnmarshalResponsesInputItem(t *testing.T) { + t.Run("message item", func(t *testing.T) { + got, err := unmarshalResponsesInputItem([]byte(`{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}`)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + msg, ok := got.(ResponsesInputMessage) + if !ok { + t.Fatalf("got type %T, want ResponsesInputMessage", got) + } + + if msg.Role != "user" { + t.Errorf("Role = %q, want %q", msg.Role, "user") + } + }) + + t.Run("function_call item", func(t *testing.T) { + got, err := unmarshalResponsesInputItem([]byte(`{"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}`)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + fc, ok := got.(ResponsesFunctionCall) + if !ok { + t.Fatalf("got type %T, want ResponsesFunctionCall", got) + } + + if fc.Type != "function_call" { + t.Errorf("Type = %q, want %q", fc.Type, "function_call") + } + if fc.CallID != "call_abc123" { + t.Errorf("CallID = %q, want %q", fc.CallID, "call_abc123") + } + if fc.Name != "get_weather" { + t.Errorf("Name = %q, want %q", fc.Name, "get_weather") + } + }) + + t.Run("function_call_output item", func(t *testing.T) { + got, err := unmarshalResponsesInputItem([]byte(`{"type": "function_call_output", "call_id": "call_abc123", "output": "the result"}`)) + if err != nil { + 
t.Fatalf("unexpected error: %v", err) + } + + output, ok := got.(ResponsesFunctionCallOutput) + if !ok { + t.Fatalf("got type %T, want ResponsesFunctionCallOutput", got) + } + + if output.Type != "function_call_output" { + t.Errorf("Type = %q, want %q", output.Type, "function_call_output") + } + if output.CallID != "call_abc123" { + t.Errorf("CallID = %q, want %q", output.CallID, "call_abc123") + } + if output.Output != "the result" { + t.Errorf("Output = %q, want %q", output.Output, "the result") + } + }) + + t.Run("unknown item type", func(t *testing.T) { + _, err := unmarshalResponsesInputItem([]byte(`{"type": "unknown_type"}`)) + if err == nil { + t.Error("expected error, got nil") + } + }) +} + +func TestResponsesRequest_UnmarshalJSON(t *testing.T) { + tests := []struct { + name string + json string + check func(t *testing.T, req ResponsesRequest) + wantErr bool + }{ + { + name: "simple string input", + json: `{"model": "gpt-oss:20b", "input": "hello"}`, + check: func(t *testing.T, req ResponsesRequest) { + if req.Model != "gpt-oss:20b" { + t.Errorf("Model = %q, want %q", req.Model, "gpt-oss:20b") + } + if req.Input.Text != "hello" { + t.Errorf("Input.Text = %q, want %q", req.Input.Text, "hello") + } + }, + }, + { + name: "array input with messages", + json: `{"model": "gpt-oss:20b", "input": [{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "hello"}]}]}`, + check: func(t *testing.T, req ResponsesRequest) { + if len(req.Input.Items) != 1 { + t.Fatalf("len(Input.Items) = %d, want 1", len(req.Input.Items)) + } + msg, ok := req.Input.Items[0].(ResponsesInputMessage) + if !ok { + t.Fatalf("Input.Items[0] type = %T, want ResponsesInputMessage", req.Input.Items[0]) + } + if msg.Role != "user" { + t.Errorf("Role = %q, want %q", msg.Role, "user") + } + }, + }, + { + name: "with temperature", + json: `{"model": "gpt-oss:20b", "input": "hello", "temperature": 0.5}`, + check: func(t *testing.T, req ResponsesRequest) { + if req.Temperature == nil || *req.Temperature != 0.5 { + t.Errorf("Temperature = %v, want 0.5", req.Temperature) + } + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var got ResponsesRequest + err := json.Unmarshal([]byte(tt.json), &got) + + if tt.wantErr { + if err == nil { + t.Error("expected error, got nil") + } + return + } + + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if tt.check != nil { + tt.check(t, got) + } + }) + } +} + +func TestFromResponsesRequest_Tools(t *testing.T) { + reqJSON := `{ + "model": "gpt-oss:20b", + "input": "hello", + "tools": [ + { + "type": "function", + "name": "shell", + "description": "Runs a shell command", + "strict": false, + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "array", + "items": {"type": "string"}, + "description": "The command to execute" + } + }, + "required": ["command"] + } + } + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + // Check that tools were parsed + if len(req.Tools) != 1 { + t.Fatalf("expected 1 tool, got %d", len(req.Tools)) + } + + if req.Tools[0].Name != "shell" { + t.Errorf("expected tool name 'shell', got %q", req.Tools[0].Name) + } + + // Convert and check + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + if len(chatReq.Tools) != 1 { + t.Fatalf("expected 1 converted tool, got %d", len(chatReq.Tools)) + } + + tool := 
chatReq.Tools[0] + if tool.Type != "function" { + t.Errorf("expected tool type 'function', got %q", tool.Type) + } + if tool.Function.Name != "shell" { + t.Errorf("expected function name 'shell', got %q", tool.Function.Name) + } + if tool.Function.Description != "Runs a shell command" { + t.Errorf("expected function description 'Runs a shell command', got %q", tool.Function.Description) + } + if tool.Function.Parameters.Type != "object" { + t.Errorf("expected parameters type 'object', got %q", tool.Function.Parameters.Type) + } + if len(tool.Function.Parameters.Required) != 1 || tool.Function.Parameters.Required[0] != "command" { + t.Errorf("expected required ['command'], got %v", tool.Function.Parameters.Required) + } +} + +func TestFromResponsesRequest_FunctionCallOutput(t *testing.T) { + // Test a complete tool call round-trip: + // 1. User message asking about weather + // 2. Assistant's function call (from previous response) + // 3. Function call output (the tool result) + reqJSON := `{ + "model": "gpt-oss:20b", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "what is the weather?"}]}, + {"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call_output", "call_id": "call_abc123", "output": "sunny, 72F"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + // Check that input items were parsed + if len(req.Input.Items) != 3 { + t.Fatalf("expected 3 input items, got %d", len(req.Input.Items)) + } + + // Verify the function_call item + fc, ok := req.Input.Items[1].(ResponsesFunctionCall) + if !ok { + t.Fatalf("Input.Items[1] type = %T, want ResponsesFunctionCall", req.Input.Items[1]) + } + if fc.Name != "get_weather" { + t.Errorf("Name = %q, want %q", fc.Name, "get_weather") + } + + // Verify the function_call_output item + fcOutput, ok := req.Input.Items[2].(ResponsesFunctionCallOutput) + if !ok { + t.Fatalf("Input.Items[2] type = %T, want ResponsesFunctionCallOutput", req.Input.Items[2]) + } + if fcOutput.CallID != "call_abc123" { + t.Errorf("CallID = %q, want %q", fcOutput.CallID, "call_abc123") + } + + // Convert and check + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + if len(chatReq.Messages) != 3 { + t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages)) + } + + // Check the user message + userMsg := chatReq.Messages[0] + if userMsg.Role != "user" { + t.Errorf("expected role 'user', got %q", userMsg.Role) + } + + // Check the assistant message with tool call + assistantMsg := chatReq.Messages[1] + if assistantMsg.Role != "assistant" { + t.Errorf("expected role 'assistant', got %q", assistantMsg.Role) + } + if len(assistantMsg.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call, got %d", len(assistantMsg.ToolCalls)) + } + if assistantMsg.ToolCalls[0].ID != "call_abc123" { + t.Errorf("expected tool call ID 'call_abc123', got %q", assistantMsg.ToolCalls[0].ID) + } + if assistantMsg.ToolCalls[0].Function.Name != "get_weather" { + t.Errorf("expected function name 'get_weather', got %q", assistantMsg.ToolCalls[0].Function.Name) + } + + // Check the tool response message + toolMsg := chatReq.Messages[2] + if toolMsg.Role != "tool" { + t.Errorf("expected role 'tool', got %q", toolMsg.Role) + } + if toolMsg.Content != "sunny, 72F" { + t.Errorf("expected content 'sunny, 72F', got %q", 
toolMsg.Content) + } + if toolMsg.ToolCallID != "call_abc123" { + t.Errorf("expected ToolCallID 'call_abc123', got %q", toolMsg.ToolCallID) + } +} + +func TestDecodeImageURL(t *testing.T) { + // Valid PNG base64 (1x1 red pixel) + validPNG := "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + + t.Run("valid png", func(t *testing.T) { + img, err := decodeImageURL(validPNG) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if len(img) == 0 { + t.Error("expected non-empty image data") + } + }) + + t.Run("valid jpeg", func(t *testing.T) { + // Just test the prefix validation with minimal base64 + _, err := decodeImageURL("data:image/jpeg;base64,/9j/4AAQSkZJRg==") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("blank mime type", func(t *testing.T) { + _, err := decodeImageURL("data:;base64,dGVzdA==") + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + }) + + t.Run("invalid mime type", func(t *testing.T) { + _, err := decodeImageURL("data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7") + if err == nil { + t.Error("expected error for unsupported mime type") + } + }) + + t.Run("invalid base64", func(t *testing.T) { + _, err := decodeImageURL("data:image/png;base64,not-valid-base64!") + if err == nil { + t.Error("expected error for invalid base64") + } + }) + + t.Run("not a data url", func(t *testing.T) { + _, err := decodeImageURL("https://example.com/image.png") + if err == nil { + t.Error("expected error for non-data URL") + } + }) +} + +func TestFromResponsesRequest_Images(t *testing.T) { + // 1x1 red PNG pixel + pngBase64 := "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg==" + + reqJSON := `{ + "model": "llava", + "input": [ + {"type": "message", "role": "user", "content": [ + {"type": "input_text", "text": "What is in this image?"}, + {"type": "input_image", "detail": "auto", "image_url": "data:image/png;base64,` + pngBase64 + `"} + ]} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + if len(chatReq.Messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(chatReq.Messages)) + } + + msg := chatReq.Messages[0] + if msg.Role != "user" { + t.Errorf("expected role 'user', got %q", msg.Role) + } + if msg.Content != "What is in this image?" 
{ + t.Errorf("expected content 'What is in this image?', got %q", msg.Content) + } + if len(msg.Images) != 1 { + t.Fatalf("expected 1 image, got %d", len(msg.Images)) + } + if len(msg.Images[0]) == 0 { + t.Error("expected non-empty image data") + } +} + +func TestResponsesStreamConverter_TextOnly(t *testing.T) { + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + + // First chunk with content + events := converter.Process(api.ChatResponse{ + Message: api.Message{ + Content: "Hello", + }, + }) + + // Should have: response.created, response.in_progress, output_item.added, content_part.added, output_text.delta + if len(events) != 5 { + t.Fatalf("expected 5 events, got %d", len(events)) + } + + if events[0].Event != "response.created" { + t.Errorf("events[0].Event = %q, want %q", events[0].Event, "response.created") + } + if events[1].Event != "response.in_progress" { + t.Errorf("events[1].Event = %q, want %q", events[1].Event, "response.in_progress") + } + if events[2].Event != "response.output_item.added" { + t.Errorf("events[2].Event = %q, want %q", events[2].Event, "response.output_item.added") + } + if events[3].Event != "response.content_part.added" { + t.Errorf("events[3].Event = %q, want %q", events[3].Event, "response.content_part.added") + } + if events[4].Event != "response.output_text.delta" { + t.Errorf("events[4].Event = %q, want %q", events[4].Event, "response.output_text.delta") + } + + // Second chunk with more content + events = converter.Process(api.ChatResponse{ + Message: api.Message{ + Content: " World", + }, + }) + + // Should only have output_text.delta (no more created/in_progress/added) + if len(events) != 1 { + t.Fatalf("expected 1 event, got %d", len(events)) + } + if events[0].Event != "response.output_text.delta" { + t.Errorf("events[0].Event = %q, want %q", events[0].Event, "response.output_text.delta") + } + + // Final chunk + events = converter.Process(api.ChatResponse{ + Message: api.Message{}, + Done: true, + }) + + // Should have: output_text.done, content_part.done, output_item.done, response.completed + if len(events) != 4 { + t.Fatalf("expected 4 events, got %d", len(events)) + } + if events[0].Event != "response.output_text.done" { + t.Errorf("events[0].Event = %q, want %q", events[0].Event, "response.output_text.done") + } + // Check that accumulated text is present + data := events[0].Data.(map[string]any) + if data["text"] != "Hello World" { + t.Errorf("accumulated text = %q, want %q", data["text"], "Hello World") + } +} + +func TestResponsesStreamConverter_ToolCalls(t *testing.T) { + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + + events := converter.Process(api.ChatResponse{ + Message: api.Message{ + ToolCalls: []api.ToolCall{ + { + ID: "call_abc", + Function: api.ToolCallFunction{ + Name: "get_weather", + Arguments: api.ToolCallFunctionArguments{"city": "Paris"}, + }, + }, + }, + }, + }) + + // Should have: created, in_progress, output_item.added, arguments.delta, arguments.done, output_item.done + if len(events) != 6 { + t.Fatalf("expected 6 events, got %d", len(events)) + } + + if events[2].Event != "response.output_item.added" { + t.Errorf("events[2].Event = %q, want %q", events[2].Event, "response.output_item.added") + } + if events[3].Event != "response.function_call_arguments.delta" { + t.Errorf("events[3].Event = %q, want %q", events[3].Event, "response.function_call_arguments.delta") + } + if events[4].Event != "response.function_call_arguments.done" { + 
t.Errorf("events[4].Event = %q, want %q", events[4].Event, "response.function_call_arguments.done") + } + if events[5].Event != "response.output_item.done" { + t.Errorf("events[5].Event = %q, want %q", events[5].Event, "response.output_item.done") + } +} + +func TestResponsesStreamConverter_Reasoning(t *testing.T) { + converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b") + + // First chunk with thinking + events := converter.Process(api.ChatResponse{ + Message: api.Message{ + Thinking: "Let me think...", + }, + }) + + // Should have: created, in_progress, output_item.added (reasoning), reasoning_summary_text.delta + if len(events) != 4 { + t.Fatalf("expected 4 events, got %d", len(events)) + } + + if events[2].Event != "response.output_item.added" { + t.Errorf("events[2].Event = %q, want %q", events[2].Event, "response.output_item.added") + } + // Check it's a reasoning item + data := events[2].Data.(map[string]any) + item := data["item"].(map[string]any) + if item["type"] != "reasoning" { + t.Errorf("item type = %q, want %q", item["type"], "reasoning") + } + + if events[3].Event != "response.reasoning_summary_text.delta" { + t.Errorf("events[3].Event = %q, want %q", events[3].Event, "response.reasoning_summary_text.delta") + } + + // Second chunk with text content (reasoning should close first) + events = converter.Process(api.ChatResponse{ + Message: api.Message{ + Content: "The answer is 42", + }, + }) + + // Should have: reasoning_summary_text.done, output_item.done (reasoning), output_item.added (message), content_part.added, output_text.delta + if len(events) != 5 { + t.Fatalf("expected 5 events, got %d", len(events)) + } + + if events[0].Event != "response.reasoning_summary_text.done" { + t.Errorf("events[0].Event = %q, want %q", events[0].Event, "response.reasoning_summary_text.done") + } + if events[1].Event != "response.output_item.done" { + t.Errorf("events[1].Event = %q, want %q", events[1].Event, "response.output_item.done") + } + // Check the reasoning done item has encrypted_content + doneData := events[1].Data.(map[string]any) + doneItem := doneData["item"].(map[string]any) + if doneItem["encrypted_content"] != "Let me think..." 
{ + t.Errorf("encrypted_content = %q, want %q", doneItem["encrypted_content"], "Let me think...") + } +} + +func TestFromResponsesRequest_ReasoningMerge(t *testing.T) { + t.Run("reasoning merged with following message", func(t *testing.T) { + reqJSON := `{ + "model": "qwen3", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "solve 2+2"}]}, + {"type": "reasoning", "id": "rs_123", "encrypted_content": "Let me think about this math problem...", "summary": [{"type": "summary_text", "text": "Thinking about math"}]}, + {"type": "message", "role": "assistant", "content": [{"type": "input_text", "text": "The answer is 4"}]} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Should have 2 messages: user and assistant (with thinking merged) + if len(chatReq.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages)) + } + + // Check user message + if chatReq.Messages[0].Role != "user" { + t.Errorf("Messages[0].Role = %q, want %q", chatReq.Messages[0].Role, "user") + } + + // Check assistant message has both content and thinking + assistantMsg := chatReq.Messages[1] + if assistantMsg.Role != "assistant" { + t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant") + } + if assistantMsg.Content != "The answer is 4" { + t.Errorf("Messages[1].Content = %q, want %q", assistantMsg.Content, "The answer is 4") + } + if assistantMsg.Thinking != "Let me think about this math problem..." { + t.Errorf("Messages[1].Thinking = %q, want %q", assistantMsg.Thinking, "Let me think about this math problem...") + } + }) + + t.Run("reasoning merged with following function call", func(t *testing.T) { + reqJSON := `{ + "model": "qwen3", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "what is the weather?"}]}, + {"type": "reasoning", "id": "rs_123", "encrypted_content": "I need to call a tool for this...", "summary": []}, + {"type": "function_call", "call_id": "call_abc", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Should have 2 messages: user and assistant (with thinking + tool call) + if len(chatReq.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages)) + } + + // Check assistant message has both tool call and thinking + assistantMsg := chatReq.Messages[1] + if assistantMsg.Role != "assistant" { + t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant") + } + if assistantMsg.Thinking != "I need to call a tool for this..." 
+ t.Errorf("Messages[1].Thinking = %q, want %q", assistantMsg.Thinking, "I need to call a tool for this...")
+ }
+ if len(assistantMsg.ToolCalls) != 1 {
+ t.Fatalf("expected 1 tool call, got %d", len(assistantMsg.ToolCalls))
+ }
+ if assistantMsg.ToolCalls[0].Function.Name != "get_weather" {
+ t.Errorf("ToolCalls[0].Function.Name = %q, want %q", assistantMsg.ToolCalls[0].Function.Name, "get_weather")
+ }
+ })
+
+ t.Run("multi-turn conversation with reasoning", func(t *testing.T) {
+ // Simulates: user asks -> model thinks + responds -> user follows up
+ reqJSON := `{
+ "model": "qwen3",
+ "input": [
+ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "What is 2+2?"}]},
+ {"type": "reasoning", "id": "rs_001", "encrypted_content": "This is a simple arithmetic problem. 2+2=4.", "summary": [{"type": "summary_text", "text": "Calculating 2+2"}]},
+ {"type": "message", "role": "assistant", "content": [{"type": "input_text", "text": "The answer is 4."}]},
+ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "Now multiply that by 3"}]}
+ ]
+ }`
+
+ var req ResponsesRequest
+ if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+ t.Fatalf("failed to unmarshal request: %v", err)
+ }
+
+ chatReq, err := FromResponsesRequest(req)
+ if err != nil {
+ t.Fatalf("failed to convert request: %v", err)
+ }
+
+ // Should have 3 messages:
+ // 1. user: "What is 2+2?"
+ // 2. assistant: thinking + "The answer is 4."
+ // 3. user: "Now multiply that by 3"
+ if len(chatReq.Messages) != 3 {
+ t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages))
+ }
+
+ // Check first user message
+ if chatReq.Messages[0].Role != "user" || chatReq.Messages[0].Content != "What is 2+2?" {
+ t.Errorf("Messages[0] = {Role: %q, Content: %q}, want {Role: \"user\", Content: \"What is 2+2?\"}",
+ chatReq.Messages[0].Role, chatReq.Messages[0].Content)
+ }
+
+ // Check assistant message has merged thinking + content
+ if chatReq.Messages[1].Role != "assistant" {
+ t.Errorf("Messages[1].Role = %q, want \"assistant\"", chatReq.Messages[1].Role)
+ }
+ if chatReq.Messages[1].Content != "The answer is 4." {
+ t.Errorf("Messages[1].Content = %q, want \"The answer is 4.\"", chatReq.Messages[1].Content)
+ }
+ if chatReq.Messages[1].Thinking != "This is a simple arithmetic problem. 2+2=4." {
+ t.Errorf("Messages[1].Thinking = %q, want \"This is a simple arithmetic problem. 2+2=4.\"",
+ chatReq.Messages[1].Thinking)
+ }
+
+ // Check second user message
+ if chatReq.Messages[2].Role != "user" || chatReq.Messages[2].Content != "Now multiply that by 3" {
+ t.Errorf("Messages[2] = {Role: %q, Content: %q}, want {Role: \"user\", Content: \"Now multiply that by 3\"}",
+ chatReq.Messages[2].Role, chatReq.Messages[2].Content)
+ }
+ })
+
+ t.Run("multi-turn with tool calls and reasoning", func(t *testing.T) {
+ // Simulates: user asks -> model thinks + calls tool -> tool responds -> model thinks + responds -> user follows up
+ reqJSON := `{
+ "model": "qwen3",
+ "input": [
+ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "What is the weather in Paris?"}]},
+ {"type": "reasoning", "id": "rs_001", "encrypted_content": "I need to call the weather API for Paris.", "summary": []},
+ {"type": "function_call", "call_id": "call_abc", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
+ {"type": "function_call_output", "call_id": "call_abc", "output": "Sunny, 72°F"},
+ {"type": "reasoning", "id": "rs_002", "encrypted_content": "The weather API returned sunny and 72°F. I should format this nicely.", "summary": []},
+ {"type": "message", "role": "assistant", "content": [{"type": "input_text", "text": "It's sunny and 72°F in Paris!"}]},
+ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "What about London?"}]}
+ ]
+ }`
+
+ var req ResponsesRequest
+ if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+ t.Fatalf("failed to unmarshal request: %v", err)
+ }
+
+ chatReq, err := FromResponsesRequest(req)
+ if err != nil {
+ t.Fatalf("failed to convert request: %v", err)
+ }
+
+ // Should have 5 messages:
+ // 1. user: "What is the weather in Paris?"
+ // 2. assistant: thinking + tool call
+ // 3. tool: "Sunny, 72°F"
+ // 4. assistant: thinking + "It's sunny and 72°F in Paris!"
+ // 5. user: "What about London?"
+ if len(chatReq.Messages) != 5 {
+ t.Fatalf("expected 5 messages, got %d", len(chatReq.Messages))
+ }
+
+ // Message 1: user
+ if chatReq.Messages[0].Role != "user" {
+ t.Errorf("Messages[0].Role = %q, want \"user\"", chatReq.Messages[0].Role)
+ }
+
+ // Message 2: assistant with thinking + tool call
+ if chatReq.Messages[1].Role != "assistant" {
+ t.Errorf("Messages[1].Role = %q, want \"assistant\"", chatReq.Messages[1].Role)
+ }
+ if chatReq.Messages[1].Thinking != "I need to call the weather API for Paris." {
+ t.Errorf("Messages[1].Thinking = %q, want \"I need to call the weather API for Paris.\"", chatReq.Messages[1].Thinking)
+ }
+ if len(chatReq.Messages[1].ToolCalls) != 1 || chatReq.Messages[1].ToolCalls[0].Function.Name != "get_weather" {
+ t.Errorf("Messages[1].ToolCalls = %+v, want exactly one call to get_weather", chatReq.Messages[1].ToolCalls)
+ }
+
+ // Message 3: tool response
+ if chatReq.Messages[2].Role != "tool" || chatReq.Messages[2].Content != "Sunny, 72°F" {
+ t.Errorf("Messages[2] = {Role: %q, Content: %q}, want {Role: \"tool\", Content: \"Sunny, 72°F\"}",
+ chatReq.Messages[2].Role, chatReq.Messages[2].Content)
+ }
+
+ // Message 4: assistant with thinking + content
+ if chatReq.Messages[3].Role != "assistant" {
+ t.Errorf("Messages[3].Role = %q, want \"assistant\"", chatReq.Messages[3].Role)
+ }
+ if chatReq.Messages[3].Thinking != "The weather API returned sunny and 72°F. I should format this nicely." {
+ t.Errorf("Messages[3].Thinking = %q, want %q", chatReq.Messages[3].Thinking, "The weather API returned sunny and 72°F. I should format this nicely.")
+ }
+ if chatReq.Messages[3].Content != "It's sunny and 72°F in Paris!" {
+ t.Errorf("Messages[3].Content = %q, want \"It's sunny and 72°F in Paris!\"", chatReq.Messages[3].Content)
+ }
+
+ // Message 5: user follow-up
+ if chatReq.Messages[4].Role != "user" || chatReq.Messages[4].Content != "What about London?" {
+ t.Errorf("Messages[4] = {Role: %q, Content: %q}, want {Role: \"user\", Content: \"What about London?\"}",
+ chatReq.Messages[4].Role, chatReq.Messages[4].Content)
+ }
+ })
+
+ t.Run("trailing reasoning creates separate message", func(t *testing.T) {
+ reqJSON := `{
+ "model": "qwen3",
+ "input": [
+ {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "think about this"}]},
+ {"type": "reasoning", "id": "rs_123", "encrypted_content": "Still thinking...", "summary": []}
+ ]
+ }`
+
+ var req ResponsesRequest
+ if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+ t.Fatalf("failed to unmarshal request: %v", err)
+ }
+
+ chatReq, err := FromResponsesRequest(req)
+ if err != nil {
+ t.Fatalf("failed to convert request: %v", err)
+ }
+
+ // Should have 2 messages: user and assistant (thinking only)
+ if len(chatReq.Messages) != 2 {
+ t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages))
+ }
+
+ // Check assistant message has only thinking
+ assistantMsg := chatReq.Messages[1]
+ if assistantMsg.Role != "assistant" {
+ t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant")
+ }
+ if assistantMsg.Thinking != "Still thinking..." {
+ t.Errorf("Messages[1].Thinking = %q, want %q", assistantMsg.Thinking, "Still thinking...")
+ }
+ if assistantMsg.Content != "" {
+ t.Errorf("Messages[1].Content = %q, want empty", assistantMsg.Content)
+ }
+ })
+}
+
+func TestToResponse_WithReasoning(t *testing.T) {
+ response := ToResponse("gpt-oss:20b", "resp_123", "msg_456", api.ChatResponse{
+ CreatedAt: time.Now(),
+ Message: api.Message{
+ Thinking: "Analyzing the question...",
+ Content: "The answer is 42",
+ },
+ Done: true,
+ })
+
+ // Should have 2 output items: reasoning + message
+ if len(response.Output) != 2 {
+ t.Fatalf("expected 2 output items, got %d", len(response.Output))
+ }
+
+ // First item should be reasoning
+ if response.Output[0].Type != "reasoning" {
+ t.Errorf("Output[0].Type = %q, want %q", response.Output[0].Type, "reasoning")
+ }
+ if len(response.Output[0].Summary) != 1 {
+ t.Fatalf("expected 1 summary item, got %d", len(response.Output[0].Summary))
+ }
+ if response.Output[0].Summary[0].Text != "Analyzing the question..." {
+ t.Errorf("Summary[0].Text = %q, want %q", response.Output[0].Summary[0].Text, "Analyzing the question...")
+ }
+ if response.Output[0].EncryptedContent != "Analyzing the question..." {
+ t.Errorf("EncryptedContent = %q, want %q", response.Output[0].EncryptedContent, "Analyzing the question...")
+ }
+
+ // Second item should be message
+ if response.Output[1].Type != "message" {
+ t.Errorf("Output[1].Type = %q, want %q", response.Output[1].Type, "message")
+ }
+ if response.Output[1].Content[0].Text != "The answer is 42" {
+ t.Errorf("Content[0].Text = %q, want %q", response.Output[1].Content[0].Text, "The answer is 42")
+ }
+}
+
+func TestFromResponsesRequest_Instructions(t *testing.T) {
+ reqJSON := `{
+ "model": "gpt-oss:20b",
+ "instructions": "You are a helpful pirate. Always respond in pirate speak.",
+ "input": "Hello"
+ }`
+
+ var req ResponsesRequest
+ if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+ t.Fatalf("failed to unmarshal request: %v", err)
+ }
+
+ chatReq, err := FromResponsesRequest(req)
+ if err != nil {
+ t.Fatalf("failed to convert request: %v", err)
+ }
+
+ // Should have 2 messages: system (instructions) + user
+ if len(chatReq.Messages) != 2 {
+ t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages))
+ }
+
+ // First message should be system with instructions
+ if chatReq.Messages[0].Role != "system" {
+ t.Errorf("Messages[0].Role = %q, want %q", chatReq.Messages[0].Role, "system")
+ }
+ if chatReq.Messages[0].Content != "You are a helpful pirate. Always respond in pirate speak." {
+ t.Errorf("Messages[0].Content = %q, want instructions", chatReq.Messages[0].Content)
+ }
+
+ // Second message should be user
+ if chatReq.Messages[1].Role != "user" {
+ t.Errorf("Messages[1].Role = %q, want %q", chatReq.Messages[1].Role, "user")
+ }
+ if chatReq.Messages[1].Content != "Hello" {
+ t.Errorf("Messages[1].Content = %q, want %q", chatReq.Messages[1].Content, "Hello")
+ }
+}
+
+func TestFromResponsesRequest_MaxOutputTokens(t *testing.T) {
+ reqJSON := `{
+ "model": "gpt-oss:20b",
+ "input": "Write a story",
+ "max_output_tokens": 100
+ }`
+
+ var req ResponsesRequest
+ if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+ t.Fatalf("failed to unmarshal request: %v", err)
+ }
+
+ chatReq, err := FromResponsesRequest(req)
+ if err != nil {
+ t.Fatalf("failed to convert request: %v", err)
+ }
+
+ // Check that num_predict is set in options
+ numPredict, ok := chatReq.Options["num_predict"]
+ if !ok {
+ t.Fatal("expected num_predict in options")
+ }
+ if numPredict != 100 {
+ t.Errorf("num_predict = %v, want 100", numPredict)
+ }
+}
+
+func TestFromResponsesRequest_TextFormatJsonSchema(t *testing.T) {
+ reqJSON := `{
+ "model": "gpt-oss:20b",
+ "input": "Give me info about John who is 30",
+ "text": {
+ "format": {
+ "type": "json_schema",
+ "name": "person",
+ "strict": true,
+ "schema": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ "age": {"type": "integer"}
+ },
+ "required": ["name", "age"]
+ }
+ }
+ }
+ }`
+
+ var req ResponsesRequest
+ if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+ t.Fatalf("failed to unmarshal request: %v", err)
+ }
+
+ // Verify the text format was parsed
+ if req.Text == nil || req.Text.Format == nil {
+ t.Fatal("expected Text.Format to be set")
+ }
+ if req.Text.Format.Type != "json_schema" {
+ t.Errorf("Text.Format.Type = %q, want %q", req.Text.Format.Type, "json_schema")
+ }
+
+ chatReq, err := FromResponsesRequest(req)
+ if err != nil {
+ t.Fatalf("failed to convert request: %v", err)
+ }
+
+ // Check that Format is set
+ if chatReq.Format == nil {
+ t.Fatal("expected Format to be set")
+ }
+
+ // Verify the schema is passed through
+ var schema map[string]any
+ if err := json.Unmarshal(chatReq.Format, &schema); err != nil {
+ t.Fatalf("failed to unmarshal format: %v", err)
+ }
+ if schema["type"] != "object" {
+ t.Errorf("schema type = %v, want %q", schema["type"], "object")
+ }
+ props, ok := schema["properties"].(map[string]any)
+ if !ok {
+ t.Fatal("expected properties in schema")
+ }
+ if _, ok := props["name"]; !ok {
+ t.Error("expected 'name' in schema properties")
+ }
+ if _, ok := props["age"]; !ok {
+ t.Error("expected 'age' in schema properties")
+ }
+}
+
+func TestFromResponsesRequest_TextFormatText(t *testing.T) {
+ // When format type is "text", Format should be nil (no constraint)
type is "text", Format should be nil (no constraint) + reqJSON := `{ + "model": "gpt-oss:20b", + "input": "Hello", + "text": { + "format": { + "type": "text" + } + } + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Format should be nil for "text" type + if chatReq.Format != nil { + t.Errorf("expected Format to be nil for text type, got %s", string(chatReq.Format)) + } +} + +func TestResponsesInputMessage_ShorthandFormats(t *testing.T) { + t.Run("string content shorthand", func(t *testing.T) { + // Content can be a plain string instead of an array of content items + jsonStr := `{"type": "message", "role": "user", "content": "Hello world"}` + + var msg ResponsesInputMessage + if err := json.Unmarshal([]byte(jsonStr), &msg); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if msg.Role != "user" { + t.Errorf("Role = %q, want %q", msg.Role, "user") + } + if len(msg.Content) != 1 { + t.Fatalf("len(Content) = %d, want 1", len(msg.Content)) + } + + textContent, ok := msg.Content[0].(ResponsesTextContent) + if !ok { + t.Fatalf("Content[0] type = %T, want ResponsesTextContent", msg.Content[0]) + } + if textContent.Text != "Hello world" { + t.Errorf("Content[0].Text = %q, want %q", textContent.Text, "Hello world") + } + if textContent.Type != "input_text" { + t.Errorf("Content[0].Type = %q, want %q", textContent.Type, "input_text") + } + }) + + t.Run("output_text content type", func(t *testing.T) { + // Previous assistant responses come back with output_text content type + jsonStr := `{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I am an assistant"}]}` + + var msg ResponsesInputMessage + if err := json.Unmarshal([]byte(jsonStr), &msg); err != nil { + t.Fatalf("unexpected error: %v", err) + } + + if msg.Role != "assistant" { + t.Errorf("Role = %q, want %q", msg.Role, "assistant") + } + if len(msg.Content) != 1 { + t.Fatalf("len(Content) = %d, want 1", len(msg.Content)) + } + + outputContent, ok := msg.Content[0].(ResponsesOutputTextContent) + if !ok { + t.Fatalf("Content[0] type = %T, want ResponsesOutputTextContent", msg.Content[0]) + } + if outputContent.Text != "I am an assistant" { + t.Errorf("Content[0].Text = %q, want %q", outputContent.Text, "I am an assistant") + } + }) +} + +func TestUnmarshalResponsesInputItem_ShorthandMessage(t *testing.T) { + t.Run("message without type field", func(t *testing.T) { + // When type is omitted but role is present, treat as message + jsonStr := `{"role": "user", "content": "Hello"}` + + item, err := unmarshalResponsesInputItem([]byte(jsonStr)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + msg, ok := item.(ResponsesInputMessage) + if !ok { + t.Fatalf("got type %T, want ResponsesInputMessage", item) + } + if msg.Role != "user" { + t.Errorf("Role = %q, want %q", msg.Role, "user") + } + if len(msg.Content) != 1 { + t.Fatalf("len(Content) = %d, want 1", len(msg.Content)) + } + }) + + t.Run("message with both type and role", func(t *testing.T) { + // Explicit type should still work + jsonStr := `{"type": "message", "role": "system", "content": "You are helpful"}` + + item, err := unmarshalResponsesInputItem([]byte(jsonStr)) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + + msg, ok := item.(ResponsesInputMessage) + if !ok { + t.Fatalf("got type %T, want 
ResponsesInputMessage", item) + } + if msg.Role != "system" { + t.Errorf("Role = %q, want %q", msg.Role, "system") + } + }) +} + +func TestFromResponsesRequest_ShorthandFormats(t *testing.T) { + t.Run("shorthand message without type", func(t *testing.T) { + // Real-world format from OpenAI SDK + reqJSON := `{ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": "What is the weather in Tokyo?"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + if len(req.Input.Items) != 1 { + t.Fatalf("expected 1 input item, got %d", len(req.Input.Items)) + } + + msg, ok := req.Input.Items[0].(ResponsesInputMessage) + if !ok { + t.Fatalf("Input.Items[0] type = %T, want ResponsesInputMessage", req.Input.Items[0]) + } + if msg.Role != "user" { + t.Errorf("Role = %q, want %q", msg.Role, "user") + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + if len(chatReq.Messages) != 1 { + t.Fatalf("expected 1 message, got %d", len(chatReq.Messages)) + } + if chatReq.Messages[0].Content != "What is the weather in Tokyo?" { + t.Errorf("Content = %q, want %q", chatReq.Messages[0].Content, "What is the weather in Tokyo?") + } + }) + + t.Run("conversation with output_text from previous response", func(t *testing.T) { + // Simulates a multi-turn conversation where previous assistant response is sent back + reqJSON := `{ + "model": "gpt-4.1", + "input": [ + {"role": "user", "content": "Hello"}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Hi there!"}]}, + {"role": "user", "content": "How are you?"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + if len(chatReq.Messages) != 3 { + t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages)) + } + + // Check first user message + if chatReq.Messages[0].Role != "user" || chatReq.Messages[0].Content != "Hello" { + t.Errorf("Messages[0] = {Role: %q, Content: %q}, want {Role: \"user\", Content: \"Hello\"}", + chatReq.Messages[0].Role, chatReq.Messages[0].Content) + } + + // Check assistant message (output_text should be converted to content) + if chatReq.Messages[1].Role != "assistant" || chatReq.Messages[1].Content != "Hi there!" { + t.Errorf("Messages[1] = {Role: %q, Content: %q}, want {Role: \"assistant\", Content: \"Hi there!\"}", + chatReq.Messages[1].Role, chatReq.Messages[1].Content) + } + + // Check second user message + if chatReq.Messages[2].Role != "user" || chatReq.Messages[2].Content != "How are you?" 
+ t.Errorf("Messages[2] = {Role: %q, Content: %q}, want {Role: \"user\", Content: \"How are you?\"}",
+ chatReq.Messages[2].Role, chatReq.Messages[2].Content)
+ }
+ })
+}
+
+func TestResponsesStreamConverter_OutputIncludesContent(t *testing.T) {
+ // Verify that response.output_item.done includes content field for messages
+ converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
+
+ // First chunk
+ converter.Process(api.ChatResponse{
+ Message: api.Message{Content: "Hello World"},
+ })
+
+ // Final chunk
+ events := converter.Process(api.ChatResponse{
+ Message: api.Message{},
+ Done: true,
+ })
+
+ // Find the output_item.done event
+ var outputItemDone map[string]any
+ for _, event := range events {
+ if event.Event == "response.output_item.done" {
+ outputItemDone = event.Data.(map[string]any)
+ break
+ }
+ }
+
+ if outputItemDone == nil {
+ t.Fatal("expected response.output_item.done event")
+ }
+
+ item := outputItemDone["item"].(map[string]any)
+ if item["type"] != "message" {
+ t.Errorf("item.type = %q, want %q", item["type"], "message")
+ }
+
+ content, ok := item["content"].([]map[string]any)
+ if !ok {
+ t.Fatalf("item.content type = %T, want []map[string]any", item["content"])
+ }
+ if len(content) != 1 {
+ t.Fatalf("len(content) = %d, want 1", len(content))
+ }
+ if content[0]["type"] != "output_text" {
+ t.Errorf("content[0].type = %q, want %q", content[0]["type"], "output_text")
+ }
+ if content[0]["text"] != "Hello World" {
+ t.Errorf("content[0].text = %q, want %q", content[0]["text"], "Hello World")
+ }
+}
+
+func TestResponsesStreamConverter_ResponseCompletedIncludesOutput(t *testing.T) {
+ // Verify that response.completed includes the output array
+ converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
+
+ // Process some content
+ converter.Process(api.ChatResponse{
+ Message: api.Message{Content: "Test response"},
+ })
+
+ // Final chunk
+ events := converter.Process(api.ChatResponse{
+ Message: api.Message{},
+ Done: true,
+ })
+
+ // Find the response.completed event
+ var responseCompleted map[string]any
+ for _, event := range events {
+ if event.Event == "response.completed" {
+ responseCompleted = event.Data.(map[string]any)
+ break
+ }
+ }
+
+ if responseCompleted == nil {
+ t.Fatal("expected response.completed event")
+ }
+
+ response := responseCompleted["response"].(map[string]any)
+ output, ok := response["output"].([]any)
+ if !ok {
+ t.Fatalf("response.output type = %T, want []any", response["output"])
+ }
+
+ if len(output) != 1 {
+ t.Fatalf("len(output) = %d, want 1", len(output))
+ }
+
+ item := output[0].(map[string]any)
+ if item["type"] != "message" {
+ t.Errorf("output[0].type = %q, want %q", item["type"], "message")
+ }
+}
+
+func TestResponsesStreamConverter_ResponseCreatedIncludesOutput(t *testing.T) {
+ // Verify that response.created includes an empty output array
+ converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
+
+ events := converter.Process(api.ChatResponse{
+ Message: api.Message{Content: "Hi"},
+ })
+
+ // First event should be response.created
+ if events[0].Event != "response.created" {
+ t.Fatalf("events[0].Event = %q, want %q", events[0].Event, "response.created")
+ }
+
+ data := events[0].Data.(map[string]any)
+ response := data["response"].(map[string]any)
+
+ output, ok := response["output"].([]any)
+ if !ok {
+ t.Fatalf("response.output type = %T, want []any", response["output"])
+ }
+
+ // Should be empty array initially
+ if len(output) != 0 {
+ t.Errorf("len(output) = %d, want 0", len(output))
+ }
+}
+
+func TestResponsesStreamConverter_SequenceNumbers(t *testing.T) {
+ // Verify that events include incrementing sequence numbers
+ converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
+
+ events := converter.Process(api.ChatResponse{
+ Message: api.Message{Content: "Hello"},
+ })
+
+ for i, event := range events {
+ data := event.Data.(map[string]any)
+ seqNum, ok := data["sequence_number"].(int)
+ if !ok {
+ t.Fatalf("events[%d] missing sequence_number", i)
+ }
+ if seqNum != i {
+ t.Errorf("events[%d].sequence_number = %d, want %d", i, seqNum, i)
+ }
+ }
+
+ // Process more content, sequence should continue
+ moreEvents := converter.Process(api.ChatResponse{
+ Message: api.Message{Content: " World"},
+ })
+
+ expectedSeq := len(events)
+ for i, event := range moreEvents {
+ data := event.Data.(map[string]any)
+ seqNum := data["sequence_number"].(int)
+ if seqNum != expectedSeq+i {
+ t.Errorf("moreEvents[%d].sequence_number = %d, want %d", i, seqNum, expectedSeq+i)
+ }
+ }
+}
+
+func TestResponsesStreamConverter_FunctionCallStatus(t *testing.T) {
+ // Verify that function call items include status field
+ converter := NewResponsesStreamConverter("resp_123", "msg_456", "gpt-oss:20b")
+
+ events := converter.Process(api.ChatResponse{
+ Message: api.Message{
+ ToolCalls: []api.ToolCall{
+ {
+ ID: "call_abc",
+ Function: api.ToolCallFunction{
+ Name: "get_weather",
+ Arguments: api.ToolCallFunctionArguments{"city": "Paris"},
+ },
+ },
+ },
+ },
+ })
+
+ // Find output_item.added event
+ var addedItem map[string]any
+ var doneItem map[string]any
+ for _, event := range events {
+ data := event.Data.(map[string]any)
+ if data["type"] == "response.output_item.added" {
+ item := data["item"].(map[string]any)
+ if item["type"] == "function_call" {
+ addedItem = item
+ }
+ }
+ if data["type"] == "response.output_item.done" {
+ item := data["item"].(map[string]any)
+ if item["type"] == "function_call" {
+ doneItem = item
+ }
+ }
+ }
+
+ if addedItem == nil {
+ t.Fatal("expected function_call output_item.added event")
+ }
+ if addedItem["status"] != "in_progress" {
+ t.Errorf("output_item.added status = %q, want %q", addedItem["status"], "in_progress")
+ }
+
+ if doneItem == nil {
+ t.Fatal("expected function_call output_item.done event")
+ }
+ if doneItem["status"] != "completed" {
+ t.Errorf("output_item.done status = %q, want %q", doneItem["status"], "completed")
+ }
+}
diff --git a/server/routes.go b/server/routes.go
index bbf6b9b90..54f23d5d5 100644
--- a/server/routes.go
+++ b/server/routes.go
@@ -1532,6 +1532,7 @@ func (s *Server) GenerateRoutes(rc *ollama.Registry) (http.Handler, error) {
 r.POST("/v1/embeddings", middleware.EmbeddingsMiddleware(), s.EmbedHandler)
 r.GET("/v1/models", middleware.ListMiddleware(), s.ListHandler)
 r.GET("/v1/models/:model", middleware.RetrieveMiddleware(), s.ShowHandler)
+ r.POST("/v1/responses", middleware.ResponsesMiddleware(), s.ChatHandler)

 if rc != nil {
 // wrap old with new
@@ -2393,3 +2394,4 @@ func filterThinkTags(msgs []api.Message, m *Model) []api.Message {
 }
 return msgs
}
+