openai: add tool call appending to previous assistant message (#13434)

* openai: add tool call appending to previous asst message

* add tests for thinking appending
Parth Sareen 2025-12-11 17:30:12 -08:00 committed by GitHub
parent 93d45d7a04
commit 9b2035d194
2 changed files with 324 additions and 14 deletions
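
For readers skimming the diff below: the behavioral change is that a Responses API function_call item is now folded into the immediately preceding assistant message (along with any pending thinking text) instead of always starting a new assistant message. The sketch below is illustrative only and is not part of the commit; it uses simplified stand-in types rather than ollama's real api.Message/api.ToolCall structs, and it mirrors the weather example used in the new tests.

package main

import "fmt"

// Stand-ins for the api.ToolCall / api.Message shapes touched by the diff
// (illustrative only; the real types live in ollama's api package).
type ToolCall struct {
	ID   string
	Name string
}

type Message struct {
	Role      string
	Content   string
	Thinking  string
	ToolCalls []ToolCall
}

// appendToolCall mirrors the merge rule added in this commit: if the previous
// message is an assistant message, attach the tool call (and any pending
// thinking) to it; otherwise start a new assistant message.
func appendToolCall(messages []Message, tc ToolCall, pendingThinking *string) []Message {
	if len(messages) > 0 && messages[len(messages)-1].Role == "assistant" {
		last := &messages[len(messages)-1]
		last.ToolCalls = append(last.ToolCalls, tc)
		if *pendingThinking != "" {
			last.Thinking = *pendingThinking
			*pendingThinking = ""
		}
		return messages
	}
	msg := Message{Role: "assistant", ToolCalls: []ToolCall{tc}}
	if *pendingThinking != "" {
		msg.Thinking = *pendingThinking
		*pendingThinking = ""
	}
	return append(messages, msg)
}

func main() {
	pending := ""
	msgs := []Message{
		{Role: "user", Content: "what is the weather?"},
		{Role: "assistant", Content: "I'll check the weather for you."},
	}
	msgs = appendToolCall(msgs, ToolCall{ID: "call_abc123", Name: "get_weather"}, &pending)
	// With the merge rule, the tool call lands on the existing assistant
	// message, so the conversation stays at two messages.
	fmt.Println(len(msgs), len(msgs[1].ToolCalls)) // prints: 2 1
}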


@@ -365,22 +365,33 @@ func FromResponsesRequest(r ResponsesRequest) (*api.ChatRequest, error) {
 					return nil, fmt.Errorf("failed to parse function call arguments: %w", err)
 				}
 			}
-			msg := api.Message{
-				Role: "assistant",
-				ToolCalls: []api.ToolCall{{
-					ID: v.CallID,
-					Function: api.ToolCallFunction{
-						Name: v.Name,
-						Arguments: args,
-					},
-				}},
+			toolCall := api.ToolCall{
+				ID: v.CallID,
+				Function: api.ToolCallFunction{
+					Name: v.Name,
+					Arguments: args,
+				},
 			}
-			// Attach pending thinking
-			if pendingThinking != "" {
-				msg.Thinking = pendingThinking
-				pendingThinking = ""
+			// Merge tool call into existing assistant message if it has content or tool calls
+			if len(messages) > 0 && messages[len(messages)-1].Role == "assistant" {
+				lastMsg := &messages[len(messages)-1]
+				lastMsg.ToolCalls = append(lastMsg.ToolCalls, toolCall)
+				if pendingThinking != "" {
+					lastMsg.Thinking = pendingThinking
+					pendingThinking = ""
+				}
+			} else {
+				msg := api.Message{
+					Role: "assistant",
+					ToolCalls: []api.ToolCall{toolCall},
+				}
+				if pendingThinking != "" {
+					msg.Thinking = pendingThinking
+					pendingThinking = ""
+				}
+				messages = append(messages, msg)
 			}
-			messages = append(messages, msg)
 		case ResponsesFunctionCallOutput:
 			messages = append(messages, api.Message{
 				Role: "tool",


@@ -456,6 +456,305 @@ func TestFromResponsesRequest_FunctionCallOutput(t *testing.T) {
 	}
 }
+func TestFromResponsesRequest_FunctionCallMerge(t *testing.T) {
+	t.Run("function call merges with preceding assistant message", func(t *testing.T) {
+		// When assistant message has content followed by function_call,
+		// they should be merged into a single message
+		reqJSON := `{
+			"model": "gpt-oss:20b",
+			"input": [
+				{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "what is the weather?"}]},
+				{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I'll check the weather for you."}]},
+				{"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}
+			]
+		}`
+		var req ResponsesRequest
+		if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
+			t.Fatalf("failed to unmarshal request: %v", err)
+		}
+		chatReq, err := FromResponsesRequest(req)
+		if err != nil {
+			t.Fatalf("failed to convert request: %v", err)
+		}
+		// Should have 2 messages: user and assistant (with content + tool call merged)
+		if len(chatReq.Messages) != 2 {
+			t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages))
+		}
+		// Check user message
+		if chatReq.Messages[0].Role != "user" {
+			t.Errorf("Messages[0].Role = %q, want %q", chatReq.Messages[0].Role, "user")
+		}
+		// Check assistant message has both content and tool call
+		assistantMsg := chatReq.Messages[1]
+		if assistantMsg.Role != "assistant" {
+			t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant")
+		}
+		if assistantMsg.Content != "I'll check the weather for you." {
+			t.Errorf("Messages[1].Content = %q, want %q", assistantMsg.Content, "I'll check the weather for you.")
+		}
+		if len(assistantMsg.ToolCalls) != 1 {
+			t.Fatalf("expected 1 tool call, got %d", len(assistantMsg.ToolCalls))
+		}
+		if assistantMsg.ToolCalls[0].Function.Name != "get_weather" {
+			t.Errorf("ToolCalls[0].Function.Name = %q, want %q", assistantMsg.ToolCalls[0].Function.Name, "get_weather")
+		}
+	})
t.Run("function call without preceding assistant creates new message", func(t *testing.T) {
// When there's no preceding assistant message, function_call creates its own message
reqJSON := `{
"model": "gpt-oss:20b",
"input": [
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "what is the weather?"}]},
{"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}
]
}`
var req ResponsesRequest
if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
t.Fatalf("failed to unmarshal request: %v", err)
}
chatReq, err := FromResponsesRequest(req)
if err != nil {
t.Fatalf("failed to convert request: %v", err)
}
// Should have 2 messages: user and assistant (tool call only)
if len(chatReq.Messages) != 2 {
t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages))
}
// Check assistant message has tool call but no content
assistantMsg := chatReq.Messages[1]
if assistantMsg.Role != "assistant" {
t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant")
}
if assistantMsg.Content != "" {
t.Errorf("Messages[1].Content = %q, want empty", assistantMsg.Content)
}
if len(assistantMsg.ToolCalls) != 1 {
t.Fatalf("expected 1 tool call, got %d", len(assistantMsg.ToolCalls))
}
})
t.Run("multiple function calls merge into same assistant message", func(t *testing.T) {
// Multiple consecutive function_calls should all merge into the same assistant message
reqJSON := `{
"model": "gpt-oss:20b",
"input": [
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "check weather and time"}]},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I'll check both."}]},
{"type": "function_call", "call_id": "call_1", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"},
{"type": "function_call", "call_id": "call_2", "name": "get_time", "arguments": "{\"city\":\"Paris\"}"}
]
}`
var req ResponsesRequest
if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
t.Fatalf("failed to unmarshal request: %v", err)
}
chatReq, err := FromResponsesRequest(req)
if err != nil {
t.Fatalf("failed to convert request: %v", err)
}
// Should have 2 messages: user and assistant (content + both tool calls)
if len(chatReq.Messages) != 2 {
t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages))
}
// Assistant has content + both tool calls
assistantMsg := chatReq.Messages[1]
if assistantMsg.Content != "I'll check both." {
t.Errorf("Messages[1].Content = %q, want %q", assistantMsg.Content, "I'll check both.")
}
if len(assistantMsg.ToolCalls) != 2 {
t.Fatalf("expected 2 tool calls, got %d", len(assistantMsg.ToolCalls))
}
if assistantMsg.ToolCalls[0].Function.Name != "get_weather" {
t.Errorf("ToolCalls[0].Function.Name = %q, want %q", assistantMsg.ToolCalls[0].Function.Name, "get_weather")
}
if assistantMsg.ToolCalls[1].Function.Name != "get_time" {
t.Errorf("ToolCalls[1].Function.Name = %q, want %q", assistantMsg.ToolCalls[1].Function.Name, "get_time")
}
})
t.Run("new assistant message starts fresh tool call group", func(t *testing.T) {
// assistant → tool_call → tool_call → assistant → tool_call
// Should result in 2 assistant messages with their respective tool calls
reqJSON := `{
"model": "gpt-oss:20b",
"input": [
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "do multiple things"}]},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "First batch."}]},
{"type": "function_call", "call_id": "call_1", "name": "func_a", "arguments": "{}"},
{"type": "function_call", "call_id": "call_2", "name": "func_b", "arguments": "{}"},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Second batch."}]},
{"type": "function_call", "call_id": "call_3", "name": "func_c", "arguments": "{}"}
]
}`
var req ResponsesRequest
if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
t.Fatalf("failed to unmarshal request: %v", err)
}
chatReq, err := FromResponsesRequest(req)
if err != nil {
t.Fatalf("failed to convert request: %v", err)
}
// Should have 3 messages:
// 1. user
// 2. assistant "First batch." + tool calls [func_a, func_b]
// 3. assistant "Second batch." + tool calls [func_c]
if len(chatReq.Messages) != 3 {
t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages))
}
asst1 := chatReq.Messages[1]
if asst1.Content != "First batch." {
t.Errorf("Messages[1].Content = %q, want %q", asst1.Content, "First batch.")
}
if len(asst1.ToolCalls) != 2 {
t.Fatalf("expected 2 tool calls in Messages[1], got %d", len(asst1.ToolCalls))
}
if asst1.ToolCalls[0].Function.Name != "func_a" {
t.Errorf("Messages[1].ToolCalls[0] = %q, want %q", asst1.ToolCalls[0].Function.Name, "func_a")
}
if asst1.ToolCalls[1].Function.Name != "func_b" {
t.Errorf("Messages[1].ToolCalls[1] = %q, want %q", asst1.ToolCalls[1].Function.Name, "func_b")
}
asst2 := chatReq.Messages[2]
if asst2.Content != "Second batch." {
t.Errorf("Messages[2].Content = %q, want %q", asst2.Content, "Second batch.")
}
if len(asst2.ToolCalls) != 1 {
t.Fatalf("expected 1 tool call in Messages[2], got %d", len(asst2.ToolCalls))
}
if asst2.ToolCalls[0].Function.Name != "func_c" {
t.Errorf("Messages[2].ToolCalls[0] = %q, want %q", asst2.ToolCalls[0].Function.Name, "func_c")
}
})
t.Run("function call merges with assistant that has thinking", func(t *testing.T) {
// reasoning → assistant (gets thinking) → function_call → should merge
reqJSON := `{
"model": "gpt-oss:20b",
"input": [
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "think and act"}]},
{"type": "reasoning", "id": "rs_1", "encrypted_content": "Let me think...", "summary": []},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I thought about it."}]},
{"type": "function_call", "call_id": "call_1", "name": "do_thing", "arguments": "{}"}
]
}`
var req ResponsesRequest
if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
t.Fatalf("failed to unmarshal request: %v", err)
}
chatReq, err := FromResponsesRequest(req)
if err != nil {
t.Fatalf("failed to convert request: %v", err)
}
// Should have 2 messages: user and assistant (thinking + content + tool call)
if len(chatReq.Messages) != 2 {
t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages))
}
asst := chatReq.Messages[1]
if asst.Thinking != "Let me think..." {
t.Errorf("Messages[1].Thinking = %q, want %q", asst.Thinking, "Let me think...")
}
if asst.Content != "I thought about it." {
t.Errorf("Messages[1].Content = %q, want %q", asst.Content, "I thought about it.")
}
if len(asst.ToolCalls) != 1 {
t.Fatalf("expected 1 tool call, got %d", len(asst.ToolCalls))
}
if asst.ToolCalls[0].Function.Name != "do_thing" {
t.Errorf("ToolCalls[0].Function.Name = %q, want %q", asst.ToolCalls[0].Function.Name, "do_thing")
}
})
t.Run("mixed thinking and content with multiple tool calls", func(t *testing.T) {
// Test:
// 1. reasoning → assistant (empty content, gets thinking) → tc (merges)
// 2. assistant with content → tc → tc (both merge)
// Result: 2 assistant messages
reqJSON := `{
"model": "gpt-oss:20b",
"input": [
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "complex task"}]},
{"type": "reasoning", "id": "rs_1", "encrypted_content": "Thinking first...", "summary": []},
{"type": "message", "role": "assistant", "content": ""},
{"type": "function_call", "call_id": "call_1", "name": "think_action", "arguments": "{}"},
{"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Now doing more."}]},
{"type": "function_call", "call_id": "call_2", "name": "action_a", "arguments": "{}"},
{"type": "function_call", "call_id": "call_3", "name": "action_b", "arguments": "{}"}
]
}`
var req ResponsesRequest
if err := json.Unmarshal([]byte(reqJSON), &req); err != nil {
t.Fatalf("failed to unmarshal request: %v", err)
}
chatReq, err := FromResponsesRequest(req)
if err != nil {
t.Fatalf("failed to convert request: %v", err)
}
// Should have 3 messages:
// 1. user
// 2. assistant with thinking + tool call [think_action]
// 3. assistant with content "Now doing more." + tool calls [action_a, action_b]
if len(chatReq.Messages) != 3 {
t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages))
}
// First assistant: thinking + tool call
asst1 := chatReq.Messages[1]
if asst1.Thinking != "Thinking first..." {
t.Errorf("Messages[1].Thinking = %q, want %q", asst1.Thinking, "Thinking first...")
}
if asst1.Content != "" {
t.Errorf("Messages[1].Content = %q, want empty", asst1.Content)
}
if len(asst1.ToolCalls) != 1 {
t.Fatalf("expected 1 tool call in Messages[1], got %d", len(asst1.ToolCalls))
}
if asst1.ToolCalls[0].Function.Name != "think_action" {
t.Errorf("Messages[1].ToolCalls[0] = %q, want %q", asst1.ToolCalls[0].Function.Name, "think_action")
}
// Second assistant: content + 2 tool calls
asst2 := chatReq.Messages[2]
if asst2.Content != "Now doing more." {
t.Errorf("Messages[2].Content = %q, want %q", asst2.Content, "Now doing more.")
}
if len(asst2.ToolCalls) != 2 {
t.Fatalf("expected 2 tool calls in Messages[2], got %d", len(asst2.ToolCalls))
}
if asst2.ToolCalls[0].Function.Name != "action_a" {
t.Errorf("Messages[2].ToolCalls[0] = %q, want %q", asst2.ToolCalls[0].Function.Name, "action_a")
}
if asst2.ToolCalls[1].Function.Name != "action_b" {
t.Errorf("Messages[2].ToolCalls[1] = %q, want %q", asst2.ToolCalls[1].Function.Name, "action_b")
}
})
}
 func TestDecodeImageURL(t *testing.T) {
 	// Valid PNG base64 (1x1 red pixel)
 	validPNG := "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="