diff --git a/openai/responses.go b/openai/responses.go index 8f6b1d94c..1fbd2de09 100644 --- a/openai/responses.go +++ b/openai/responses.go @@ -365,22 +365,33 @@ func FromResponsesRequest(r ResponsesRequest) (*api.ChatRequest, error) { return nil, fmt.Errorf("failed to parse function call arguments: %w", err) } } - msg := api.Message{ - Role: "assistant", - ToolCalls: []api.ToolCall{{ - ID: v.CallID, - Function: api.ToolCallFunction{ - Name: v.Name, - Arguments: args, - }, - }}, + toolCall := api.ToolCall{ + ID: v.CallID, + Function: api.ToolCallFunction{ + Name: v.Name, + Arguments: args, + }, } - // Attach pending thinking - if pendingThinking != "" { - msg.Thinking = pendingThinking - pendingThinking = "" + + // Merge tool call into the last message whenever it is an assistant message; pending thinking overwrites its Thinking + if len(messages) > 0 && messages[len(messages)-1].Role == "assistant" { + lastMsg := &messages[len(messages)-1] + lastMsg.ToolCalls = append(lastMsg.ToolCalls, toolCall) + if pendingThinking != "" { + lastMsg.Thinking = pendingThinking + pendingThinking = "" + } + } else { + msg := api.Message{ + Role: "assistant", + ToolCalls: []api.ToolCall{toolCall}, + } + if pendingThinking != "" { + msg.Thinking = pendingThinking + pendingThinking = "" + } + messages = append(messages, msg) } - messages = append(messages, msg) case ResponsesFunctionCallOutput: messages = append(messages, api.Message{ Role: "tool", diff --git a/openai/responses_test.go b/openai/responses_test.go index 50fbfdc57..86731e72b 100644 --- a/openai/responses_test.go +++ b/openai/responses_test.go @@ -456,6 +456,305 @@ func TestFromResponsesRequest_FunctionCallOutput(t *testing.T) { } } +func TestFromResponsesRequest_FunctionCallMerge(t *testing.T) { + t.Run("function call merges with preceding assistant message", func(t *testing.T) { + // When assistant message has content followed by function_call, + // they should be merged into a single message + reqJSON := `{ + "model": "gpt-oss:20b", + "input": [ + 
{"type": "message", "role": "user", "content": [{"type": "input_text", "text": "what is the weather?"}]}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I'll check the weather for you."}]}, + {"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Should have 2 messages: user and assistant (with content + tool call merged) + if len(chatReq.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages)) + } + + // Check user message + if chatReq.Messages[0].Role != "user" { + t.Errorf("Messages[0].Role = %q, want %q", chatReq.Messages[0].Role, "user") + } + + // Check assistant message has both content and tool call + assistantMsg := chatReq.Messages[1] + if assistantMsg.Role != "assistant" { + t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant") + } + if assistantMsg.Content != "I'll check the weather for you." 
{ + t.Errorf("Messages[1].Content = %q, want %q", assistantMsg.Content, "I'll check the weather for you.") + } + if len(assistantMsg.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call, got %d", len(assistantMsg.ToolCalls)) + } + if assistantMsg.ToolCalls[0].Function.Name != "get_weather" { + t.Errorf("ToolCalls[0].Function.Name = %q, want %q", assistantMsg.ToolCalls[0].Function.Name, "get_weather") + } + }) + + t.Run("function call without preceding assistant creates new message", func(t *testing.T) { + // When there's no preceding assistant message, function_call creates its own message + reqJSON := `{ + "model": "gpt-oss:20b", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "what is the weather?"}]}, + {"type": "function_call", "call_id": "call_abc123", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Should have 2 messages: user and assistant (tool call only) + if len(chatReq.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages)) + } + + // Check assistant message has tool call but no content + assistantMsg := chatReq.Messages[1] + if assistantMsg.Role != "assistant" { + t.Errorf("Messages[1].Role = %q, want %q", assistantMsg.Role, "assistant") + } + if assistantMsg.Content != "" { + t.Errorf("Messages[1].Content = %q, want empty", assistantMsg.Content) + } + if len(assistantMsg.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call, got %d", len(assistantMsg.ToolCalls)) + } + }) + + t.Run("multiple function calls merge into same assistant message", func(t *testing.T) { + // Multiple consecutive function_calls should all merge into the same assistant message + reqJSON := `{ + "model": "gpt-oss:20b", + 
"input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "check weather and time"}]}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I'll check both."}]}, + {"type": "function_call", "call_id": "call_1", "name": "get_weather", "arguments": "{\"city\":\"Paris\"}"}, + {"type": "function_call", "call_id": "call_2", "name": "get_time", "arguments": "{\"city\":\"Paris\"}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Should have 2 messages: user and assistant (content + both tool calls) + if len(chatReq.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages)) + } + + // Assistant has content + both tool calls + assistantMsg := chatReq.Messages[1] + if assistantMsg.Content != "I'll check both." 
{ + t.Errorf("Messages[1].Content = %q, want %q", assistantMsg.Content, "I'll check both.") + } + if len(assistantMsg.ToolCalls) != 2 { + t.Fatalf("expected 2 tool calls, got %d", len(assistantMsg.ToolCalls)) + } + if assistantMsg.ToolCalls[0].Function.Name != "get_weather" { + t.Errorf("ToolCalls[0].Function.Name = %q, want %q", assistantMsg.ToolCalls[0].Function.Name, "get_weather") + } + if assistantMsg.ToolCalls[1].Function.Name != "get_time" { + t.Errorf("ToolCalls[1].Function.Name = %q, want %q", assistantMsg.ToolCalls[1].Function.Name, "get_time") + } + }) + + t.Run("new assistant message starts fresh tool call group", func(t *testing.T) { + // assistant → tool_call → tool_call → assistant → tool_call + // Should result in 2 assistant messages with their respective tool calls + reqJSON := `{ + "model": "gpt-oss:20b", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "do multiple things"}]}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "First batch."}]}, + {"type": "function_call", "call_id": "call_1", "name": "func_a", "arguments": "{}"}, + {"type": "function_call", "call_id": "call_2", "name": "func_b", "arguments": "{}"}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Second batch."}]}, + {"type": "function_call", "call_id": "call_3", "name": "func_c", "arguments": "{}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // Should have 3 messages: + // 1. user + // 2. assistant "First batch." + tool calls [func_a, func_b] + // 3. assistant "Second batch." 
+ tool calls [func_c] + if len(chatReq.Messages) != 3 { + t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages)) + } + + asst1 := chatReq.Messages[1] + if asst1.Content != "First batch." { + t.Errorf("Messages[1].Content = %q, want %q", asst1.Content, "First batch.") + } + if len(asst1.ToolCalls) != 2 { + t.Fatalf("expected 2 tool calls in Messages[1], got %d", len(asst1.ToolCalls)) + } + if asst1.ToolCalls[0].Function.Name != "func_a" { + t.Errorf("Messages[1].ToolCalls[0] = %q, want %q", asst1.ToolCalls[0].Function.Name, "func_a") + } + if asst1.ToolCalls[1].Function.Name != "func_b" { + t.Errorf("Messages[1].ToolCalls[1] = %q, want %q", asst1.ToolCalls[1].Function.Name, "func_b") + } + + asst2 := chatReq.Messages[2] + if asst2.Content != "Second batch." { + t.Errorf("Messages[2].Content = %q, want %q", asst2.Content, "Second batch.") + } + if len(asst2.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call in Messages[2], got %d", len(asst2.ToolCalls)) + } + if asst2.ToolCalls[0].Function.Name != "func_c" { + t.Errorf("Messages[2].ToolCalls[0] = %q, want %q", asst2.ToolCalls[0].Function.Name, "func_c") + } + }) + + t.Run("function call merges with assistant that has thinking", func(t *testing.T) { + // reasoning → assistant (gets thinking) → function_call → should merge + reqJSON := `{ + "model": "gpt-oss:20b", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "think and act"}]}, + {"type": "reasoning", "id": "rs_1", "encrypted_content": "Let me think...", "summary": []}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "I thought about it."}]}, + {"type": "function_call", "call_id": "call_1", "name": "do_thing", "arguments": "{}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert 
request: %v", err) + } + + // Should have 2 messages: user and assistant (thinking + content + tool call) + if len(chatReq.Messages) != 2 { + t.Fatalf("expected 2 messages, got %d", len(chatReq.Messages)) + } + + asst := chatReq.Messages[1] + if asst.Thinking != "Let me think..." { + t.Errorf("Messages[1].Thinking = %q, want %q", asst.Thinking, "Let me think...") + } + if asst.Content != "I thought about it." { + t.Errorf("Messages[1].Content = %q, want %q", asst.Content, "I thought about it.") + } + if len(asst.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call, got %d", len(asst.ToolCalls)) + } + if asst.ToolCalls[0].Function.Name != "do_thing" { + t.Errorf("ToolCalls[0].Function.Name = %q, want %q", asst.ToolCalls[0].Function.Name, "do_thing") + } + }) + + t.Run("mixed thinking and content with multiple tool calls", func(t *testing.T) { + // Test: + // 1. reasoning → assistant (empty content, gets thinking) → tc (merges) + // 2. assistant with content → tc → tc (both merge) + // Result: 2 assistant messages + reqJSON := `{ + "model": "gpt-oss:20b", + "input": [ + {"type": "message", "role": "user", "content": [{"type": "input_text", "text": "complex task"}]}, + {"type": "reasoning", "id": "rs_1", "encrypted_content": "Thinking first...", "summary": []}, + {"type": "message", "role": "assistant", "content": ""}, + {"type": "function_call", "call_id": "call_1", "name": "think_action", "arguments": "{}"}, + {"type": "message", "role": "assistant", "content": [{"type": "output_text", "text": "Now doing more."}]}, + {"type": "function_call", "call_id": "call_2", "name": "action_a", "arguments": "{}"}, + {"type": "function_call", "call_id": "call_3", "name": "action_b", "arguments": "{}"} + ] + }` + + var req ResponsesRequest + if err := json.Unmarshal([]byte(reqJSON), &req); err != nil { + t.Fatalf("failed to unmarshal request: %v", err) + } + + chatReq, err := FromResponsesRequest(req) + if err != nil { + t.Fatalf("failed to convert request: %v", err) + } + + // 
Should have 3 messages: + // 1. user + // 2. assistant with thinking + tool call [think_action] + // 3. assistant with content "Now doing more." + tool calls [action_a, action_b] + if len(chatReq.Messages) != 3 { + t.Fatalf("expected 3 messages, got %d", len(chatReq.Messages)) + } + + // First assistant: thinking + tool call + asst1 := chatReq.Messages[1] + if asst1.Thinking != "Thinking first..." { + t.Errorf("Messages[1].Thinking = %q, want %q", asst1.Thinking, "Thinking first...") + } + if asst1.Content != "" { + t.Errorf("Messages[1].Content = %q, want empty", asst1.Content) + } + if len(asst1.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call in Messages[1], got %d", len(asst1.ToolCalls)) + } + if asst1.ToolCalls[0].Function.Name != "think_action" { + t.Errorf("Messages[1].ToolCalls[0] = %q, want %q", asst1.ToolCalls[0].Function.Name, "think_action") + } + + // Second assistant: content + 2 tool calls + asst2 := chatReq.Messages[2] + if asst2.Content != "Now doing more." { + t.Errorf("Messages[2].Content = %q, want %q", asst2.Content, "Now doing more.") + } + if len(asst2.ToolCalls) != 2 { + t.Fatalf("expected 2 tool calls in Messages[2], got %d", len(asst2.ToolCalls)) + } + if asst2.ToolCalls[0].Function.Name != "action_a" { + t.Errorf("Messages[2].ToolCalls[0] = %q, want %q", asst2.ToolCalls[0].Function.Name, "action_a") + } + if asst2.ToolCalls[1].Function.Name != "action_b" { + t.Errorf("Messages[2].ToolCalls[1] = %q, want %q", asst2.ToolCalls[1].Function.Name, "action_b") + } + }) +} + func TestDecodeImageURL(t *testing.T) { // Valid PNG base64 (1x1 red pixel) validPNG := "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="