Unverified commit 3583283e, authored by Wesley Liddick, committed by GitHub
Browse files

Merge pull request #1197 from mutuyihao/fix/apicompat-array-content

fix(apicompat): support array content for system and tool messages
parents 186e3675 4feacf22
...@@ -181,6 +181,35 @@ func TestChatCompletionsToResponses_ImageURL(t *testing.T) { ...@@ -181,6 +181,35 @@ func TestChatCompletionsToResponses_ImageURL(t *testing.T) {
assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL) assert.Equal(t, "data:image/png;base64,abc123", parts[1].ImageURL)
} }
func TestChatCompletionsToResponses_SystemArrayContent(t *testing.T) {
req := &ChatCompletionsRequest{
Model: "gpt-4o",
Messages: []ChatMessage{
{Role: "system", Content: json.RawMessage(`[{"type":"text","text":"You are a careful visual assistant."}]`)},
{Role: "user", Content: json.RawMessage(`[{"type":"text","text":"Describe this image"},{"type":"image_url","image_url":{"url":"data:image/png;base64,abc123"}}]`)},
},
}
resp, err := ChatCompletionsToResponses(req)
require.NoError(t, err)
var items []ResponsesInputItem
require.NoError(t, json.Unmarshal(resp.Input, &items))
require.Len(t, items, 2)
var systemParts []ResponsesContentPart
require.NoError(t, json.Unmarshal(items[0].Content, &systemParts))
require.Len(t, systemParts, 1)
assert.Equal(t, "input_text", systemParts[0].Type)
assert.Equal(t, "You are a careful visual assistant.", systemParts[0].Text)
var userParts []ResponsesContentPart
require.NoError(t, json.Unmarshal(items[1].Content, &userParts))
require.Len(t, userParts, 2)
assert.Equal(t, "input_image", userParts[1].Type)
assert.Equal(t, "data:image/png;base64,abc123", userParts[1].ImageURL)
}
func TestChatCompletionsToResponses_LegacyFunctions(t *testing.T) { func TestChatCompletionsToResponses_LegacyFunctions(t *testing.T) {
req := &ChatCompletionsRequest{ req := &ChatCompletionsRequest{
Model: "gpt-4o", Model: "gpt-4o",
...@@ -398,6 +427,45 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) { ...@@ -398,6 +427,45 @@ func TestResponsesToChatCompletions_Reasoning(t *testing.T) {
assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent) assert.Equal(t, "I thought about it.", chat.Choices[0].Message.ReasoningContent)
} }
func TestChatCompletionsToResponses_ToolArrayContent(t *testing.T) {
req := &ChatCompletionsRequest{
Model: "gpt-4o",
Messages: []ChatMessage{
{Role: "user", Content: json.RawMessage(`"Use the tool"`)},
{
Role: "assistant",
ToolCalls: []ChatToolCall{
{
ID: "call_1",
Type: "function",
Function: ChatFunctionCall{
Name: "inspect_image",
Arguments: `{}`,
},
},
},
},
{
Role: "tool",
ToolCallID: "call_1",
Content: json.RawMessage(
`[{"type":"text","text":"image width: 100"},{"type":"image_url","image_url":{"url":"data:image/png;base64,ignored"}},{"type":"text","text":"; image height: 200"}]`,
),
},
},
}
resp, err := ChatCompletionsToResponses(req)
require.NoError(t, err)
var items []ResponsesInputItem
require.NoError(t, json.Unmarshal(resp.Input, &items))
require.Len(t, items, 3)
assert.Equal(t, "function_call_output", items[2].Type)
assert.Equal(t, "call_1", items[2].CallID)
assert.Equal(t, "image width: 100; image height: 200", items[2].Output)
}
func TestResponsesToChatCompletions_Incomplete(t *testing.T) { func TestResponsesToChatCompletions_Incomplete(t *testing.T) {
resp := &ResponsesResponse{ resp := &ResponsesResponse{
ID: "resp_inc", ID: "resp_inc",
......
...@@ -6,6 +6,11 @@ import ( ...@@ -6,6 +6,11 @@ import (
"strings" "strings"
) )
// chatMessageContent is the decoded form of a ChatMessage Content field,
// which may arrive either as a JSON string or as an array of typed parts.
type chatMessageContent struct {
// Text is non-nil when the content was a JSON string (or was empty).
Text *string
// Parts holds the decoded array when the content was not a string.
Parts []ChatContentPart
}
// ChatCompletionsToResponses converts a Chat Completions request into a // ChatCompletionsToResponses converts a Chat Completions request into a
// Responses API request. The upstream always streams, so Stream is forced to // Responses API request. The upstream always streams, so Stream is forced to
// true. store is always false and reasoning.encrypted_content is always // true. store is always false and reasoning.encrypted_content is always
...@@ -113,11 +118,11 @@ func chatMessageToResponsesItems(m ChatMessage) ([]ResponsesInputItem, error) { ...@@ -113,11 +118,11 @@ func chatMessageToResponsesItems(m ChatMessage) ([]ResponsesInputItem, error) {
// chatSystemToResponses converts a system message. // chatSystemToResponses converts a system message.
func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) { func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
text, err := parseChatContent(m.Content) parsed, err := parseChatMessageContent(m.Content)
if err != nil { if err != nil {
return nil, err return nil, err
} }
content, err := json.Marshal(text) content, err := marshalChatInputContent(parsed)
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -127,39 +132,11 @@ func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) { ...@@ -127,39 +132,11 @@ func chatSystemToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
// chatUserToResponses converts a user message, handling both plain strings and // chatUserToResponses converts a user message, handling both plain strings and
// multi-modal content arrays. // multi-modal content arrays.
func chatUserToResponses(m ChatMessage) ([]ResponsesInputItem, error) { func chatUserToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
// Try plain string first. parsed, err := parseChatMessageContent(m.Content)
var s string if err != nil {
if err := json.Unmarshal(m.Content, &s); err == nil {
content, _ := json.Marshal(s)
return []ResponsesInputItem{{Role: "user", Content: content}}, nil
}
var parts []ChatContentPart
if err := json.Unmarshal(m.Content, &parts); err != nil {
return nil, fmt.Errorf("parse user content: %w", err) return nil, fmt.Errorf("parse user content: %w", err)
} }
content, err := marshalChatInputContent(parsed)
var responseParts []ResponsesContentPart
for _, p := range parts {
switch p.Type {
case "text":
if p.Text != "" {
responseParts = append(responseParts, ResponsesContentPart{
Type: "input_text",
Text: p.Text,
})
}
case "image_url":
if p.ImageURL != nil && p.ImageURL.URL != "" {
responseParts = append(responseParts, ResponsesContentPart{
Type: "input_image",
ImageURL: p.ImageURL.URL,
})
}
}
}
content, err := json.Marshal(responseParts)
if err != nil { if err != nil {
return nil, err return nil, err
} }
...@@ -312,16 +289,79 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) { ...@@ -312,16 +289,79 @@ func chatFunctionToResponses(m ChatMessage) ([]ResponsesInputItem, error) {
} }
// parseChatContent returns the string value of a ChatMessage Content field. // parseChatContent returns the string value of a ChatMessage Content field.
// Content must be a JSON string. Returns "" if content is null or empty. // Content can be a JSON string or an array of typed parts. Array content is
// flattened to text by concatenating text parts and ignoring non-text parts.
func parseChatContent(raw json.RawMessage) (string, error) { func parseChatContent(raw json.RawMessage) (string, error) {
parsed, err := parseChatMessageContent(raw)
if err != nil {
return "", err
}
if parsed.Text != nil {
return *parsed.Text, nil
}
return flattenChatContentParts(parsed.Parts), nil
}
func parseChatMessageContent(raw json.RawMessage) (chatMessageContent, error) {
if len(raw) == 0 { if len(raw) == 0 {
return "", nil return chatMessageContent{Text: stringPtr("")}, nil
} }
var s string var s string
if err := json.Unmarshal(raw, &s); err != nil { if err := json.Unmarshal(raw, &s); err == nil {
return "", fmt.Errorf("parse content as string: %w", err) return chatMessageContent{Text: &s}, nil
} }
return s, nil
var parts []ChatContentPart
if err := json.Unmarshal(raw, &parts); err == nil {
return chatMessageContent{Parts: parts}, nil
}
return chatMessageContent{}, fmt.Errorf("parse content as string or parts array")
}
// marshalChatInputContent encodes parsed message content as Responses input
// content: a JSON string when the content is plain text, otherwise the JSON
// encoding of the parts converted to Responses content parts.
func marshalChatInputContent(content chatMessageContent) (json.RawMessage, error) {
	if text := content.Text; text != nil {
		return json.Marshal(*text)
	}
	converted := convertChatContentPartsToResponses(content.Parts)
	return json.Marshal(converted)
}
// convertChatContentPartsToResponses maps Chat Completions content parts to
// Responses content parts. A "text" part with non-empty text becomes an
// "input_text" part, and an "image_url" part with a non-empty URL becomes an
// "input_image" part. Every other part is dropped. The result is nil when no
// part is kept.
func convertChatContentPartsToResponses(parts []ChatContentPart) []ResponsesContentPart {
	var converted []ResponsesContentPart
	for _, part := range parts {
		switch {
		case part.Type == "text" && part.Text != "":
			converted = append(converted, ResponsesContentPart{
				Type: "input_text",
				Text: part.Text,
			})
		case part.Type == "image_url" && part.ImageURL != nil && part.ImageURL.URL != "":
			converted = append(converted, ResponsesContentPart{
				Type:     "input_image",
				ImageURL: part.ImageURL.URL,
			})
		}
	}
	return converted
}
// flattenChatContentParts concatenates the text of every "text" part, in
// order and with no separator. Parts of any other type are skipped.
func flattenChatContentParts(parts []ChatContentPart) string {
	var b strings.Builder
	for i := range parts {
		if parts[i].Type != "text" {
			continue
		}
		// Writing an empty string is a no-op, so empty text parts
		// contribute nothing to the result.
		b.WriteString(parts[i].Text)
	}
	return b.String()
}
// stringPtr returns a pointer to a copy of s. It is used to fill *string
// fields from values that cannot be addressed directly, such as literals.
func stringPtr(s string) *string {
	return &s
}
// convertChatToolsToResponses maps Chat Completions tool definitions and legacy // convertChatToolsToResponses maps Chat Completions tool definitions and legacy
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment