Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
陈曦
sub2api
Commits
00c151b4
Commit
00c151b4
authored
Mar 08, 2026
by
shaw
Browse files
feat: gpt->claude格式转换支持图片识别
parent
a2ae9f1f
Changes
3
Hide whitespace changes
Inline
Side-by-side
backend/internal/pkg/apicompat/anthropic_responses_test.go
View file @
00c151b4
...
...
@@ -733,3 +733,188 @@ func TestAnthropicToResponses_ToolChoiceSpecific(t *testing.T) {
require
.
True
(
t
,
ok
)
assert
.
Equal
(
t
,
"get_weather"
,
fn
[
"name"
])
}
// ---------------------------------------------------------------------------
// Image content block conversion tests
// ---------------------------------------------------------------------------
func
TestAnthropicToResponses_UserImageBlock
(
t
*
testing
.
T
)
{
req
:=
&
AnthropicRequest
{
Model
:
"gpt-5.2"
,
MaxTokens
:
1024
,
Messages
:
[]
AnthropicMessage
{
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`[
{"type":"text","text":"What is in this image?"},
{"type":"image","source":{"type":"base64","media_type":"image/png","data":"iVBOR"}}
]`
)},
},
}
resp
,
err
:=
AnthropicToResponses
(
req
)
require
.
NoError
(
t
,
err
)
var
items
[]
ResponsesInputItem
require
.
NoError
(
t
,
json
.
Unmarshal
(
resp
.
Input
,
&
items
))
require
.
Len
(
t
,
items
,
1
)
assert
.
Equal
(
t
,
"user"
,
items
[
0
]
.
Role
)
var
parts
[]
ResponsesContentPart
require
.
NoError
(
t
,
json
.
Unmarshal
(
items
[
0
]
.
Content
,
&
parts
))
require
.
Len
(
t
,
parts
,
2
)
assert
.
Equal
(
t
,
"input_text"
,
parts
[
0
]
.
Type
)
assert
.
Equal
(
t
,
"What is in this image?"
,
parts
[
0
]
.
Text
)
assert
.
Equal
(
t
,
"input_image"
,
parts
[
1
]
.
Type
)
assert
.
Equal
(
t
,
"data:image/png;base64,iVBOR"
,
parts
[
1
]
.
ImageURL
)
}
func
TestAnthropicToResponses_ImageOnlyUserMessage
(
t
*
testing
.
T
)
{
req
:=
&
AnthropicRequest
{
Model
:
"gpt-5.2"
,
MaxTokens
:
1024
,
Messages
:
[]
AnthropicMessage
{
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`[
{"type":"image","source":{"type":"base64","media_type":"image/jpeg","data":"/9j/4AAQ"}}
]`
)},
},
}
resp
,
err
:=
AnthropicToResponses
(
req
)
require
.
NoError
(
t
,
err
)
var
items
[]
ResponsesInputItem
require
.
NoError
(
t
,
json
.
Unmarshal
(
resp
.
Input
,
&
items
))
require
.
Len
(
t
,
items
,
1
)
var
parts
[]
ResponsesContentPart
require
.
NoError
(
t
,
json
.
Unmarshal
(
items
[
0
]
.
Content
,
&
parts
))
require
.
Len
(
t
,
parts
,
1
)
assert
.
Equal
(
t
,
"input_image"
,
parts
[
0
]
.
Type
)
assert
.
Equal
(
t
,
"data:image/jpeg;base64,/9j/4AAQ"
,
parts
[
0
]
.
ImageURL
)
}
func
TestAnthropicToResponses_ToolResultWithImage
(
t
*
testing
.
T
)
{
req
:=
&
AnthropicRequest
{
Model
:
"gpt-5.2"
,
MaxTokens
:
1024
,
Messages
:
[]
AnthropicMessage
{
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`"Read the screenshot"`
)},
{
Role
:
"assistant"
,
Content
:
json
.
RawMessage
(
`[{"type":"tool_use","id":"toolu_1","name":"Read","input":{"file_path":"/tmp/screen.png"}}]`
)},
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`[
{"type":"tool_result","tool_use_id":"toolu_1","content":[
{"type":"image","source":{"type":"base64","media_type":"image/png","data":"iVBOR"}}
]}
]`
)},
},
}
resp
,
err
:=
AnthropicToResponses
(
req
)
require
.
NoError
(
t
,
err
)
var
items
[]
ResponsesInputItem
require
.
NoError
(
t
,
json
.
Unmarshal
(
resp
.
Input
,
&
items
))
// user + function_call + function_call_output + user(image) = 4
require
.
Len
(
t
,
items
,
4
)
// function_call_output should have text-only output (no image).
assert
.
Equal
(
t
,
"function_call_output"
,
items
[
2
]
.
Type
)
assert
.
Equal
(
t
,
"fc_toolu_1"
,
items
[
2
]
.
CallID
)
assert
.
Equal
(
t
,
"(empty)"
,
items
[
2
]
.
Output
)
// Image should be in a separate user message.
assert
.
Equal
(
t
,
"user"
,
items
[
3
]
.
Role
)
var
parts
[]
ResponsesContentPart
require
.
NoError
(
t
,
json
.
Unmarshal
(
items
[
3
]
.
Content
,
&
parts
))
require
.
Len
(
t
,
parts
,
1
)
assert
.
Equal
(
t
,
"input_image"
,
parts
[
0
]
.
Type
)
assert
.
Equal
(
t
,
"data:image/png;base64,iVBOR"
,
parts
[
0
]
.
ImageURL
)
}
func
TestAnthropicToResponses_ToolResultMixed
(
t
*
testing
.
T
)
{
req
:=
&
AnthropicRequest
{
Model
:
"gpt-5.2"
,
MaxTokens
:
1024
,
Messages
:
[]
AnthropicMessage
{
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`"Describe the file"`
)},
{
Role
:
"assistant"
,
Content
:
json
.
RawMessage
(
`[{"type":"tool_use","id":"toolu_2","name":"Read","input":{"file_path":"/tmp/photo.png"}}]`
)},
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`[
{"type":"tool_result","tool_use_id":"toolu_2","content":[
{"type":"text","text":"File metadata: 800x600 PNG"},
{"type":"image","source":{"type":"base64","media_type":"image/png","data":"AAAA"}}
]}
]`
)},
},
}
resp
,
err
:=
AnthropicToResponses
(
req
)
require
.
NoError
(
t
,
err
)
var
items
[]
ResponsesInputItem
require
.
NoError
(
t
,
json
.
Unmarshal
(
resp
.
Input
,
&
items
))
// user + function_call + function_call_output + user(image) = 4
require
.
Len
(
t
,
items
,
4
)
// function_call_output should have text-only output.
assert
.
Equal
(
t
,
"function_call_output"
,
items
[
2
]
.
Type
)
assert
.
Equal
(
t
,
"File metadata: 800x600 PNG"
,
items
[
2
]
.
Output
)
// Image should be in a separate user message.
assert
.
Equal
(
t
,
"user"
,
items
[
3
]
.
Role
)
var
parts
[]
ResponsesContentPart
require
.
NoError
(
t
,
json
.
Unmarshal
(
items
[
3
]
.
Content
,
&
parts
))
require
.
Len
(
t
,
parts
,
1
)
assert
.
Equal
(
t
,
"input_image"
,
parts
[
0
]
.
Type
)
assert
.
Equal
(
t
,
"data:image/png;base64,AAAA"
,
parts
[
0
]
.
ImageURL
)
}
func
TestAnthropicToResponses_TextOnlyToolResultBackwardCompat
(
t
*
testing
.
T
)
{
req
:=
&
AnthropicRequest
{
Model
:
"gpt-5.2"
,
MaxTokens
:
1024
,
Messages
:
[]
AnthropicMessage
{
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`"Check weather"`
)},
{
Role
:
"assistant"
,
Content
:
json
.
RawMessage
(
`[{"type":"tool_use","id":"call_1","name":"get_weather","input":{"city":"NYC"}}]`
)},
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`[
{"type":"tool_result","tool_use_id":"call_1","content":[
{"type":"text","text":"Sunny, 72°F"}
]}
]`
)},
},
}
resp
,
err
:=
AnthropicToResponses
(
req
)
require
.
NoError
(
t
,
err
)
var
items
[]
ResponsesInputItem
require
.
NoError
(
t
,
json
.
Unmarshal
(
resp
.
Input
,
&
items
))
// user + function_call + function_call_output = 3
require
.
Len
(
t
,
items
,
3
)
// Text-only tool_result should produce a plain string.
assert
.
Equal
(
t
,
"Sunny, 72°F"
,
items
[
2
]
.
Output
)
}
func
TestAnthropicToResponses_ImageEmptyMediaType
(
t
*
testing
.
T
)
{
req
:=
&
AnthropicRequest
{
Model
:
"gpt-5.2"
,
MaxTokens
:
1024
,
Messages
:
[]
AnthropicMessage
{
{
Role
:
"user"
,
Content
:
json
.
RawMessage
(
`[
{"type":"image","source":{"type":"base64","media_type":"","data":"iVBOR"}}
]`
)},
},
}
resp
,
err
:=
AnthropicToResponses
(
req
)
require
.
NoError
(
t
,
err
)
var
items
[]
ResponsesInputItem
require
.
NoError
(
t
,
json
.
Unmarshal
(
resp
.
Input
,
&
items
))
require
.
Len
(
t
,
items
,
1
)
var
parts
[]
ResponsesContentPart
require
.
NoError
(
t
,
json
.
Unmarshal
(
items
[
0
]
.
Content
,
&
parts
))
require
.
Len
(
t
,
parts
,
1
)
assert
.
Equal
(
t
,
"input_image"
,
parts
[
0
]
.
Type
)
// Should default to image/png when media_type is empty.
assert
.
Equal
(
t
,
"data:image/png;base64,iVBOR"
,
parts
[
0
]
.
ImageURL
)
}
backend/internal/pkg/apicompat/anthropic_to_responses.go
View file @
00c151b4
...
...
@@ -169,7 +169,7 @@ func anthropicMsgToResponsesItems(m AnthropicMessage) ([]ResponsesInputItem, err
// anthropicUserToResponses handles an Anthropic user message. Content can be a
// plain string or an array of blocks. tool_result blocks are extracted into
// function_call_output items.
// function_call_output items.
Image blocks are converted to input_image parts.
func
anthropicUserToResponses
(
raw
json
.
RawMessage
)
([]
ResponsesInputItem
,
error
)
{
// Try plain string.
var
s
string
...
...
@@ -184,28 +184,46 @@ func anthropicUserToResponses(raw json.RawMessage) ([]ResponsesInputItem, error)
}
var
out
[]
ResponsesInputItem
var
toolResultImageParts
[]
ResponsesContentPart
// Extract tool_result blocks → function_call_output items.
// Images inside tool_results are extracted separately because the
// Responses API function_call_output.output only accepts strings.
for
_
,
b
:=
range
blocks
{
if
b
.
Type
!=
"tool_result"
{
continue
}
text
:=
extractAnthropicToolResultText
(
b
)
if
text
==
""
{
// OpenAI Responses API requires "output" field; use placeholder for empty results.
text
=
"(empty)"
}
outputText
,
imageParts
:=
convertToolResultOutput
(
b
)
out
=
append
(
out
,
ResponsesInputItem
{
Type
:
"function_call_output"
,
CallID
:
toResponsesCallID
(
b
.
ToolUseID
),
Output
:
t
ext
,
Output
:
outputT
ext
,
})
toolResultImageParts
=
append
(
toolResultImageParts
,
imageParts
...
)
}
// Remaining text blocks → user message.
text
:=
extractAnthropicTextFromBlocks
(
blocks
)
if
text
!=
""
{
content
,
_
:=
json
.
Marshal
(
text
)
// Remaining text + image blocks → user message with content parts.
// Also include images extracted from tool_results so the model can see them.
var
parts
[]
ResponsesContentPart
for
_
,
b
:=
range
blocks
{
switch
b
.
Type
{
case
"text"
:
if
b
.
Text
!=
""
{
parts
=
append
(
parts
,
ResponsesContentPart
{
Type
:
"input_text"
,
Text
:
b
.
Text
})
}
case
"image"
:
if
uri
:=
anthropicImageToDataURI
(
b
.
Source
);
uri
!=
""
{
parts
=
append
(
parts
,
ResponsesContentPart
{
Type
:
"input_image"
,
ImageURL
:
uri
})
}
}
}
parts
=
append
(
parts
,
toolResultImageParts
...
)
if
len
(
parts
)
>
0
{
content
,
err
:=
json
.
Marshal
(
parts
)
if
err
!=
nil
{
return
nil
,
err
}
out
=
append
(
out
,
ResponsesInputItem
{
Role
:
"user"
,
Content
:
content
})
}
...
...
@@ -290,26 +308,64 @@ func fromResponsesCallID(id string) string {
return
id
}
// extractAnthropicToolResultText gets the text content from a tool_result block.
func
extractAnthropicToolResultText
(
b
AnthropicContentBlock
)
string
{
if
len
(
b
.
Content
)
==
0
{
// anthropicImageToDataURI converts an AnthropicImageSource to a data URI string.
// Returns "" if the source is nil or has no data.
func
anthropicImageToDataURI
(
src
*
AnthropicImageSource
)
string
{
if
src
==
nil
||
src
.
Data
==
""
{
return
""
}
mediaType
:=
src
.
MediaType
if
mediaType
==
""
{
mediaType
=
"image/png"
}
return
"data:"
+
mediaType
+
";base64,"
+
src
.
Data
}
// convertToolResultOutput extracts text and image content from a tool_result
// block. Returns the text as a string for the function_call_output Output
// field, plus any image parts that must be sent in a separate user message
// (the Responses API output field only accepts strings).
func
convertToolResultOutput
(
b
AnthropicContentBlock
)
(
string
,
[]
ResponsesContentPart
)
{
if
len
(
b
.
Content
)
==
0
{
return
"(empty)"
,
nil
}
// Try plain string content.
var
s
string
if
err
:=
json
.
Unmarshal
(
b
.
Content
,
&
s
);
err
==
nil
{
return
s
if
s
==
""
{
s
=
"(empty)"
}
return
s
,
nil
}
// Array of content blocks — may contain text and/or images.
var
inner
[]
AnthropicContentBlock
if
err
:=
json
.
Unmarshal
(
b
.
Content
,
&
inner
);
err
==
nil
{
var
parts
[]
string
for
_
,
ib
:=
range
inner
{
if
ib
.
Type
==
"text"
&&
ib
.
Text
!=
""
{
parts
=
append
(
parts
,
ib
.
Text
)
if
err
:=
json
.
Unmarshal
(
b
.
Content
,
&
inner
);
err
!=
nil
{
return
"(empty)"
,
nil
}
// Separate text (for function_call_output) from images (for user message).
var
textParts
[]
string
var
imageParts
[]
ResponsesContentPart
for
_
,
ib
:=
range
inner
{
switch
ib
.
Type
{
case
"text"
:
if
ib
.
Text
!=
""
{
textParts
=
append
(
textParts
,
ib
.
Text
)
}
case
"image"
:
if
uri
:=
anthropicImageToDataURI
(
ib
.
Source
);
uri
!=
""
{
imageParts
=
append
(
imageParts
,
ResponsesContentPart
{
Type
:
"input_image"
,
ImageURL
:
uri
})
}
}
return
strings
.
Join
(
parts
,
"
\n\n
"
)
}
return
""
text
:=
strings
.
Join
(
textParts
,
"
\n\n
"
)
if
text
==
""
{
text
=
"(empty)"
}
return
text
,
imageParts
}
// extractAnthropicTextFromBlocks joins all text blocks, ignoring thinking/
...
...
backend/internal/pkg/apicompat/types.go
View file @
00c151b4
...
...
@@ -47,6 +47,9 @@ type AnthropicContentBlock struct {
// type=thinking
Thinking
string
`json:"thinking,omitempty"`
// type=image
Source
*
AnthropicImageSource
`json:"source,omitempty"`
// type=tool_use
ID
string
`json:"id,omitempty"`
Name
string
`json:"name,omitempty"`
...
...
@@ -58,6 +61,13 @@ type AnthropicContentBlock struct {
IsError
bool
`json:"is_error,omitempty"`
}
// AnthropicImageSource is the "source" object of an Anthropic image content
// block. All three fields are always serialized (no omitempty).
type AnthropicImageSource struct {
	// Type is the source kind, e.g. "base64".
	Type string `json:"type"`
	// MediaType is the value of the "media_type" key, e.g. "image/png".
	MediaType string `json:"media_type"`
	// Data is the value of the "data" key (the image payload).
	Data string `json:"data"`
}
// AnthropicTool describes a tool available to the model.
type
AnthropicTool
struct
{
Type
string
`json:"type,omitempty"`
// e.g. "web_search_20250305" for server tools
...
...
@@ -176,8 +186,9 @@ type ResponsesInputItem struct {
// ResponsesContentPart is a typed content part in a Responses message.
// Text and ImageURL are omitted from the JSON encoding when empty.
type ResponsesContentPart struct {
	// Type is the part kind: "input_text" | "output_text" | "input_image".
	Type string `json:"type"`
	// Text is the text payload of the part.
	Text string `json:"text,omitempty"`
	// ImageURL is the data URI for input_image parts.
	ImageURL string `json:"image_url,omitempty"`
}
// ResponsesTool describes a tool in the Responses API.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment