fix(openai): drop reasoning items from /v1/responses input on OAuth path

Closes #1957 The OAuth path forwards client requests to chatgpt.com/backend-api/codex/responses, where applyCodexOAuthTransform forces store=false (chatgpt.com's codex backend rejects store=true). Reasoning items emitted under store=false are NEVER persisted upstream, so any rs_* reference that a client carries forward in a subsequent input[] array triggers a guaranteed upstream 404: Item with id 'rs_...' not found. Items are not persisted when `store` is set to false. Try again with `store` set to true, or remove this item from your input. sub2api wraps this as 502 "Upstream request failed" and the conversation breaks on every multi-turn /v1/responses request that uses reasoning + tools (reproducible with gpt-5.5; gpt-5.4 happens to dodge it because the upstream does not emit reasoning items for that model). Affected clients include any that follow the OpenAI Responses API spec and replay prior assistant items verbatim — in practice this hit OpenClaw and similar agent harnesses on every turn ≥2 with tool use. The fix: in filterCodexInput, drop input items with type == "reasoning" entirely. The model never reads reasoning summary text from input (only encrypted_content can carry reasoning context across turns, and chatgpt.com under store=false does not emit it), so this is a no-op for the model itself and a clean removal of unreachable upstream lookups. Scope is intentionally narrow: * Only OAuth account requests (account.Type == AccountTypeOAuth) reach applyCodexOAuthTransform / filterCodexInput. * API-key accounts going to api.openai.com/v1/responses are unaffected (store=true works there, rs_* persists, multi-turn already works). * Anthropic / Gemini platform groups go through different transforms and are unaffected. * /v1/chat/completions is unaffected (no reasoning items). * item_reference items (different type) are unaffected — only type == "reasoning" is dropped. Verification: * Existing tests pass: go test ./internal/service/ -run Codex|Tool|OAuth * New regression test asserts reasoning items are dropped under both preserveReferences=true and preserveReferences=false. * End-to-end repro on gpt-5.5 multi-turn + tools: pre-patch 502, post-patch 200. Repro on gpt-5.4 unchanged. Three-turn deep loop on gpt-5.5 passes.

fix(openai): drop reasoning items from /v1/responses input on OAuth path
Closes #1957 The OAuth path forwards client requests to chatgpt.com/backend-api/codex/responses, where applyCodexOAuthTransform forces store=false (chatgpt.com's codex backend rejects store=true). Reasoning items emitted under store=false are NEVER persisted upstream, so any rs_* reference that a client carries forward in a subsequent input[] array triggers a guaranteed upstream 404: Item with id 'rs_...' not found. Items are not persisted when `store` is set to false. Try again with `store` set to true, or remove this item from your input. sub2api wraps this as 502 "Upstream request failed" and the conversation breaks on every multi-turn /v1/responses request that uses reasoning + tools (reproducible with gpt-5.5; gpt-5.4 happens to dodge it because the upstream does not emit reasoning items for that model). Affected clients include any that follow the OpenAI Responses API spec and replay prior assistant items verbatim — in practice this hit OpenClaw and similar agent harnesses on every turn ≥2 with tool use. The fix: in filterCodexInput, drop input items with type == "reasoning" entirely. The model never reads reasoning summary text from input (only encrypted_content can carry reasoning context across turns, and chatgpt.com under store=false does not emit it), so this is a no-op for the model itself and a clean removal of unreachable upstream lookups. Scope is intentionally narrow: * Only OAuth account requests (account.Type == AccountTypeOAuth) reach applyCodexOAuthTransform / filterCodexInput. * API-key accounts going to api.openai.com/v1/responses are unaffected (store=true works there, rs_* persists, multi-turn already works). * Anthropic / Gemini platform groups go through different transforms and are unaffected. * /v1/chat/completions is unaffected (no reasoning items). * item_reference items (different type) are unaffected — only type == "reasoning" is dropped. Verification: * Existing tests pass: go test ./internal/service/ -run Codex|Tool|OAuth * New regression test asserts reasoning items are dropped under both preserveReferences=true and preserveReferences=false. * End-to-end repro on gpt-5.5 multi-turn + tools: pre-patch 502, post-patch 200. Repro on gpt-5.4 unchanged. Three-turn deep loop on gpt-5.5 passes.
7452fad8 · Oganneson · b0a2252e · 7452fad8 · 7452fad8
Unverified Commit 7452fad8 authored Apr 28, 2026 by Oganneson
--- a/backend/internal/service/openai_codex_transform.go
+++ b/backend/internal/service/openai_codex_transform.go
@@ -853,6 +853,14 @@ func filterCodexInput(input []any, preserveReferences bool) []any {
 		}
 		typ, _ := m["type"].(string)

+		// chatgpt.com codex backend (OAuth path) does not persist reasoning
+		// items because applyCodexOAuthTransform forces store=false. Any rs_*
+		// reference replayed in input is guaranteed to 404 upstream
+		// ("Item with id 'rs_...' not found"). Drop reasoning items entirely.
+		if typ == "reasoning" {
+			continue
+		}
+
 		// 仅修正真正的 tool/function call 标识，避免误改普通 message/reasoning id；
 		// 若 item_reference 指向 legacy call_* 标识，则仅修正该引用本身。
 		fixCallIDPrefix := func(id string) string {

--- a/backend/internal/service/openai_codex_transform_test.go
+++ b/backend/internal/service/openai_codex_transform_test.go
 package service

 import (
+	"fmt"
+	"strings"
 	"testing"

 	"github.com/stretchr/testify/require"
@@ -1094,3 +1096,54 @@ func TestIsInstructionsEmpty(t *testing.T) {
 		})
 	}
 }
+
+func TestFilterCodexInput_DropsReasoningItemsRegardlessOfPreserveReferences(t *testing.T) {
+	// Reasoning items in input[] reference rs_* IDs that were emitted by
+	// chatgpt.com under store=false (forced by applyCodexOAuthTransform).
+	// They are never persisted upstream, so forwarding them produces a
+	// guaranteed 404 ("Item with id 'rs_...' not found"). Drop them
+	// regardless of preserveReferences. See: Wei-Shaw/sub2api issue #1957.
+
+	build := func() []any {
+		return []any{
+			map[string]any{"type": "message", "id": "msg_0", "role": "user", "content": "hi"},
+			map[string]any{
+				"type":    "reasoning",
+				"id":      "rs_0672f12450da0b9c0169f07220a6c08198b68c2455ced99344",
+				"summary": []any{},
+			},
+			map[string]any{"type": "function_call", "id": "fc_1", "call_id": "call_1", "name": "tool"},
+			map[string]any{"type": "function_call_output", "call_id": "call_1", "output": "{}"},
+		}
+	}
+
+	for _, preserve := range []bool{true, false} {
+		preserve := preserve
+		t.Run(fmt.Sprintf("preserveReferences=%v", preserve), func(t *testing.T) {
+			filtered := filterCodexInput(build(), preserve)
+
+			for _, raw := range filtered {
+				item, ok := raw.(map[string]any)
+				require.True(t, ok)
+				require.NotEqual(t, "reasoning", item["type"],
+					"reasoning items must be dropped from input on the OAuth path")
+				if id, ok := item["id"].(string); ok {
+					require.False(t, strings.HasPrefix(id, "rs_"),
+						"no item carrying an rs_* id should survive the filter")
+				}
+			}
+
+			// Sanity check: the non-reasoning items should still be present.
+			gotTypes := make(map[string]int)
+			for _, raw := range filtered {
+				item, ok := raw.(map[string]any)
+				require.True(t, ok)
+				gotTypes[item["type"].(string)]++
+			}
+			require.Equal(t, 1, gotTypes["message"])
+			require.Equal(t, 1, gotTypes["function_call"])
+			require.Equal(t, 1, gotTypes["function_call_output"])
+			require.Equal(t, 0, gotTypes["reasoning"])
+		})
+	}
+}