Unverified Commit b9c31fa7 authored by Wesley Liddick's avatar Wesley Liddick Committed by GitHub
Browse files

Merge pull request #999 from InCerryGit/fix/enc_coot

fix: handle invalid encrypted content error and retry logic.
parents 17b33997 8f0ea7a0
......@@ -6129,6 +6129,29 @@ func extractUpstreamErrorMessage(body []byte) string {
return gjson.GetBytes(body, "message").String()
}
// extractUpstreamErrorCode returns the whitespace-trimmed error code carried by
// an upstream error body, or "" when none can be found.
//
// The top-level "error.code" field wins when it is non-empty. Otherwise, if the
// trimmed "error.message" string begins with "{", it is treated as an embedded
// JSON document and its own "error.code" is looked up: first on the whole
// string, then on the string cut off right after its last "}" so that text
// trailing the embedded object is ignored.
func extractUpstreamErrorCode(body []byte) string {
	direct := strings.TrimSpace(gjson.GetBytes(body, "error.code").String())
	if direct != "" {
		return direct
	}

	embedded := strings.TrimSpace(gjson.GetBytes(body, "error.message").String())
	if !strings.HasPrefix(embedded, "{") {
		return ""
	}

	// Try the message verbatim first, then the prefix ending at the final brace.
	candidates := []string{embedded}
	if end := strings.LastIndex(embedded, "}"); end >= 0 {
		candidates = append(candidates, embedded[:end+1])
	}
	for _, candidate := range candidates {
		nested := strings.TrimSpace(gjson.Get(candidate, "error.code").String())
		if nested != "" {
			return nested
		}
	}
	return ""
}
func isCountTokensUnsupported404(statusCode int, body []byte) bool {
if statusCode != http.StatusNotFound {
return false
......
......@@ -480,6 +480,7 @@ func classifyOpenAIWSReconnectReason(err error) (string, bool) {
"upgrade_required",
"ws_unsupported",
"auth_failed",
"invalid_encrypted_content",
"previous_response_not_found":
return reason, false
}
......@@ -530,6 +531,14 @@ func resolveOpenAIWSFallbackErrorResponse(err error) (statusCode int, errType st
}
switch reason {
case "invalid_encrypted_content":
if statusCode == 0 {
statusCode = http.StatusBadRequest
}
errType = "invalid_request_error"
if upstreamMessage == "" {
upstreamMessage = "encrypted content could not be verified"
}
case "previous_response_not_found":
if statusCode == 0 {
statusCode = http.StatusBadRequest
......@@ -1924,6 +1933,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
var wsErr error
wsLastFailureReason := ""
wsPrevResponseRecoveryTried := false
wsInvalidEncryptedContentRecoveryTried := false
recoverPrevResponseNotFound := func(attempt int) bool {
if wsPrevResponseRecoveryTried {
return false
......@@ -1956,6 +1966,37 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
)
return true
}
recoverInvalidEncryptedContent := func(attempt int) bool {
if wsInvalidEncryptedContentRecoveryTried {
return false
}
removedReasoningItems := trimOpenAIEncryptedReasoningItems(wsReqBody)
if !removedReasoningItems {
logOpenAIWSModeInfo(
"reconnect_invalid_encrypted_content_recovery_skip account_id=%d attempt=%d reason=missing_encrypted_reasoning_items",
account.ID,
attempt,
)
return false
}
previousResponseID := openAIWSPayloadString(wsReqBody, "previous_response_id")
hasFunctionCallOutput := HasFunctionCallOutput(wsReqBody)
if previousResponseID != "" && !hasFunctionCallOutput {
delete(wsReqBody, "previous_response_id")
}
wsInvalidEncryptedContentRecoveryTried = true
logOpenAIWSModeInfo(
"reconnect_invalid_encrypted_content_recovery account_id=%d attempt=%d action=drop_encrypted_reasoning_items retry=1 previous_response_id_present=%v previous_response_id=%s previous_response_id_kind=%s has_function_call_output=%v dropped_previous_response_id=%v",
account.ID,
attempt,
previousResponseID != "",
truncateOpenAIWSLogValue(previousResponseID, openAIWSIDValueMaxLen),
normalizeOpenAIWSLogValue(ClassifyOpenAIPreviousResponseIDKind(previousResponseID)),
hasFunctionCallOutput,
previousResponseID != "" && !hasFunctionCallOutput,
)
return true
}
retryBudget := s.openAIWSRetryTotalBudget()
retryStartedAt := time.Now()
wsRetryLoop:
......@@ -1992,6 +2033,9 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
if reason == "previous_response_not_found" && recoverPrevResponseNotFound(attempt) {
continue
}
if reason == "invalid_encrypted_content" && recoverInvalidEncryptedContent(attempt) {
continue
}
if retryable && attempt < maxAttempts {
backoff := s.openAIWSRetryBackoff(attempt)
if retryBudget > 0 && time.Since(retryStartedAt)+backoff > retryBudget {
......@@ -2075,126 +2119,143 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
return nil, wsErr
}
// Build upstream request
upstreamCtx, releaseUpstreamCtx := detachStreamUpstreamContext(ctx, reqStream)
upstreamReq, err := s.buildUpstreamRequest(upstreamCtx, c, account, body, token, reqStream, promptCacheKey, isCodexCLI)
releaseUpstreamCtx()
if err != nil {
return nil, err
}
// Get proxy URL
proxyURL := ""
if account.ProxyID != nil && account.Proxy != nil {
proxyURL = account.Proxy.URL()
}
httpInvalidEncryptedContentRetryTried := false
for {
// Build upstream request
upstreamCtx, releaseUpstreamCtx := detachStreamUpstreamContext(ctx, reqStream)
upstreamReq, err := s.buildUpstreamRequest(upstreamCtx, c, account, body, token, reqStream, promptCacheKey, isCodexCLI)
releaseUpstreamCtx()
if err != nil {
return nil, err
}
// Send request
upstreamStart := time.Now()
resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
SetOpsLatencyMs(c, OpsUpstreamLatencyMsKey, time.Since(upstreamStart).Milliseconds())
if err != nil {
// Ensure the client receives an error response (handlers assume Forward writes on non-failover errors).
safeErr := sanitizeUpstreamErrorMessage(err.Error())
setOpsUpstreamError(c, 0, safeErr, "")
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: 0,
Kind: "request_error",
Message: safeErr,
})
c.JSON(http.StatusBadGateway, gin.H{
"error": gin.H{
"type": "upstream_error",
"message": "Upstream request failed",
},
})
return nil, fmt.Errorf("upstream request failed: %s", safeErr)
}
defer func() { _ = resp.Body.Close() }()
// Get proxy URL
proxyURL := ""
if account.ProxyID != nil && account.Proxy != nil {
proxyURL = account.Proxy.URL()
}
// Handle error response
if resp.StatusCode >= 400 {
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
resp.Body = io.NopCloser(bytes.NewReader(respBody))
upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) {
upstreamDetail := ""
if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString(string(respBody), maxBytes)
}
// Send request
upstreamStart := time.Now()
resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
SetOpsLatencyMs(c, OpsUpstreamLatencyMsKey, time.Since(upstreamStart).Milliseconds())
if err != nil {
// Ensure the client receives an error response (handlers assume Forward writes on non-failover errors).
safeErr := sanitizeUpstreamErrorMessage(err.Error())
setOpsUpstreamError(c, 0, safeErr, "")
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: resp.Header.Get("x-request-id"),
Kind: "failover",
Message: upstreamMsg,
Detail: upstreamDetail,
UpstreamStatusCode: 0,
Kind: "request_error",
Message: safeErr,
})
c.JSON(http.StatusBadGateway, gin.H{
"error": gin.H{
"type": "upstream_error",
"message": "Upstream request failed",
},
})
return nil, fmt.Errorf("upstream request failed: %s", safeErr)
}
// Handle error response
if resp.StatusCode >= 400 {
respBody, _ := io.ReadAll(io.LimitReader(resp.Body, 2<<20))
_ = resp.Body.Close()
resp.Body = io.NopCloser(bytes.NewReader(respBody))
upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(respBody))
upstreamMsg = sanitizeUpstreamErrorMessage(upstreamMsg)
upstreamCode := extractUpstreamErrorCode(respBody)
if !httpInvalidEncryptedContentRetryTried && resp.StatusCode == http.StatusBadRequest && upstreamCode == "invalid_encrypted_content" {
if trimOpenAIEncryptedReasoningItems(reqBody) {
body, err = json.Marshal(reqBody)
if err != nil {
return nil, fmt.Errorf("serialize invalid_encrypted_content retry body: %w", err)
}
setOpsUpstreamRequestBody(c, body)
httpInvalidEncryptedContentRetryTried = true
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Retrying non-WSv2 request once after invalid_encrypted_content (account: %s)", account.Name)
continue
}
logger.LegacyPrintf("service.openai_gateway", "[OpenAI] Skip non-WSv2 invalid_encrypted_content retry because encrypted reasoning items are missing (account: %s)", account.Name)
}
if s.shouldFailoverOpenAIUpstreamResponse(resp.StatusCode, upstreamMsg, respBody) {
upstreamDetail := ""
if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString(string(respBody), maxBytes)
}
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: resp.Header.Get("x-request-id"),
Kind: "failover",
Message: upstreamMsg,
Detail: upstreamDetail,
})
s.handleFailoverSideEffects(ctx, resp, account)
return nil, &UpstreamFailoverError{
StatusCode: resp.StatusCode,
ResponseBody: respBody,
RetryableOnSameAccount: account.IsPoolMode() && (isPoolModeRetryableStatus(resp.StatusCode) || isOpenAITransientProcessingError(resp.StatusCode, upstreamMsg, respBody)),
s.handleFailoverSideEffects(ctx, resp, account)
return nil, &UpstreamFailoverError{
StatusCode: resp.StatusCode,
ResponseBody: respBody,
RetryableOnSameAccount: account.IsPoolMode() && (isPoolModeRetryableStatus(resp.StatusCode) || isOpenAITransientProcessingError(resp.StatusCode, upstreamMsg, respBody)),
}
}
return s.handleErrorResponse(ctx, resp, c, account, body)
}
return s.handleErrorResponse(ctx, resp, c, account, body)
}
defer func() { _ = resp.Body.Close() }()
// Handle normal response
var usage *OpenAIUsage
var firstTokenMs *int
if reqStream {
streamResult, err := s.handleStreamingResponse(ctx, resp, c, account, startTime, originalModel, mappedModel)
if err != nil {
return nil, err
}
usage = streamResult.usage
firstTokenMs = streamResult.firstTokenMs
} else {
usage, err = s.handleNonStreamingResponse(ctx, resp, c, account, originalModel, mappedModel)
if err != nil {
return nil, err
// Handle normal response
var usage *OpenAIUsage
var firstTokenMs *int
if reqStream {
streamResult, err := s.handleStreamingResponse(ctx, resp, c, account, startTime, originalModel, mappedModel)
if err != nil {
return nil, err
}
usage = streamResult.usage
firstTokenMs = streamResult.firstTokenMs
} else {
usage, err = s.handleNonStreamingResponse(ctx, resp, c, account, originalModel, mappedModel)
if err != nil {
return nil, err
}
}
}
// Extract and save Codex usage snapshot from response headers (for OAuth accounts)
if account.Type == AccountTypeOAuth {
if snapshot := ParseCodexRateLimitHeaders(resp.Header); snapshot != nil {
s.updateCodexUsageSnapshot(ctx, account.ID, snapshot)
// Extract and save Codex usage snapshot from response headers (for OAuth accounts)
if account.Type == AccountTypeOAuth {
if snapshot := ParseCodexRateLimitHeaders(resp.Header); snapshot != nil {
s.updateCodexUsageSnapshot(ctx, account.ID, snapshot)
}
}
}
if usage == nil {
usage = &OpenAIUsage{}
}
if usage == nil {
usage = &OpenAIUsage{}
}
reasoningEffort := extractOpenAIReasoningEffort(reqBody, originalModel)
serviceTier := extractOpenAIServiceTier(reqBody)
reasoningEffort := extractOpenAIReasoningEffort(reqBody, originalModel)
serviceTier := extractOpenAIServiceTier(reqBody)
return &OpenAIForwardResult{
RequestID: resp.Header.Get("x-request-id"),
Usage: *usage,
Model: originalModel,
ServiceTier: serviceTier,
ReasoningEffort: reasoningEffort,
Stream: reqStream,
OpenAIWSMode: false,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
}, nil
return &OpenAIForwardResult{
RequestID: resp.Header.Get("x-request-id"),
Usage: *usage,
Model: originalModel,
ServiceTier: serviceTier,
ReasoningEffort: reasoningEffort,
Stream: reqStream,
OpenAIWSMode: false,
Duration: time.Since(startTime),
FirstTokenMs: firstTokenMs,
}, nil
}
}
func (s *OpenAIGatewayService) forwardOpenAIPassthrough(
......@@ -3756,6 +3817,109 @@ func buildOpenAIResponsesURL(base string) string {
return normalized + "/v1/responses"
}
// trimOpenAIEncryptedReasoningItems runs every item found under
// reqBody["input"] through sanitizeEncryptedReasoningInputItem and reports
// whether the request body was modified as a result.
//
// "input" may be a []any, a []map[string]any or a single map[string]any. A
// missing key or any other shape leaves reqBody untouched and yields false.
// Items the sanitizer asks to drop are removed; when nothing is left the
// "input" key itself is deleted. Slices are filtered in place, so the backing
// array of the original slice is reused.
func trimOpenAIEncryptedReasoningItems(reqBody map[string]any) bool {
	// Indexing a nil or empty map is safe and simply reports "not present".
	rawInput, present := reqBody["input"]
	if !present {
		return false
	}

	// commit stores the surviving input, or removes the key when none survive.
	commit := func(remaining int, value any) bool {
		if remaining == 0 {
			delete(reqBody, "input")
		} else {
			reqBody["input"] = value
		}
		return true
	}

	switch items := rawInput.(type) {
	case map[string]any:
		sanitized, modified, retain := sanitizeEncryptedReasoningInputItem(items)
		switch {
		case !modified:
			return false
		case !retain:
			delete(reqBody, "input")
			return true
		}
		asMap, isMap := sanitized.(map[string]any)
		if !isMap {
			return false
		}
		reqBody["input"] = asMap
		return true

	case []any:
		kept := items[:0]
		touched := false
		for _, entry := range items {
			sanitized, modified, retain := sanitizeEncryptedReasoningInputItem(entry)
			touched = touched || modified
			if retain {
				kept = append(kept, sanitized)
			}
		}
		if !touched {
			return false
		}
		return commit(len(kept), kept)

	case []map[string]any:
		kept := items[:0]
		touched := false
		for _, entry := range items {
			sanitized, modified, retain := sanitizeEncryptedReasoningInputItem(entry)
			touched = touched || modified
			if !retain {
				continue
			}
			// Fall back to the original entry if the sanitizer did not hand back a map.
			if asMap, isMap := sanitized.(map[string]any); isMap {
				kept = append(kept, asMap)
			} else {
				kept = append(kept, entry)
			}
		}
		if !touched {
			return false
		}
		return commit(len(kept), kept)
	}

	return false
}
// sanitizeEncryptedReasoningInputItem removes the "encrypted_content" key from
// a single input item when that item is a map whose trimmed "type" string is
// "reasoning".
//
// Results:
//   - next:    the item to keep (the same map, mutated in place), or nil when
//     the item should be dropped.
//   - changed: true when "encrypted_content" was deleted.
//   - keep:    false only when exactly one key remains after the deletion.
//
// Anything that is not a map, is not a reasoning item, or has no
// "encrypted_content" key is returned unchanged with changed=false, keep=true.
func sanitizeEncryptedReasoningInputItem(item any) (next any, changed bool, keep bool) {
	fields, isMap := item.(map[string]any)
	if !isMap {
		return item, false, true
	}

	kind, _ := fields["type"].(string)
	_, encrypted := fields["encrypted_content"]
	if strings.TrimSpace(kind) != "reasoning" || !encrypted {
		return item, false, true
	}

	delete(fields, "encrypted_content")
	if len(fields) != 1 {
		return fields, true, true
	}
	// Only one key survived the deletion: nothing worth forwarding.
	return nil, true, false
}
// IsOpenAIResponsesCompactPathForTest returns whatever the unexported
// isOpenAIResponsesCompactPath reports for c; it adds no logic of its own.
// NOTE(review): judging by the ForTest suffix this is presumably an export
// hook for tests outside the package — confirm before using it elsewhere.
func IsOpenAIResponsesCompactPathForTest(c *gin.Context) bool {
return isOpenAIResponsesCompactPath(c)
}
......
......@@ -3922,6 +3922,8 @@ func classifyOpenAIWSErrorEventFromRaw(codeRaw, errTypeRaw, msgRaw string) (stri
return "ws_unsupported", true
case "websocket_connection_limit_reached":
return "ws_connection_limit_reached", true
case "invalid_encrypted_content":
return "invalid_encrypted_content", true
case "previous_response_not_found":
return "previous_response_not_found", true
}
......@@ -3940,6 +3942,10 @@ func classifyOpenAIWSErrorEventFromRaw(codeRaw, errTypeRaw, msgRaw string) (stri
if strings.Contains(msg, "connection limit") && strings.Contains(msg, "websocket") {
return "ws_connection_limit_reached", true
}
if strings.Contains(msg, "invalid_encrypted_content") ||
(strings.Contains(msg, "encrypted content") && strings.Contains(msg, "could not be verified")) {
return "invalid_encrypted_content", true
}
if strings.Contains(msg, "previous_response_not_found") ||
(strings.Contains(msg, "previous response") && strings.Contains(msg, "not found")) {
return "previous_response_not_found", true
......@@ -3964,6 +3970,7 @@ func openAIWSErrorHTTPStatusFromRaw(codeRaw, errTypeRaw string) int {
case strings.Contains(errType, "invalid_request"),
strings.Contains(code, "invalid_request"),
strings.Contains(code, "bad_request"),
code == "invalid_encrypted_content",
code == "previous_response_not_found":
return http.StatusBadRequest
case strings.Contains(errType, "authentication"),
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment