Merge pull request #548 from Edric-Li/main

feat: 错误处理增强、重试优化与性能改进

Merge pull request #548 from Edric-Li/main
feat: 错误处理增强、重试优化与性能改进
ae6fed15 · Wesley Liddick · GitHub · 84ced1c4 · 378e476e · ae6fed15
Unverified Commit ae6fed15 authored Feb 10, 2026 by Wesley Liddick Committed by GitHub Feb 10, 2026
--- a/backend/internal/service/error_passthrough_runtime.go
+++ b/backend/internal/service/error_passthrough_runtime.go
@@ -61,6 +61,11 @@ func applyErrorPassthroughRule(
 		errMsg = *rule.CustomMessage
 	}

+	// 命中 skip_monitoring 时在 context 中标记，供 ops_error_logger 跳过记录。
+	if rule.SkipMonitoring {
+		c.Set(OpsSkipPassthroughKey, true)
+	}
+
 	// 与现有 failover 场景保持一致：命中规则时统一返回 upstream_error。
 	errType = "upstream_error"
 	return status, errType, errMsg, true

--- a/backend/internal/service/error_passthrough_runtime_test.go
+++ b/backend/internal/service/error_passthrough_runtime_test.go
@@ -194,6 +194,63 @@ func TestGeminiWriteGeminiMappedError_AppliesRuleFor422(t *testing.T) {
 	assert.Equal(t, "Gemini上游失败", errField["message"])
 }

+// TestApplyErrorPassthroughRule_SkipMonitoringSetsContextKey checks that a matched
+// rule with SkipMonitoring enabled stores OpsSkipPassthroughKey=true on the context.
+func TestApplyErrorPassthroughRule_SkipMonitoringSetsContextKey(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	c, _ := gin.CreateTestContext(httptest.NewRecorder())
+
+	skipRule := newNonFailoverPassthroughRule(http.StatusBadRequest, "prompt is too long", http.StatusBadRequest, "上下文超限")
+	skipRule.SkipMonitoring = true
+
+	svc := &ErrorPassthroughService{}
+	svc.setLocalCache([]*model.ErrorPassthroughRule{skipRule})
+	BindErrorPassthroughService(c, svc)
+
+	// Upstream reply containing the keyword the rule was built with.
+	upstreamBody := []byte(`{"error":{"message":"prompt is too long"}}`)
+	_, _, _, matched := applyErrorPassthroughRule(
+		c, PlatformAnthropic,
+		http.StatusBadRequest, upstreamBody,
+		http.StatusBadGateway, "upstream_error", "Upstream request failed",
+	)
+	assert.True(t, matched)
+
+	// The flag must be present and hold a boolean true.
+	raw, exists := c.Get(OpsSkipPassthroughKey)
+	assert.True(t, exists, "OpsSkipPassthroughKey should be set when skip_monitoring=true")
+	flag, isBool := raw.(bool)
+	assert.True(t, isBool, "value should be bool")
+	assert.True(t, flag)
+}
+
+// TestApplyErrorPassthroughRule_NoSkipMonitoringDoesNotSetContextKey checks that a
+// matched rule with SkipMonitoring disabled leaves OpsSkipPassthroughKey unset.
+func TestApplyErrorPassthroughRule_NoSkipMonitoringDoesNotSetContextKey(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	c, _ := gin.CreateTestContext(httptest.NewRecorder())
+
+	plainRule := newNonFailoverPassthroughRule(http.StatusBadRequest, "prompt is too long", http.StatusBadRequest, "上下文超限")
+	plainRule.SkipMonitoring = false
+
+	svc := &ErrorPassthroughService{}
+	svc.setLocalCache([]*model.ErrorPassthroughRule{plainRule})
+	BindErrorPassthroughService(c, svc)
+
+	// Upstream reply containing the keyword the rule was built with.
+	upstreamBody := []byte(`{"error":{"message":"prompt is too long"}}`)
+	_, _, _, matched := applyErrorPassthroughRule(
+		c, PlatformAnthropic,
+		http.StatusBadRequest, upstreamBody,
+		http.StatusBadGateway, "upstream_error", "Upstream request failed",
+	)
+	assert.True(t, matched)
+
+	// The rule still matches, but no skip flag may appear on the context.
+	_, flagged := c.Get(OpsSkipPassthroughKey)
+	assert.False(t, flagged, "OpsSkipPassthroughKey should NOT be set when skip_monitoring=false")
+}
+
 func newNonFailoverPassthroughRule(statusCode int, keyword string, respCode int, customMessage string) *model.ErrorPassthroughRule {
 	return &model.ErrorPassthroughRule{
 		ID:              1,

--- a/backend/internal/service/error_passthrough_service.go
+++ b/backend/internal/service/error_passthrough_service.go
@@ -45,10 +45,20 @@ type ErrorPassthroughService struct {
 	cache ErrorPassthroughCache

 	// 本地内存缓存，用于快速匹配
-	localCache   []*model.ErrorPassthroughRule
+	localCache   []*cachedPassthroughRule
 	localCacheMu sync.RWMutex
 }

+// cachedPassthroughRule wraps a rule with precomputed lookup data so matching avoids repeated ToLower calls.
+type cachedPassthroughRule struct {
+	*model.ErrorPassthroughRule
+	lowerKeywords  []string         // precomputed lowercase keywords
+	lowerPlatforms []string         // precomputed lowercase platforms
+	errorCodeSet   map[int]struct{} // precomputed set of error codes
+}
+
+const maxBodyMatchLen = 8 << 10 // 8KB cap on the body prefix used for matching; assumes the error message never first appears beyond 8KB
+
 // NewErrorPassthroughService 创建错误透传规则服务
 func NewErrorPassthroughService(
 	repo ErrorPassthroughRepository,
@@ -150,17 +160,19 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
 		return nil
 	}

-	bodyStr := strings.ToLower(string(body))
+	lowerPlatform := strings.ToLower(platform)
+	var bodyLower string // 延迟初始化，只在需要关键词匹配时计算
+	var bodyLowerDone bool

 	for _, rule := range rules {
 		if !rule.Enabled {
 			continue
 		}
-		if !s.platformMatches(rule, platform) {
+		if !s.platformMatchesCached(rule, lowerPlatform) {
 			continue
 		}
-		if s.ruleMatches(rule, statusCode, bodyStr) {
-			return rule
+		if s.ruleMatchesOptimized(rule, statusCode, body, &bodyLower, &bodyLowerDone) {
+			return rule.ErrorPassthroughRule
 		}
 	}

@@ -168,7 +180,7 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
 }

 // getCachedRules 获取缓存的规则列表（按优先级排序）
-func (s *ErrorPassthroughService) getCachedRules() []*model.ErrorPassthroughRule {
+func (s *ErrorPassthroughService) getCachedRules() []*cachedPassthroughRule {
 	s.localCacheMu.RLock()
 	rules := s.localCache
 	s.localCacheMu.RUnlock()
@@ -223,17 +235,39 @@ func (s *ErrorPassthroughService) reloadRulesFromDB(ctx context.Context) error {
 	return nil
 }

-// setLocalCache 设置本地缓存
+// setLocalCache replaces the local cache, precomputing lowercase keywords/platforms and the error-code set so matching does not recompute them at runtime.
 func (s *ErrorPassthroughService) setLocalCache(rules []*model.ErrorPassthroughRule) {
+	cached := make([]*cachedPassthroughRule, len(rules))
+	for i, r := range rules {
+		cr := &cachedPassthroughRule{ErrorPassthroughRule: r} // derived fields stay nil when the source slice is empty
+		if len(r.Keywords) > 0 {
+			cr.lowerKeywords = make([]string, len(r.Keywords))
+			for j, kw := range r.Keywords {
+				cr.lowerKeywords[j] = strings.ToLower(kw)
+			}
+		}
+		if len(r.Platforms) > 0 {
+			cr.lowerPlatforms = make([]string, len(r.Platforms))
+			for j, p := range r.Platforms {
+				cr.lowerPlatforms[j] = strings.ToLower(p)
+			}
+		}
+		if len(r.ErrorCodes) > 0 {
+			cr.errorCodeSet = make(map[int]struct{}, len(r.ErrorCodes))
+			for _, code := range r.ErrorCodes {
+				cr.errorCodeSet[code] = struct{}{}
+			}
+		}
+		cached[i] = cr
+	}
+
 	// 按优先级排序
-	sorted := make([]*model.ErrorPassthroughRule, len(rules))
-	copy(sorted, rules)
-	sort.Slice(sorted, func(i, j int) bool {
-		return sorted[i].Priority < sorted[j].Priority
+	sort.Slice(cached, func(i, j int) bool {
+		return cached[i].Priority < cached[j].Priority // ascending: lower Priority value sorts first; sort.Slice is not stable for ties
 	})

 	s.localCacheMu.Lock()
-	s.localCache = sorted
+	s.localCache = cached // swap in the freshly built slice while holding the write lock
 	s.localCacheMu.Unlock()
 }

@@ -273,62 +307,79 @@ func (s *ErrorPassthroughService) invalidateAndNotify(ctx context.Context) {
 	}
 }

-// platformMatches 检查平台是否匹配
-func (s *ErrorPassthroughService) platformMatches(rule *model.ErrorPassthroughRule, platform string) bool {
-	// 如果没有配置平台限制，则匹配所有平台
-	if len(rule.Platforms) == 0 {
-		return true
+// ensureBodyLower lazily lowercases body at most once, capped at the first maxBodyMatchLen (8KB) bytes, and caches the result through bodyLower/done.
+func ensureBodyLower(body []byte, bodyLower *string, done *bool) string {
+	if *done {
+		return *bodyLower
+	}
+	b := body
+	if len(b) > maxBodyMatchLen {
+		b = b[:maxBodyMatchLen] // byte-based cut; may split a multi-byte UTF-8 sequence at the boundary
 	}
+	*bodyLower = strings.ToLower(string(b))
+	*done = true
+	return *bodyLower
+}

-	platform = strings.ToLower(platform)
-	for _, p := range rule.Platforms {
-		if strings.ToLower(p) == platform {
+// platformMatchesCached reports whether the already-lowercased platform is allowed by the rule's precomputed lowercase platform list.
+func (s *ErrorPassthroughService) platformMatchesCached(rule *cachedPassthroughRule, lowerPlatform string) bool {
+	if len(rule.lowerPlatforms) == 0 { // no platform restriction configured: the rule matches every platform
+		return true
+	}
+	for _, p := range rule.lowerPlatforms {
+		if p == lowerPlatform {
 			return true
 		}
 	}
-
 	return false
 }

-// ruleMatches 检查规则是否匹配
-func (s *ErrorPassthroughService) ruleMatches(rule *model.ErrorPassthroughRule, statusCode int, bodyLower string) bool {
-	hasErrorCodes := len(rule.ErrorCodes) > 0
-	hasKeywords := len(rule.Keywords) > 0
+// ruleMatchesOptimized reports whether rule matches statusCode/body; it short-circuits and lowercases body lazily, at most once, via bodyLower/bodyLowerDone.
+func (s *ErrorPassthroughService) ruleMatchesOptimized(rule *cachedPassthroughRule, statusCode int, body []byte, bodyLower *string, bodyLowerDone *bool) bool {
+	hasErrorCodes := len(rule.errorCodeSet) > 0
+	hasKeywords := len(rule.lowerKeywords) > 0

-	// 如果没有配置任何条件，不匹配
 	if !hasErrorCodes && !hasKeywords {
 		return false
 	}

-	codeMatch := !hasErrorCodes || s.containsInt(rule.ErrorCodes, statusCode)
-	keywordMatch := !hasKeywords || s.containsAnyKeyword(bodyLower, rule.Keywords)
+	codeMatch := !hasErrorCodes || s.containsIntSet(rule.errorCodeSet, statusCode)

 	if rule.MatchMode == model.MatchModeAll {
-		// "all" 模式：所有配置的条件都必须满足
-		return codeMatch && keywordMatch
+		// "all" mode: every configured condition must hold; a code mismatch returns before the body is touched
+		if hasErrorCodes && !codeMatch {
+			return false
+		}
+		if hasKeywords {
+			return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
+		}
+		return codeMatch
 	}

-	// "any" 模式：任一条件满足即可
+	// "any" mode: one satisfied condition is enough; a code match returns before the body is touched
 	if hasErrorCodes && hasKeywords {
-		return codeMatch || keywordMatch
-	}
-	return codeMatch && keywordMatch
-}
-
-// containsInt 检查切片是否包含指定整数
-func (s *ErrorPassthroughService) containsInt(slice []int, val int) bool {
-	for _, v := range slice {
-		if v == val {
+		if codeMatch {
 			return true
 		}
+		return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
 	}
-	return false
+	// only one kind of condition is configured
+	if hasKeywords {
+		return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
+	}
+	return codeMatch
+}
+
+// containsIntSet reports whether val is in set, using a map lookup instead of a linear scan.
+func (s *ErrorPassthroughService) containsIntSet(set map[int]struct{}, val int) bool {
+	_, ok := set[val]
+	return ok
 }

-// containsAnyKeyword 检查字符串是否包含任一关键词（不区分大小写）
-func (s *ErrorPassthroughService) containsAnyKeyword(bodyLower string, keywords []string) bool {
-	for _, kw := range keywords {
-		if strings.Contains(bodyLower, strings.ToLower(kw)) {
+// containsAnyKeywordCached 使用预计算的小写关键词检查匹配
+func (s *ErrorPassthroughService) containsAnyKeywordCached(bodyLower string, lowerKeywords []string) bool {
+	for _, kw := range lowerKeywords {
+		if strings.Contains(bodyLower, kw) {
 			return true
 		}
 	}

--- a/backend/internal/service/error_passthrough_service_test.go
+++ b/backend/internal/service/error_passthrough_service_test.go
@@ -145,32 +145,58 @@ func newTestService(rules []*model.ErrorPassthroughRule) *ErrorPassthroughServic
 	return svc
 }

+// newCachedRuleForTest builds the cachedPassthroughRule for a model rule, precomputing
+// its lowercase keywords/platforms and its error-code set (test helper).
+func newCachedRuleForTest(rule *model.ErrorPassthroughRule) *cachedPassthroughRule {
+	// lowered returns a lowercase copy of in, or nil when in is empty.
+	lowered := func(in []string) []string {
+		if len(in) == 0 {
+			return nil
+		}
+		out := make([]string, 0, len(in))
+		for _, s := range in {
+			out = append(out, strings.ToLower(s))
+		}
+		return out
+	}
+	var codes map[int]struct{}
+	if n := len(rule.ErrorCodes); n > 0 {
+		codes = make(map[int]struct{}, n)
+		for _, code := range rule.ErrorCodes {
+			codes[code] = struct{}{}
+		}
+	}
+	return &cachedPassthroughRule{ErrorPassthroughRule: rule, lowerKeywords: lowered(rule.Keywords), lowerPlatforms: lowered(rule.Platforms), errorCodeSet: codes}
+}
+
 // =============================================================================
-// 测试 ruleMatches 核心匹配逻辑
+// 测试 ruleMatchesOptimized 核心匹配逻辑
 // =============================================================================

 func TestRuleMatches_NoConditions(t *testing.T) {
 	// 没有配置任何条件时，不应该匹配
 	svc := newTestService(nil)
-	rule := &model.ErrorPassthroughRule{
+	rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
 		Enabled:    true,
 		ErrorCodes: []int{},
 		Keywords:   []string{},
 		MatchMode:  model.MatchModeAny,
-	}
+	})

-	assert.False(t, svc.ruleMatches(rule, 422, "some error message"),
+	var bodyLower string // receives the lazily lowercased body, if the matcher needs it
+	var bodyLowerDone bool // tells the matcher whether bodyLower has been filled in yet
+	assert.False(t, svc.ruleMatchesOptimized(rule, 422, []byte("some error message"), &bodyLower, &bodyLowerDone),
 		"没有配置条件时不应该匹配")
 }

 func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
 	svc := newTestService(nil)
-	rule := &model.ErrorPassthroughRule{
+	rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
 		Enabled:    true,
 		ErrorCodes: []int{422, 400},
 		Keywords:   []string{},
 		MatchMode:  model.MatchModeAny,
-	}
+	})

 	tests := []struct {
 		name       string
@@ -186,7 +212,9 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := svc.ruleMatches(rule, tt.statusCode, tt.body)
+			var bodyLower string
+			var bodyLowerDone bool
+			result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
 			assert.Equal(t, tt.expected, result)
 		})
 	}
@@ -194,12 +222,12 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {

 func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
 	svc := newTestService(nil)
-	rule := &model.ErrorPassthroughRule{
+	rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
 		Enabled:    true,
 		ErrorCodes: []int{},
 		Keywords:   []string{"context limit", "model not supported"},
 		MatchMode:  model.MatchModeAny,
-	}
+	})

 	tests := []struct {
 		name       string
@@ -210,16 +238,14 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
 		{"关键词匹配 context limit", 500, "error: context limit reached", true},
 		{"关键词匹配 model not supported", 400, "the model not supported here", true},
 		{"关键词不匹配", 422, "some other error", false},
-		// 注意：ruleMatches 接收的 body 参数应该是已经转换为小写的
-		// 实际使用时，MatchRule 会先将 body 转换为小写再传给 ruleMatches
-		{"关键词大小写 - 输入已小写", 500, "context limit exceeded", true},
+		{"关键词大小写 - 自动转换", 500, "Context Limit exceeded", true},
 	}

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			// 模拟 MatchRule 的行为：先转换为小写
-			bodyLower := strings.ToLower(tt.body)
-			result := svc.ruleMatches(rule, tt.statusCode, bodyLower)
+			var bodyLower string
+			var bodyLowerDone bool
+			result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
 			assert.Equal(t, tt.expected, result)
 		})
 	}
@@ -228,12 +254,12 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
 func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
 	// any 模式：错误码 OR 关键词
 	svc := newTestService(nil)
-	rule := &model.ErrorPassthroughRule{
+	rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
 		Enabled:    true,
 		ErrorCodes: []int{422, 400},
 		Keywords:   []string{"context limit"},
 		MatchMode:  model.MatchModeAny,
-	}
+	})

 	tests := []struct {
 		name       string
@@ -274,7 +300,9 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := svc.ruleMatches(rule, tt.statusCode, tt.body)
+			var bodyLower string
+			var bodyLowerDone bool
+			result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
 			assert.Equal(t, tt.expected, result, tt.reason)
 		})
 	}
@@ -283,12 +311,12 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
 func TestRuleMatches_BothConditions_AllMode(t *testing.T) {
 	// all 模式：错误码 AND 关键词
 	svc := newTestService(nil)
-	rule := &model.ErrorPassthroughRule{
+	rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
 		Enabled:    true,
 		ErrorCodes: []int{422, 400},
 		Keywords:   []string{"context limit"},
 		MatchMode:  model.MatchModeAll,
-	}
+	})

 	tests := []struct {
 		name       string
@@ -329,14 +357,16 @@ func TestRuleMatches_BothConditions_AllMode(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			result := svc.ruleMatches(rule, tt.statusCode, tt.body)
+			var bodyLower string
+			var bodyLowerDone bool
+			result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
 			assert.Equal(t, tt.expected, result, tt.reason)
 		})
 	}
 }

 // =============================================================================
-// 测试 platformMatches 平台匹配逻辑
+// 测试 platformMatchesCached 平台匹配逻辑
 // =============================================================================

 func TestPlatformMatches(t *testing.T) {
@@ -394,10 +424,10 @@ func TestPlatformMatches(t *testing.T) {

 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
-			rule := &model.ErrorPassthroughRule{
+			rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
 				Platforms: tt.rulePlatforms,
-			}
-			result := svc.platformMatches(rule, tt.requestPlatform)
+			})
+			result := svc.platformMatchesCached(rule, strings.ToLower(tt.requestPlatform))
 			assert.Equal(t, tt.expected, result)
 		})
 	}

--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -368,15 +368,31 @@ type ForwardResult struct {

 // UpstreamFailoverError indicates an upstream error that should trigger account failover.
 type UpstreamFailoverError struct {
-	StatusCode        int
-	ResponseBody      []byte // 上游响应体，用于错误透传规则匹配
-	ForceCacheBilling bool   // Antigravity 粘性会话切换时设为 true
+	StatusCode             int
+	ResponseBody           []byte // upstream response body, used for error passthrough rule matching
+	ForceCacheBilling      bool   // set to true when an Antigravity sticky session is switched
+	RetryableOnSameAccount bool   // transient error (e.g. intermittent Google 400, empty response): retry on the same account N times before switching
 }

 func (e *UpstreamFailoverError) Error() string {
 	return fmt.Sprintf("upstream error: %d (failover)", e.StatusCode)
 }

+// TempUnscheduleRetryableError runs the temporary-unschedule helper for a failover error marked RetryableOnSameAccount; nil or
+// non-retryable errors are ignored. Intended for the handler layer once same-account retries are exhausted and it switches accounts.
+func (s *GatewayService) TempUnscheduleRetryableError(ctx context.Context, accountID int64, failoverErr *UpstreamFailoverError) {
+	retryable := failoverErr != nil && failoverErr.RetryableOnSameAccount
+	// The upstream status code selects which unschedule helper runs, if any.
+	switch {
+	case !retryable:
+		return
+	case failoverErr.StatusCode == http.StatusBadRequest:
+		tempUnscheduleGoogleConfigError(ctx, s.accountRepo, accountID, "[handler]")
+	case failoverErr.StatusCode == http.StatusBadGateway:
+		tempUnscheduleEmptyResponse(ctx, s.accountRepo, accountID, "[handler]")
+	}
+}
+
 // GatewayService handles API gateway operations
 type GatewayService struct {
 	accountRepo         AccountRepository

--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -880,6 +880,37 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex

 		// ErrorPolicyNone → 原有逻辑
 		s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
+		// 精确匹配服务端配置类 400 错误，触发 failover + 临时封禁
+		if resp.StatusCode == http.StatusBadRequest {
+			msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
+			if isGoogleProjectConfigError(msg400) {
+				upstreamReqID := resp.Header.Get(requestIDHeader)
+				if upstreamReqID == "" {
+					upstreamReqID = resp.Header.Get("x-goog-request-id")
+				}
+				upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
+				upstreamDetail := ""
+				if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
+					maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
+					if maxBytes <= 0 {
+						maxBytes = 2048
+					}
+					upstreamDetail = truncateString(string(respBody), maxBytes)
+				}
+				log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID)
+				appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
+					Platform:           account.Platform,
+					AccountID:          account.ID,
+					AccountName:        account.Name,
+					UpstreamStatusCode: resp.StatusCode,
+					UpstreamRequestID:  upstreamReqID,
+					Kind:               "failover",
+					Message:            upstreamMsg,
+					Detail:             upstreamDetail,
+				})
+				return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody, RetryableOnSameAccount: true}
+			}
+		}
 		if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) {
 			upstreamReqID := resp.Header.Get(requestIDHeader)
 			if upstreamReqID == "" {
@@ -1330,6 +1361,34 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.

 		// ErrorPolicyNone → 原有逻辑
 		s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
+		// 精确匹配服务端配置类 400 错误，触发 failover + 临时封禁
+		if resp.StatusCode == http.StatusBadRequest {
+			msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
+			if isGoogleProjectConfigError(msg400) {
+				evBody := unwrapIfNeeded(isOAuth, respBody)
+				upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(evBody)))
+				upstreamDetail := ""
+				if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
+					maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
+					if maxBytes <= 0 {
+						maxBytes = 2048
+					}
+					upstreamDetail = truncateString(string(evBody), maxBytes)
+				}
+				log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID)
+				appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
+					Platform:           account.Platform,
+					AccountID:          account.ID,
+					AccountName:        account.Name,
+					UpstreamStatusCode: resp.StatusCode,
+					UpstreamRequestID:  requestID,
+					Kind:               "failover",
+					Message:            upstreamMsg,
+					Detail:             upstreamDetail,
+				})
+				return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody, RetryableOnSameAccount: true}
+			}
+		}
 		if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) {
 			evBody := unwrapIfNeeded(isOAuth, respBody)
 			upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(evBody))

--- a/backend/internal/service/ops_upstream_context.go
+++ b/backend/internal/service/ops_upstream_context.go
@@ -20,6 +20,10 @@ const (
 	// retry the specific upstream attempt (not just the client request).
 	// This value is sanitized+trimmed before being persisted.
 	OpsUpstreamRequestBodyKey = "ops_upstream_request_body"
+
+	// OpsSkipPassthroughKey 由 applyErrorPassthroughRule 在命中 skip_monitoring=true 的规则时设置。
+	// ops_error_logger 中间件检查此 key，为 true 时跳过错误记录。
+	OpsSkipPassthroughKey = "ops_skip_passthrough"
 )

 func setOpsUpstreamError(c *gin.Context, upstreamStatusCode int, upstreamMessage, upstreamDetail string) {
@@ -103,6 +107,37 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
 	evCopy := ev
 	existing = append(existing, &evCopy)
 	c.Set(OpsUpstreamErrorsKey, existing)
+
+	checkSkipMonitoringForUpstreamEvent(c, &evCopy)
+}
+
+// checkSkipMonitoringForUpstreamEvent runs an upstream error event through the
+// passthrough rules bound to the context and, when the matched rule has
+// SkipMonitoring set, stores OpsSkipPassthroughKey=true on the context. Events
+// recorded during retries / failover do not pass through the final
+// applyErrorPassthroughRule path, so this is how they get the same skip marker.
+func checkSkipMonitoringForUpstreamEvent(c *gin.Context, ev *OpsUpstreamErrorEvent) {
+	statusCode := ev.UpstreamStatusCode
+	if statusCode == 0 {
+		return
+	}
+
+	matcher := getBoundErrorPassthroughService(c)
+	if matcher == nil {
+		return
+	}
+
+	// Keyword matching uses Detail, falling back to Message when Detail is
+	// empty. MatchRule is called even if both are empty, since rules that
+	// list only ErrorCodes (no Keywords) can still match.
+	matchText := ev.Message
+	if ev.Detail != "" {
+		matchText = ev.Detail
+	}
+
+	if hit := matcher.MatchRule(ev.Platform, statusCode, []byte(matchText)); hit != nil && hit.SkipMonitoring {
+		c.Set(OpsSkipPassthroughKey, true)
+	}
 }

 func marshalOpsUpstreamErrors(events []*OpsUpstreamErrorEvent) *string {

--- a/backend/migrations/053_add_skip_monitoring_to_error_passthrough.sql
+++ b/backend/migrations/053_add_skip_monitoring_to_error_passthrough.sql
+-- Add the skip_monitoring flag to error_passthrough_rules (NOT NULL, defaults to false; skipped if the column already exists).
+-- When true, errors matching this rule will not be recorded in ops_error_logs.
+ALTER TABLE error_passthrough_rules
+ADD COLUMN IF NOT EXISTS skip_monitoring BOOLEAN NOT NULL DEFAULT false;
--- a/frontend/src/api/admin/errorPassthrough.ts
+++ b/frontend/src/api/admin/errorPassthrough.ts
@@ -21,6 +21,7 @@ export interface ErrorPassthroughRule {
  response_code: number | null
  passthrough_body: boolean
  custom_message: string | null
+  skip_monitoring: boolean
  description: string | null
  created_at: string
  updated_at: string
@@ -41,6 +42,7 @@ export interface CreateRuleRequest {
  response_code?: number | null
  passthrough_body?: boolean
  custom_message?: string | null
+  skip_monitoring?: boolean
  description?: string | null
 }

@@ -59,6 +61,7 @@ export interface UpdateRuleRequest {
  response_code?: number | null
  passthrough_body?: boolean
  custom_message?: string | null
+  skip_monitoring?: boolean
  description?: string | null
 }


--- a/frontend/src/components/admin/ErrorPassthroughRulesModal.vue
+++ b/frontend/src/components/admin/ErrorPassthroughRulesModal.vue
@@ -148,6 +148,16 @@
                      {{ rule.passthrough_body ? t('admin.errorPassthrough.passthrough') : t('admin.errorPassthrough.custom') }}
                    </span>
                  </div>
+                  <div v-if="rule.skip_monitoring" class="flex items-center gap-1">
+                    <Icon
+                      name="checkCircle"
+                      size="xs"
+                      class="text-yellow-500"
+                    />
+                    <span class="text-gray-600 dark:text-gray-400">
+                      {{ t('admin.errorPassthrough.skipMonitoring') }}
+                    </span>
+                  </div>
                </div>
              </td>
              <td class="px-3 py-2">
@@ -366,6 +376,19 @@
          </div>
        </div>

+        <!-- Skip Monitoring -->
+        <div class="flex items-center gap-1.5">
+          <input
+            type="checkbox"
+            v-model="form.skip_monitoring"
+            class="h-3.5 w-3.5 rounded border-gray-300 text-yellow-600 focus:ring-yellow-500"
+          />
+          <span class="text-xs font-medium text-gray-700 dark:text-gray-300">
+            {{ t('admin.errorPassthrough.form.skipMonitoring') }}
+          </span>
+        </div>
+        <p class="input-hint text-xs -mt-3">{{ t('admin.errorPassthrough.form.skipMonitoringHint') }}</p>
+
        <!-- Enabled -->
        <div class="flex items-center gap-1.5">
          <input
@@ -453,6 +476,7 @@ const form = reactive({
  response_code: null as number | null,
  passthrough_body: true,
  custom_message: null as string | null,
+  skip_monitoring: false,
  description: null as string | null
 })

@@ -497,6 +521,7 @@ const resetForm = () => {
  form.response_code = null
  form.passthrough_body = true
  form.custom_message = null
+  form.skip_monitoring = false
  form.description = null
  errorCodesInput.value = ''
  keywordsInput.value = ''
@@ -520,6 +545,7 @@ const handleEdit = (rule: ErrorPassthroughRule) => {
  form.response_code = rule.response_code
  form.passthrough_body = rule.passthrough_body
  form.custom_message = rule.custom_message
+  form.skip_monitoring = rule.skip_monitoring
  form.description = rule.description
  errorCodesInput.value = rule.error_codes.join(', ')
  keywordsInput.value = rule.keywords.join('\n')
@@ -575,6 +601,7 @@ const handleSubmit = async () => {
      response_code: form.passthrough_code ? null : form.response_code,
      passthrough_body: form.passthrough_body,
      custom_message: form.passthrough_body ? null : form.custom_message,
+      skip_monitoring: form.skip_monitoring,
      description: form.description?.trim() || null
    }


--- a/frontend/src/i18n/locales/en.ts
+++ b/frontend/src/i18n/locales/en.ts
@@ -3353,6 +3353,7 @@ export default {
      custom: 'Custom',
      code: 'Code',
      body: 'Body',
+      skipMonitoring: 'Skip Monitoring',

      // Columns
      columns: {
@@ -3397,6 +3398,8 @@ export default {
        passthroughBody: 'Passthrough upstream error message',
        customMessage: 'Custom error message',
        customMessagePlaceholder: 'Error message to return to client...',
+        skipMonitoring: 'Skip monitoring',
+        skipMonitoringHint: 'When enabled, errors matching this rule will not be recorded in ops monitoring',
        enabled: 'Enable this rule'
      },


--- a/frontend/src/i18n/locales/zh.ts
+++ b/frontend/src/i18n/locales/zh.ts
@@ -3527,6 +3527,7 @@ export default {
      custom: '自定义',
      code: '状态码',
      body: '消息体',
+      skipMonitoring: '跳过监控',

      // Columns
      columns: {
@@ -3571,6 +3572,8 @@ export default {
        passthroughBody: '透传上游错误信息',
        customMessage: '自定义错误信息',
        customMessagePlaceholder: '返回给客户端的错误信息...',
+        skipMonitoring: '跳过运维监控记录',
+        skipMonitoringHint: '开启后，匹配此规则的错误不会被记录到运维监控中',
        enabled: '启用此规则'
      },