"frontend/src/components/vscode:/vscode.git/clone" did not exist on "3ecadf4aad86c5befe5a72b0e033ab74ce8bcfcd"
Unverified Commit ae6fed15 authored by Wesley Liddick's avatar Wesley Liddick Committed by GitHub
Browse files

Merge pull request #548 from Edric-Li/main

feat: 错误处理增强、重试优化与性能改进
parents 84ced1c4 378e476e
...@@ -61,6 +61,11 @@ func applyErrorPassthroughRule( ...@@ -61,6 +61,11 @@ func applyErrorPassthroughRule(
errMsg = *rule.CustomMessage errMsg = *rule.CustomMessage
} }
// 命中 skip_monitoring 时在 context 中标记,供 ops_error_logger 跳过记录。
if rule.SkipMonitoring {
c.Set(OpsSkipPassthroughKey, true)
}
// 与现有 failover 场景保持一致:命中规则时统一返回 upstream_error。 // 与现有 failover 场景保持一致:命中规则时统一返回 upstream_error。
errType = "upstream_error" errType = "upstream_error"
return status, errType, errMsg, true return status, errType, errMsg, true
......
...@@ -194,6 +194,63 @@ func TestGeminiWriteGeminiMappedError_AppliesRuleFor422(t *testing.T) { ...@@ -194,6 +194,63 @@ func TestGeminiWriteGeminiMappedError_AppliesRuleFor422(t *testing.T) {
assert.Equal(t, "Gemini上游失败", errField["message"]) assert.Equal(t, "Gemini上游失败", errField["message"])
} }
// TestApplyErrorPassthroughRule_SkipMonitoringSetsContextKey checks that when a
// matched passthrough rule has SkipMonitoring enabled, applyErrorPassthroughRule
// stores a boolean true under OpsSkipPassthroughKey on the gin context.
func TestApplyErrorPassthroughRule_SkipMonitoringSetsContextKey(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	// Rule that matches a 400 containing the keyword and opts out of monitoring.
	skipRule := newNonFailoverPassthroughRule(http.StatusBadRequest, "prompt is too long", http.StatusBadRequest, "上下文超限")
	skipRule.SkipMonitoring = true

	svc := &ErrorPassthroughService{}
	svc.setLocalCache([]*model.ErrorPassthroughRule{skipRule})
	BindErrorPassthroughService(ctx, svc)

	upstreamBody := []byte(`{"error":{"message":"prompt is too long"}}`)
	_, _, _, matched := applyErrorPassthroughRule(
		ctx,
		PlatformAnthropic,
		http.StatusBadRequest,
		upstreamBody,
		http.StatusBadGateway,
		"upstream_error",
		"Upstream request failed",
	)
	assert.True(t, matched)

	// The marker must be present, typed as bool, and true.
	raw, found := ctx.Get(OpsSkipPassthroughKey)
	assert.True(t, found, "OpsSkipPassthroughKey should be set when skip_monitoring=true")
	flag, isBool := raw.(bool)
	assert.True(t, isBool, "value should be bool")
	assert.True(t, flag)
}
// TestApplyErrorPassthroughRule_NoSkipMonitoringDoesNotSetContextKey checks that
// a matched passthrough rule with SkipMonitoring disabled leaves
// OpsSkipPassthroughKey absent from the gin context.
func TestApplyErrorPassthroughRule_NoSkipMonitoringDoesNotSetContextKey(t *testing.T) {
	gin.SetMode(gin.TestMode)
	ctx, _ := gin.CreateTestContext(httptest.NewRecorder())

	// Same matching rule as the skip case, but monitoring stays on.
	plainRule := newNonFailoverPassthroughRule(http.StatusBadRequest, "prompt is too long", http.StatusBadRequest, "上下文超限")
	plainRule.SkipMonitoring = false

	svc := &ErrorPassthroughService{}
	svc.setLocalCache([]*model.ErrorPassthroughRule{plainRule})
	BindErrorPassthroughService(ctx, svc)

	upstreamBody := []byte(`{"error":{"message":"prompt is too long"}}`)
	_, _, _, matched := applyErrorPassthroughRule(
		ctx,
		PlatformAnthropic,
		http.StatusBadRequest,
		upstreamBody,
		http.StatusBadGateway,
		"upstream_error",
		"Upstream request failed",
	)
	assert.True(t, matched)

	// The rule matched, yet no skip marker may have been written.
	_, found := ctx.Get(OpsSkipPassthroughKey)
	assert.False(t, found, "OpsSkipPassthroughKey should NOT be set when skip_monitoring=false")
}
func newNonFailoverPassthroughRule(statusCode int, keyword string, respCode int, customMessage string) *model.ErrorPassthroughRule { func newNonFailoverPassthroughRule(statusCode int, keyword string, respCode int, customMessage string) *model.ErrorPassthroughRule {
return &model.ErrorPassthroughRule{ return &model.ErrorPassthroughRule{
ID: 1, ID: 1,
......
...@@ -45,10 +45,20 @@ type ErrorPassthroughService struct { ...@@ -45,10 +45,20 @@ type ErrorPassthroughService struct {
cache ErrorPassthroughCache cache ErrorPassthroughCache
// 本地内存缓存,用于快速匹配 // 本地内存缓存,用于快速匹配
localCache []*model.ErrorPassthroughRule localCache []*cachedPassthroughRule
localCacheMu sync.RWMutex localCacheMu sync.RWMutex
} }
// cachedPassthroughRule is a cache entry that pairs a rule with values
// precomputed from it, so matching does not repeat strings.ToLower at runtime.
type cachedPassthroughRule struct {
// Embedded source rule; its fields are promoted onto the cache entry.
*model.ErrorPassthroughRule
lowerKeywords []string // precomputed lower-cased keywords
lowerPlatforms []string // precomputed lower-cased platforms
errorCodeSet map[int]struct{} // precomputed set of error codes
}
const maxBodyMatchLen = 8 << 10 // 8KB; error messages are assumed not to first appear beyond the first 8KB
// NewErrorPassthroughService 创建错误透传规则服务 // NewErrorPassthroughService 创建错误透传规则服务
func NewErrorPassthroughService( func NewErrorPassthroughService(
repo ErrorPassthroughRepository, repo ErrorPassthroughRepository,
...@@ -150,17 +160,19 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod ...@@ -150,17 +160,19 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
return nil return nil
} }
bodyStr := strings.ToLower(string(body)) lowerPlatform := strings.ToLower(platform)
var bodyLower string // 延迟初始化,只在需要关键词匹配时计算
var bodyLowerDone bool
for _, rule := range rules { for _, rule := range rules {
if !rule.Enabled { if !rule.Enabled {
continue continue
} }
if !s.platformMatches(rule, platform) { if !s.platformMatchesCached(rule, lowerPlatform) {
continue continue
} }
if s.ruleMatches(rule, statusCode, bodyStr) { if s.ruleMatchesOptimized(rule, statusCode, body, &bodyLower, &bodyLowerDone) {
return rule return rule.ErrorPassthroughRule
} }
} }
...@@ -168,7 +180,7 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod ...@@ -168,7 +180,7 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
} }
// getCachedRules 获取缓存的规则列表(按优先级排序) // getCachedRules 获取缓存的规则列表(按优先级排序)
func (s *ErrorPassthroughService) getCachedRules() []*model.ErrorPassthroughRule { func (s *ErrorPassthroughService) getCachedRules() []*cachedPassthroughRule {
s.localCacheMu.RLock() s.localCacheMu.RLock()
rules := s.localCache rules := s.localCache
s.localCacheMu.RUnlock() s.localCacheMu.RUnlock()
...@@ -223,17 +235,39 @@ func (s *ErrorPassthroughService) reloadRulesFromDB(ctx context.Context) error { ...@@ -223,17 +235,39 @@ func (s *ErrorPassthroughService) reloadRulesFromDB(ctx context.Context) error {
return nil return nil
} }
// setLocalCache 设置本地缓存 // setLocalCache 设置本地缓存,预计算小写值和 set 以避免运行时重复计算
func (s *ErrorPassthroughService) setLocalCache(rules []*model.ErrorPassthroughRule) { func (s *ErrorPassthroughService) setLocalCache(rules []*model.ErrorPassthroughRule) {
cached := make([]*cachedPassthroughRule, len(rules))
for i, r := range rules {
cr := &cachedPassthroughRule{ErrorPassthroughRule: r}
if len(r.Keywords) > 0 {
cr.lowerKeywords = make([]string, len(r.Keywords))
for j, kw := range r.Keywords {
cr.lowerKeywords[j] = strings.ToLower(kw)
}
}
if len(r.Platforms) > 0 {
cr.lowerPlatforms = make([]string, len(r.Platforms))
for j, p := range r.Platforms {
cr.lowerPlatforms[j] = strings.ToLower(p)
}
}
if len(r.ErrorCodes) > 0 {
cr.errorCodeSet = make(map[int]struct{}, len(r.ErrorCodes))
for _, code := range r.ErrorCodes {
cr.errorCodeSet[code] = struct{}{}
}
}
cached[i] = cr
}
// 按优先级排序 // 按优先级排序
sorted := make([]*model.ErrorPassthroughRule, len(rules)) sort.Slice(cached, func(i, j int) bool {
copy(sorted, rules) return cached[i].Priority < cached[j].Priority
sort.Slice(sorted, func(i, j int) bool {
return sorted[i].Priority < sorted[j].Priority
}) })
s.localCacheMu.Lock() s.localCacheMu.Lock()
s.localCache = sorted s.localCache = cached
s.localCacheMu.Unlock() s.localCacheMu.Unlock()
} }
...@@ -273,62 +307,79 @@ func (s *ErrorPassthroughService) invalidateAndNotify(ctx context.Context) { ...@@ -273,62 +307,79 @@ func (s *ErrorPassthroughService) invalidateAndNotify(ctx context.Context) {
} }
} }
// platformMatches 检查平台是否匹配 // ensureBodyLower 延迟初始化 body 的小写版本,只做一次转换,限制 8KB
func (s *ErrorPassthroughService) platformMatches(rule *model.ErrorPassthroughRule, platform string) bool { func ensureBodyLower(body []byte, bodyLower *string, done *bool) string {
// 如果没有配置平台限制,则匹配所有平台 if *done {
if len(rule.Platforms) == 0 { return *bodyLower
return true }
b := body
if len(b) > maxBodyMatchLen {
b = b[:maxBodyMatchLen]
} }
*bodyLower = strings.ToLower(string(b))
*done = true
return *bodyLower
}
platform = strings.ToLower(platform) // platformMatchesCached 使用预计算的小写平台检查是否匹配
for _, p := range rule.Platforms { func (s *ErrorPassthroughService) platformMatchesCached(rule *cachedPassthroughRule, lowerPlatform string) bool {
if strings.ToLower(p) == platform { if len(rule.lowerPlatforms) == 0 {
return true
}
for _, p := range rule.lowerPlatforms {
if p == lowerPlatform {
return true return true
} }
} }
return false return false
} }
// ruleMatches 检查规则是否匹配 // ruleMatchesOptimized 优化的规则匹配,支持短路和延迟 body 转换
func (s *ErrorPassthroughService) ruleMatches(rule *model.ErrorPassthroughRule, statusCode int, bodyLower string) bool { func (s *ErrorPassthroughService) ruleMatchesOptimized(rule *cachedPassthroughRule, statusCode int, body []byte, bodyLower *string, bodyLowerDone *bool) bool {
hasErrorCodes := len(rule.ErrorCodes) > 0 hasErrorCodes := len(rule.errorCodeSet) > 0
hasKeywords := len(rule.Keywords) > 0 hasKeywords := len(rule.lowerKeywords) > 0
// 如果没有配置任何条件,不匹配
if !hasErrorCodes && !hasKeywords { if !hasErrorCodes && !hasKeywords {
return false return false
} }
codeMatch := !hasErrorCodes || s.containsInt(rule.ErrorCodes, statusCode) codeMatch := !hasErrorCodes || s.containsIntSet(rule.errorCodeSet, statusCode)
keywordMatch := !hasKeywords || s.containsAnyKeyword(bodyLower, rule.Keywords)
if rule.MatchMode == model.MatchModeAll { if rule.MatchMode == model.MatchModeAll {
// "all" 模式:所有配置的条件都必须满足 // "all" 模式:所有配置的条件都必须满足,短路
return codeMatch && keywordMatch if hasErrorCodes && !codeMatch {
return false
}
if hasKeywords {
return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
}
return codeMatch
} }
// "any" 模式:任一条件满足即可 // "any" 模式:任一条件满足即可,短路
if hasErrorCodes && hasKeywords { if hasErrorCodes && hasKeywords {
return codeMatch || keywordMatch if codeMatch {
}
return codeMatch && keywordMatch
}
// containsInt 检查切片是否包含指定整数
func (s *ErrorPassthroughService) containsInt(slice []int, val int) bool {
for _, v := range slice {
if v == val {
return true return true
} }
return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
} }
return false // 只配置了一种条件
if hasKeywords {
return s.containsAnyKeywordCached(ensureBodyLower(body, bodyLower, bodyLowerDone), rule.lowerKeywords)
}
return codeMatch
}
// containsIntSet 使用 map 查找替代线性扫描
func (s *ErrorPassthroughService) containsIntSet(set map[int]struct{}, val int) bool {
_, ok := set[val]
return ok
} }
// containsAnyKeyword 检查字符串是否包含任一关键词(不区分大小写) // containsAnyKeywordCached 使用预计算的小写关键词检查匹配
func (s *ErrorPassthroughService) containsAnyKeyword(bodyLower string, keywords []string) bool { func (s *ErrorPassthroughService) containsAnyKeywordCached(bodyLower string, lowerKeywords []string) bool {
for _, kw := range keywords { for _, kw := range lowerKeywords {
if strings.Contains(bodyLower, strings.ToLower(kw)) { if strings.Contains(bodyLower, kw) {
return true return true
} }
} }
......
...@@ -145,32 +145,58 @@ func newTestService(rules []*model.ErrorPassthroughRule) *ErrorPassthroughServic ...@@ -145,32 +145,58 @@ func newTestService(rules []*model.ErrorPassthroughRule) *ErrorPassthroughServic
return svc return svc
} }
// newCachedRuleForTest builds a cachedPassthroughRule around rule for use in
// tests. It precomputes lower-cased copies of the rule's keywords and platforms
// and a set of its error codes; each derived field stays nil when the
// corresponding source slice is empty.
func newCachedRuleForTest(rule *model.ErrorPassthroughRule) *cachedPassthroughRule {
	// lowerAll returns a lower-cased copy of values, or nil for an empty input.
	lowerAll := func(values []string) []string {
		if len(values) == 0 {
			return nil
		}
		lowered := make([]string, 0, len(values))
		for _, v := range values {
			lowered = append(lowered, strings.ToLower(v))
		}
		return lowered
	}

	var codes map[int]struct{}
	if n := len(rule.ErrorCodes); n > 0 {
		codes = make(map[int]struct{}, n)
		for _, code := range rule.ErrorCodes {
			codes[code] = struct{}{}
		}
	}

	return &cachedPassthroughRule{
		ErrorPassthroughRule: rule,
		lowerKeywords:        lowerAll(rule.Keywords),
		lowerPlatforms:       lowerAll(rule.Platforms),
		errorCodeSet:         codes,
	}
}
// ============================================================================= // =============================================================================
// 测试 ruleMatches 核心匹配逻辑 // 测试 ruleMatchesOptimized 核心匹配逻辑
// ============================================================================= // =============================================================================
func TestRuleMatches_NoConditions(t *testing.T) { func TestRuleMatches_NoConditions(t *testing.T) {
// 没有配置任何条件时,不应该匹配 // 没有配置任何条件时,不应该匹配
svc := newTestService(nil) svc := newTestService(nil)
rule := &model.ErrorPassthroughRule{ rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
Enabled: true, Enabled: true,
ErrorCodes: []int{}, ErrorCodes: []int{},
Keywords: []string{}, Keywords: []string{},
MatchMode: model.MatchModeAny, MatchMode: model.MatchModeAny,
} })
assert.False(t, svc.ruleMatches(rule, 422, "some error message"), var bodyLower string
var bodyLowerDone bool
assert.False(t, svc.ruleMatchesOptimized(rule, 422, []byte("some error message"), &bodyLower, &bodyLowerDone),
"没有配置条件时不应该匹配") "没有配置条件时不应该匹配")
} }
func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) { func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
svc := newTestService(nil) svc := newTestService(nil)
rule := &model.ErrorPassthroughRule{ rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
Enabled: true, Enabled: true,
ErrorCodes: []int{422, 400}, ErrorCodes: []int{422, 400},
Keywords: []string{}, Keywords: []string{},
MatchMode: model.MatchModeAny, MatchMode: model.MatchModeAny,
} })
tests := []struct { tests := []struct {
name string name string
...@@ -186,7 +212,9 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) { ...@@ -186,7 +212,9 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result := svc.ruleMatches(rule, tt.statusCode, tt.body) var bodyLower string
var bodyLowerDone bool
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
assert.Equal(t, tt.expected, result) assert.Equal(t, tt.expected, result)
}) })
} }
...@@ -194,12 +222,12 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) { ...@@ -194,12 +222,12 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) { func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
svc := newTestService(nil) svc := newTestService(nil)
rule := &model.ErrorPassthroughRule{ rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
Enabled: true, Enabled: true,
ErrorCodes: []int{}, ErrorCodes: []int{},
Keywords: []string{"context limit", "model not supported"}, Keywords: []string{"context limit", "model not supported"},
MatchMode: model.MatchModeAny, MatchMode: model.MatchModeAny,
} })
tests := []struct { tests := []struct {
name string name string
...@@ -210,16 +238,14 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) { ...@@ -210,16 +238,14 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
{"关键词匹配 context limit", 500, "error: context limit reached", true}, {"关键词匹配 context limit", 500, "error: context limit reached", true},
{"关键词匹配 model not supported", 400, "the model not supported here", true}, {"关键词匹配 model not supported", 400, "the model not supported here", true},
{"关键词不匹配", 422, "some other error", false}, {"关键词不匹配", 422, "some other error", false},
// 注意:ruleMatches 接收的 body 参数应该是已经转换为小写的 {"关键词大小写 - 自动转换", 500, "Context Limit exceeded", true},
// 实际使用时,MatchRule 会先将 body 转换为小写再传给 ruleMatches
{"关键词大小写 - 输入已小写", 500, "context limit exceeded", true},
} }
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
// 模拟 MatchRule 的行为:先转换为小写 var bodyLower string
bodyLower := strings.ToLower(tt.body) var bodyLowerDone bool
result := svc.ruleMatches(rule, tt.statusCode, bodyLower) result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
assert.Equal(t, tt.expected, result) assert.Equal(t, tt.expected, result)
}) })
} }
...@@ -228,12 +254,12 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) { ...@@ -228,12 +254,12 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
func TestRuleMatches_BothConditions_AnyMode(t *testing.T) { func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
// any 模式:错误码 OR 关键词 // any 模式:错误码 OR 关键词
svc := newTestService(nil) svc := newTestService(nil)
rule := &model.ErrorPassthroughRule{ rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
Enabled: true, Enabled: true,
ErrorCodes: []int{422, 400}, ErrorCodes: []int{422, 400},
Keywords: []string{"context limit"}, Keywords: []string{"context limit"},
MatchMode: model.MatchModeAny, MatchMode: model.MatchModeAny,
} })
tests := []struct { tests := []struct {
name string name string
...@@ -274,7 +300,9 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) { ...@@ -274,7 +300,9 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result := svc.ruleMatches(rule, tt.statusCode, tt.body) var bodyLower string
var bodyLowerDone bool
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
assert.Equal(t, tt.expected, result, tt.reason) assert.Equal(t, tt.expected, result, tt.reason)
}) })
} }
...@@ -283,12 +311,12 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) { ...@@ -283,12 +311,12 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
func TestRuleMatches_BothConditions_AllMode(t *testing.T) { func TestRuleMatches_BothConditions_AllMode(t *testing.T) {
// all 模式:错误码 AND 关键词 // all 模式:错误码 AND 关键词
svc := newTestService(nil) svc := newTestService(nil)
rule := &model.ErrorPassthroughRule{ rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
Enabled: true, Enabled: true,
ErrorCodes: []int{422, 400}, ErrorCodes: []int{422, 400},
Keywords: []string{"context limit"}, Keywords: []string{"context limit"},
MatchMode: model.MatchModeAll, MatchMode: model.MatchModeAll,
} })
tests := []struct { tests := []struct {
name string name string
...@@ -329,14 +357,16 @@ func TestRuleMatches_BothConditions_AllMode(t *testing.T) { ...@@ -329,14 +357,16 @@ func TestRuleMatches_BothConditions_AllMode(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
result := svc.ruleMatches(rule, tt.statusCode, tt.body) var bodyLower string
var bodyLowerDone bool
result := svc.ruleMatchesOptimized(rule, tt.statusCode, []byte(tt.body), &bodyLower, &bodyLowerDone)
assert.Equal(t, tt.expected, result, tt.reason) assert.Equal(t, tt.expected, result, tt.reason)
}) })
} }
} }
// ============================================================================= // =============================================================================
// 测试 platformMatches 平台匹配逻辑 // 测试 platformMatchesCached 平台匹配逻辑
// ============================================================================= // =============================================================================
func TestPlatformMatches(t *testing.T) { func TestPlatformMatches(t *testing.T) {
...@@ -394,10 +424,10 @@ func TestPlatformMatches(t *testing.T) { ...@@ -394,10 +424,10 @@ func TestPlatformMatches(t *testing.T) {
for _, tt := range tests { for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
rule := &model.ErrorPassthroughRule{ rule := newCachedRuleForTest(&model.ErrorPassthroughRule{
Platforms: tt.rulePlatforms, Platforms: tt.rulePlatforms,
} })
result := svc.platformMatches(rule, tt.requestPlatform) result := svc.platformMatchesCached(rule, strings.ToLower(tt.requestPlatform))
assert.Equal(t, tt.expected, result) assert.Equal(t, tt.expected, result)
}) })
} }
......
...@@ -368,15 +368,31 @@ type ForwardResult struct { ...@@ -368,15 +368,31 @@ type ForwardResult struct {
// UpstreamFailoverError indicates an upstream error that should trigger account failover. // UpstreamFailoverError indicates an upstream error that should trigger account failover.
type UpstreamFailoverError struct { type UpstreamFailoverError struct {
StatusCode int StatusCode int
ResponseBody []byte // 上游响应体,用于错误透传规则匹配 ResponseBody []byte // 上游响应体,用于错误透传规则匹配
ForceCacheBilling bool // Antigravity 粘性会话切换时设为 true ForceCacheBilling bool // Antigravity 粘性会话切换时设为 true
RetryableOnSameAccount bool // 临时性错误(如 Google 间歇性 400、空响应),应在同一账号上重试 N 次再切换
} }
func (e *UpstreamFailoverError) Error() string { func (e *UpstreamFailoverError) Error() string {
return fmt.Sprintf("upstream error: %d (failover)", e.StatusCode) return fmt.Sprintf("upstream error: %d (failover)", e.StatusCode)
} }
// TempUnscheduleRetryableError triggers a temporary unschedule of the account
// for failover errors flagged RetryableOnSameAccount. It is meant to be called
// by the handler layer once same-account retries are exhausted and the request
// is about to switch accounts.
//
// A nil failoverErr, or one without RetryableOnSameAccount set, is a no-op, as
// is any status code other than 400 or 502.
func (s *GatewayService) TempUnscheduleRetryableError(ctx context.Context, accountID int64, failoverErr *UpstreamFailoverError) {
	if failoverErr == nil {
		return
	}
	if !failoverErr.RetryableOnSameAccount {
		return
	}

	// The upstream status code selects which unschedule strategy applies.
	if code := failoverErr.StatusCode; code == http.StatusBadRequest {
		tempUnscheduleGoogleConfigError(ctx, s.accountRepo, accountID, "[handler]")
	} else if code == http.StatusBadGateway {
		tempUnscheduleEmptyResponse(ctx, s.accountRepo, accountID, "[handler]")
	}
}
// GatewayService handles API gateway operations // GatewayService handles API gateway operations
type GatewayService struct { type GatewayService struct {
accountRepo AccountRepository accountRepo AccountRepository
......
...@@ -880,6 +880,37 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex ...@@ -880,6 +880,37 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
// ErrorPolicyNone → 原有逻辑 // ErrorPolicyNone → 原有逻辑
s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
// 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁
if resp.StatusCode == http.StatusBadRequest {
msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
if isGoogleProjectConfigError(msg400) {
upstreamReqID := resp.Header.Get(requestIDHeader)
if upstreamReqID == "" {
upstreamReqID = resp.Header.Get("x-goog-request-id")
}
upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
upstreamDetail := ""
if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString(string(respBody), maxBytes)
}
log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID)
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: upstreamReqID,
Kind: "failover",
Message: upstreamMsg,
Detail: upstreamDetail,
})
return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: respBody, RetryableOnSameAccount: true}
}
}
if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) {
upstreamReqID := resp.Header.Get(requestIDHeader) upstreamReqID := resp.Header.Get(requestIDHeader)
if upstreamReqID == "" { if upstreamReqID == "" {
...@@ -1330,6 +1361,34 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin. ...@@ -1330,6 +1361,34 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
// ErrorPolicyNone → 原有逻辑 // ErrorPolicyNone → 原有逻辑
s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody) s.handleGeminiUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)
// 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁
if resp.StatusCode == http.StatusBadRequest {
msg400 := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
if isGoogleProjectConfigError(msg400) {
evBody := unwrapIfNeeded(isOAuth, respBody)
upstreamMsg := sanitizeUpstreamErrorMessage(strings.TrimSpace(extractUpstreamErrorMessage(evBody)))
upstreamDetail := ""
if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
maxBytes := s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes
if maxBytes <= 0 {
maxBytes = 2048
}
upstreamDetail = truncateString(string(evBody), maxBytes)
}
log.Printf("[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d", upstreamMsg, account.ID)
appendOpsUpstreamError(c, OpsUpstreamErrorEvent{
Platform: account.Platform,
AccountID: account.ID,
AccountName: account.Name,
UpstreamStatusCode: resp.StatusCode,
UpstreamRequestID: requestID,
Kind: "failover",
Message: upstreamMsg,
Detail: upstreamDetail,
})
return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode, ResponseBody: evBody, RetryableOnSameAccount: true}
}
}
if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) { if s.shouldFailoverGeminiUpstreamError(resp.StatusCode) {
evBody := unwrapIfNeeded(isOAuth, respBody) evBody := unwrapIfNeeded(isOAuth, respBody)
upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(evBody)) upstreamMsg := strings.TrimSpace(extractUpstreamErrorMessage(evBody))
......
...@@ -20,6 +20,10 @@ const ( ...@@ -20,6 +20,10 @@ const (
// retry the specific upstream attempt (not just the client request). // retry the specific upstream attempt (not just the client request).
// This value is sanitized+trimmed before being persisted. // This value is sanitized+trimmed before being persisted.
OpsUpstreamRequestBodyKey = "ops_upstream_request_body" OpsUpstreamRequestBodyKey = "ops_upstream_request_body"
// OpsSkipPassthroughKey 由 applyErrorPassthroughRule 在命中 skip_monitoring=true 的规则时设置。
// ops_error_logger 中间件检查此 key,为 true 时跳过错误记录。
OpsSkipPassthroughKey = "ops_skip_passthrough"
) )
func setOpsUpstreamError(c *gin.Context, upstreamStatusCode int, upstreamMessage, upstreamDetail string) { func setOpsUpstreamError(c *gin.Context, upstreamStatusCode int, upstreamMessage, upstreamDetail string) {
...@@ -103,6 +107,37 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) { ...@@ -103,6 +107,37 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
evCopy := ev evCopy := ev
existing = append(existing, &evCopy) existing = append(existing, &evCopy)
c.Set(OpsUpstreamErrorsKey, existing) c.Set(OpsUpstreamErrorsKey, existing)
checkSkipMonitoringForUpstreamEvent(c, &evCopy)
}
// checkSkipMonitoringForUpstreamEvent checks whether the upstream error event
// matches a passthrough rule with skip_monitoring=true and, if so, sets the
// OpsSkipPassthroughKey on the context. This ensures intermediate retry /
// failover errors (which never go through the final applyErrorPassthroughRule
// path) can still suppress ops_error_logs recording.
func checkSkipMonitoringForUpstreamEvent(c *gin.Context, ev *OpsUpstreamErrorEvent) {
if ev.UpstreamStatusCode == 0 {
return
}
svc := getBoundErrorPassthroughService(c)
if svc == nil {
return
}
// Use the best available body representation for keyword matching.
// Even when body is empty, MatchRule can still match rules that only
// specify ErrorCodes (no Keywords), so we always call it.
body := ev.Detail
if body == "" {
body = ev.Message
}
rule := svc.MatchRule(ev.Platform, ev.UpstreamStatusCode, []byte(body))
if rule != nil && rule.SkipMonitoring {
c.Set(OpsSkipPassthroughKey, true)
}
} }
func marshalOpsUpstreamErrors(events []*OpsUpstreamErrorEvent) *string { func marshalOpsUpstreamErrors(events []*OpsUpstreamErrorEvent) *string {
......
-- Add skip_monitoring field to error_passthrough_rules table
-- When true, errors matching this rule will not be recorded in ops_error_logs
ALTER TABLE error_passthrough_rules
ADD COLUMN IF NOT EXISTS skip_monitoring BOOLEAN NOT NULL DEFAULT false;
...@@ -21,6 +21,7 @@ export interface ErrorPassthroughRule { ...@@ -21,6 +21,7 @@ export interface ErrorPassthroughRule {
response_code: number | null response_code: number | null
passthrough_body: boolean passthrough_body: boolean
custom_message: string | null custom_message: string | null
skip_monitoring: boolean
description: string | null description: string | null
created_at: string created_at: string
updated_at: string updated_at: string
...@@ -41,6 +42,7 @@ export interface CreateRuleRequest { ...@@ -41,6 +42,7 @@ export interface CreateRuleRequest {
response_code?: number | null response_code?: number | null
passthrough_body?: boolean passthrough_body?: boolean
custom_message?: string | null custom_message?: string | null
skip_monitoring?: boolean
description?: string | null description?: string | null
} }
...@@ -59,6 +61,7 @@ export interface UpdateRuleRequest { ...@@ -59,6 +61,7 @@ export interface UpdateRuleRequest {
response_code?: number | null response_code?: number | null
passthrough_body?: boolean passthrough_body?: boolean
custom_message?: string | null custom_message?: string | null
skip_monitoring?: boolean
description?: string | null description?: string | null
} }
......
...@@ -148,6 +148,16 @@ ...@@ -148,6 +148,16 @@
{{ rule.passthrough_body ? t('admin.errorPassthrough.passthrough') : t('admin.errorPassthrough.custom') }} {{ rule.passthrough_body ? t('admin.errorPassthrough.passthrough') : t('admin.errorPassthrough.custom') }}
</span> </span>
</div> </div>
<div v-if="rule.skip_monitoring" class="flex items-center gap-1">
<Icon
name="checkCircle"
size="xs"
class="text-yellow-500"
/>
<span class="text-gray-600 dark:text-gray-400">
{{ t('admin.errorPassthrough.skipMonitoring') }}
</span>
</div>
</div> </div>
</td> </td>
<td class="px-3 py-2"> <td class="px-3 py-2">
...@@ -366,6 +376,19 @@ ...@@ -366,6 +376,19 @@
</div> </div>
</div> </div>
<!-- Skip Monitoring -->
<div class="flex items-center gap-1.5">
<input
type="checkbox"
v-model="form.skip_monitoring"
class="h-3.5 w-3.5 rounded border-gray-300 text-yellow-600 focus:ring-yellow-500"
/>
<span class="text-xs font-medium text-gray-700 dark:text-gray-300">
{{ t('admin.errorPassthrough.form.skipMonitoring') }}
</span>
</div>
<p class="input-hint text-xs -mt-3">{{ t('admin.errorPassthrough.form.skipMonitoringHint') }}</p>
<!-- Enabled --> <!-- Enabled -->
<div class="flex items-center gap-1.5"> <div class="flex items-center gap-1.5">
<input <input
...@@ -453,6 +476,7 @@ const form = reactive({ ...@@ -453,6 +476,7 @@ const form = reactive({
response_code: null as number | null, response_code: null as number | null,
passthrough_body: true, passthrough_body: true,
custom_message: null as string | null, custom_message: null as string | null,
skip_monitoring: false,
description: null as string | null description: null as string | null
}) })
...@@ -497,6 +521,7 @@ const resetForm = () => { ...@@ -497,6 +521,7 @@ const resetForm = () => {
form.response_code = null form.response_code = null
form.passthrough_body = true form.passthrough_body = true
form.custom_message = null form.custom_message = null
form.skip_monitoring = false
form.description = null form.description = null
errorCodesInput.value = '' errorCodesInput.value = ''
keywordsInput.value = '' keywordsInput.value = ''
...@@ -520,6 +545,7 @@ const handleEdit = (rule: ErrorPassthroughRule) => { ...@@ -520,6 +545,7 @@ const handleEdit = (rule: ErrorPassthroughRule) => {
form.response_code = rule.response_code form.response_code = rule.response_code
form.passthrough_body = rule.passthrough_body form.passthrough_body = rule.passthrough_body
form.custom_message = rule.custom_message form.custom_message = rule.custom_message
form.skip_monitoring = rule.skip_monitoring
form.description = rule.description form.description = rule.description
errorCodesInput.value = rule.error_codes.join(', ') errorCodesInput.value = rule.error_codes.join(', ')
keywordsInput.value = rule.keywords.join('\n') keywordsInput.value = rule.keywords.join('\n')
...@@ -575,6 +601,7 @@ const handleSubmit = async () => { ...@@ -575,6 +601,7 @@ const handleSubmit = async () => {
response_code: form.passthrough_code ? null : form.response_code, response_code: form.passthrough_code ? null : form.response_code,
passthrough_body: form.passthrough_body, passthrough_body: form.passthrough_body,
custom_message: form.passthrough_body ? null : form.custom_message, custom_message: form.passthrough_body ? null : form.custom_message,
skip_monitoring: form.skip_monitoring,
description: form.description?.trim() || null description: form.description?.trim() || null
} }
......
...@@ -3353,6 +3353,7 @@ export default { ...@@ -3353,6 +3353,7 @@ export default {
custom: 'Custom', custom: 'Custom',
code: 'Code', code: 'Code',
body: 'Body', body: 'Body',
skipMonitoring: 'Skip Monitoring',
// Columns // Columns
columns: { columns: {
...@@ -3397,6 +3398,8 @@ export default { ...@@ -3397,6 +3398,8 @@ export default {
passthroughBody: 'Passthrough upstream error message', passthroughBody: 'Passthrough upstream error message',
customMessage: 'Custom error message', customMessage: 'Custom error message',
customMessagePlaceholder: 'Error message to return to client...', customMessagePlaceholder: 'Error message to return to client...',
skipMonitoring: 'Skip monitoring',
skipMonitoringHint: 'When enabled, errors matching this rule will not be recorded in ops monitoring',
enabled: 'Enable this rule' enabled: 'Enable this rule'
}, },
......
...@@ -3527,6 +3527,7 @@ export default { ...@@ -3527,6 +3527,7 @@ export default {
custom: '自定义', custom: '自定义',
code: '状态码', code: '状态码',
body: '消息体', body: '消息体',
skipMonitoring: '跳过监控',
// Columns // Columns
columns: { columns: {
...@@ -3571,6 +3572,8 @@ export default { ...@@ -3571,6 +3572,8 @@ export default {
passthroughBody: '透传上游错误信息', passthroughBody: '透传上游错误信息',
customMessage: '自定义错误信息', customMessage: '自定义错误信息',
customMessagePlaceholder: '返回给客户端的错误信息...', customMessagePlaceholder: '返回给客户端的错误信息...',
skipMonitoring: '跳过运维监控记录',
skipMonitoringHint: '开启后,匹配此规则的错误不会被记录到运维监控中',
enabled: '启用此规则' enabled: '启用此规则'
}, },
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment