Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
陈曦
sub2api
Commits
ae6fed15
Unverified
Commit
ae6fed15
authored
Feb 10, 2026
by
Wesley Liddick
Committed by
GitHub
Feb 10, 2026
Browse files
Merge pull request #548 from Edric-Li/main
feat: 错误处理增强、重试优化与性能改进
parents
84ced1c4
378e476e
Changes
32
Hide whitespace changes
Inline
Side-by-side
backend/internal/service/error_passthrough_runtime.go
View file @
ae6fed15
...
...
@@ -61,6 +61,11 @@ func applyErrorPassthroughRule(
errMsg
=
*
rule
.
CustomMessage
}
// 命中 skip_monitoring 时在 context 中标记,供 ops_error_logger 跳过记录。
if
rule
.
SkipMonitoring
{
c
.
Set
(
OpsSkipPassthroughKey
,
true
)
}
// 与现有 failover 场景保持一致:命中规则时统一返回 upstream_error。
errType
=
"upstream_error"
return
status
,
errType
,
errMsg
,
true
...
...
backend/internal/service/error_passthrough_runtime_test.go
View file @
ae6fed15
...
...
@@ -194,6 +194,63 @@ func TestGeminiWriteGeminiMappedError_AppliesRuleFor422(t *testing.T) {
assert
.
Equal
(
t
,
"Gemini上游失败"
,
errField
[
"message"
])
}
func
TestApplyErrorPassthroughRule_SkipMonitoringSetsContextKey
(
t
*
testing
.
T
)
{
gin
.
SetMode
(
gin
.
TestMode
)
rec
:=
httptest
.
NewRecorder
()
c
,
_
:=
gin
.
CreateTestContext
(
rec
)
rule
:=
newNonFailoverPassthroughRule
(
http
.
StatusBadRequest
,
"prompt is too long"
,
http
.
StatusBadRequest
,
"上下文超限"
)
rule
.
SkipMonitoring
=
true
ruleSvc
:=
&
ErrorPassthroughService
{}
ruleSvc
.
setLocalCache
([]
*
model
.
ErrorPassthroughRule
{
rule
})
BindErrorPassthroughService
(
c
,
ruleSvc
)
_
,
_
,
_
,
matched
:=
applyErrorPassthroughRule
(
c
,
PlatformAnthropic
,
http
.
StatusBadRequest
,
[]
byte
(
`{"error":{"message":"prompt is too long"}}`
),
http
.
StatusBadGateway
,
"upstream_error"
,
"Upstream request failed"
,
)
assert
.
True
(
t
,
matched
)
v
,
exists
:=
c
.
Get
(
OpsSkipPassthroughKey
)
assert
.
True
(
t
,
exists
,
"OpsSkipPassthroughKey should be set when skip_monitoring=true"
)
boolVal
,
ok
:=
v
.
(
bool
)
assert
.
True
(
t
,
ok
,
"value should be bool"
)
assert
.
True
(
t
,
boolVal
)
}
func
TestApplyErrorPassthroughRule_NoSkipMonitoringDoesNotSetContextKey
(
t
*
testing
.
T
)
{
gin
.
SetMode
(
gin
.
TestMode
)
rec
:=
httptest
.
NewRecorder
()
c
,
_
:=
gin
.
CreateTestContext
(
rec
)
rule
:=
newNonFailoverPassthroughRule
(
http
.
StatusBadRequest
,
"prompt is too long"
,
http
.
StatusBadRequest
,
"上下文超限"
)
rule
.
SkipMonitoring
=
false
ruleSvc
:=
&
ErrorPassthroughService
{}
ruleSvc
.
setLocalCache
([]
*
model
.
ErrorPassthroughRule
{
rule
})
BindErrorPassthroughService
(
c
,
ruleSvc
)
_
,
_
,
_
,
matched
:=
applyErrorPassthroughRule
(
c
,
PlatformAnthropic
,
http
.
StatusBadRequest
,
[]
byte
(
`{"error":{"message":"prompt is too long"}}`
),
http
.
StatusBadGateway
,
"upstream_error"
,
"Upstream request failed"
,
)
assert
.
True
(
t
,
matched
)
_
,
exists
:=
c
.
Get
(
OpsSkipPassthroughKey
)
assert
.
False
(
t
,
exists
,
"OpsSkipPassthroughKey should NOT be set when skip_monitoring=false"
)
}
func
newNonFailoverPassthroughRule
(
statusCode
int
,
keyword
string
,
respCode
int
,
customMessage
string
)
*
model
.
ErrorPassthroughRule
{
return
&
model
.
ErrorPassthroughRule
{
ID
:
1
,
...
...
backend/internal/service/error_passthrough_service.go
View file @
ae6fed15
...
...
@@ -45,10 +45,20 @@ type ErrorPassthroughService struct {
cache
ErrorPassthroughCache
// 本地内存缓存,用于快速匹配
localCache
[]
*
model
.
Error
PassthroughRule
localCache
[]
*
cached
PassthroughRule
localCacheMu
sync
.
RWMutex
}
// cachedPassthroughRule 预计算的规则缓存,避免运行时重复 ToLower
type
cachedPassthroughRule
struct
{
*
model
.
ErrorPassthroughRule
lowerKeywords
[]
string
// 预计算的小写关键词
lowerPlatforms
[]
string
// 预计算的小写平台
errorCodeSet
map
[
int
]
struct
{}
// 预计算的 error code set
}
// maxBodyMatchLen is the number of leading body bytes considered for keyword
// matching (8KB). The author's stated assumption is that error information
// never first appears beyond the first 8KB of a body.
const maxBodyMatchLen = 8 * 1024
// NewErrorPassthroughService 创建错误透传规则服务
func
NewErrorPassthroughService
(
repo
ErrorPassthroughRepository
,
...
...
@@ -150,17 +160,19 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
return
nil
}
bodyStr
:=
strings
.
ToLower
(
string
(
body
))
lowerPlatform
:=
strings
.
ToLower
(
platform
)
var
bodyLower
string
// 延迟初始化,只在需要关键词匹配时计算
var
bodyLowerDone
bool
for
_
,
rule
:=
range
rules
{
if
!
rule
.
Enabled
{
continue
}
if
!
s
.
platformMatches
(
rule
,
p
latform
)
{
if
!
s
.
platformMatches
Cached
(
rule
,
lowerP
latform
)
{
continue
}
if
s
.
ruleMatches
(
rule
,
statusCode
,
body
Str
)
{
return
rule
if
s
.
ruleMatches
Optimized
(
rule
,
statusCode
,
body
,
&
bodyLower
,
&
bodyLowerDone
)
{
return
rule
.
ErrorPassthroughRule
}
}
...
...
@@ -168,7 +180,7 @@ func (s *ErrorPassthroughService) MatchRule(platform string, statusCode int, bod
}
// getCachedRules 获取缓存的规则列表(按优先级排序)
func
(
s
*
ErrorPassthroughService
)
getCachedRules
()
[]
*
model
.
Error
PassthroughRule
{
func
(
s
*
ErrorPassthroughService
)
getCachedRules
()
[]
*
cached
PassthroughRule
{
s
.
localCacheMu
.
RLock
()
rules
:=
s
.
localCache
s
.
localCacheMu
.
RUnlock
()
...
...
@@ -223,17 +235,39 @@ func (s *ErrorPassthroughService) reloadRulesFromDB(ctx context.Context) error {
return
nil
}
// setLocalCache 设置本地缓存
// setLocalCache 设置本地缓存
,预计算小写值和 set 以避免运行时重复计算
func
(
s
*
ErrorPassthroughService
)
setLocalCache
(
rules
[]
*
model
.
ErrorPassthroughRule
)
{
cached
:=
make
([]
*
cachedPassthroughRule
,
len
(
rules
))
for
i
,
r
:=
range
rules
{
cr
:=
&
cachedPassthroughRule
{
ErrorPassthroughRule
:
r
}
if
len
(
r
.
Keywords
)
>
0
{
cr
.
lowerKeywords
=
make
([]
string
,
len
(
r
.
Keywords
))
for
j
,
kw
:=
range
r
.
Keywords
{
cr
.
lowerKeywords
[
j
]
=
strings
.
ToLower
(
kw
)
}
}
if
len
(
r
.
Platforms
)
>
0
{
cr
.
lowerPlatforms
=
make
([]
string
,
len
(
r
.
Platforms
))
for
j
,
p
:=
range
r
.
Platforms
{
cr
.
lowerPlatforms
[
j
]
=
strings
.
ToLower
(
p
)
}
}
if
len
(
r
.
ErrorCodes
)
>
0
{
cr
.
errorCodeSet
=
make
(
map
[
int
]
struct
{},
len
(
r
.
ErrorCodes
))
for
_
,
code
:=
range
r
.
ErrorCodes
{
cr
.
errorCodeSet
[
code
]
=
struct
{}{}
}
}
cached
[
i
]
=
cr
}
// 按优先级排序
sorted
:=
make
([]
*
model
.
ErrorPassthroughRule
,
len
(
rules
))
copy
(
sorted
,
rules
)
sort
.
Slice
(
sorted
,
func
(
i
,
j
int
)
bool
{
return
sorted
[
i
]
.
Priority
<
sorted
[
j
]
.
Priority
sort
.
Slice
(
cached
,
func
(
i
,
j
int
)
bool
{
return
cached
[
i
]
.
Priority
<
cached
[
j
]
.
Priority
})
s
.
localCacheMu
.
Lock
()
s
.
localCache
=
sort
ed
s
.
localCache
=
cach
ed
s
.
localCacheMu
.
Unlock
()
}
...
...
@@ -273,62 +307,79 @@ func (s *ErrorPassthroughService) invalidateAndNotify(ctx context.Context) {
}
}
// platformMatches 检查平台是否匹配
func
(
s
*
ErrorPassthroughService
)
platformMatches
(
rule
*
model
.
ErrorPassthroughRule
,
platform
string
)
bool
{
// 如果没有配置平台限制,则匹配所有平台
if
len
(
rule
.
Platforms
)
==
0
{
return
true
// ensureBodyLower 延迟初始化 body 的小写版本,只做一次转换,限制 8KB
func
ensureBodyLower
(
body
[]
byte
,
bodyLower
*
string
,
done
*
bool
)
string
{
if
*
done
{
return
*
bodyLower
}
b
:=
body
if
len
(
b
)
>
maxBodyMatchLen
{
b
=
b
[
:
maxBodyMatchLen
]
}
*
bodyLower
=
strings
.
ToLower
(
string
(
b
))
*
done
=
true
return
*
bodyLower
}
platform
=
strings
.
ToLower
(
platform
)
for
_
,
p
:=
range
rule
.
Platforms
{
if
strings
.
ToLower
(
p
)
==
platform
{
// platformMatchesCached 使用预计算的小写平台检查是否匹配
func
(
s
*
ErrorPassthroughService
)
platformMatchesCached
(
rule
*
cachedPassthroughRule
,
lowerPlatform
string
)
bool
{
if
len
(
rule
.
lowerPlatforms
)
==
0
{
return
true
}
for
_
,
p
:=
range
rule
.
lowerPlatforms
{
if
p
==
lowerPlatform
{
return
true
}
}
return
false
}
// ruleMatches
检查规则是否匹配
func
(
s
*
ErrorPassthroughService
)
ruleMatches
(
rule
*
model
.
Error
PassthroughRule
,
statusCode
int
,
bodyLower
string
)
bool
{
hasErrorCodes
:=
len
(
rule
.
E
rrorCode
s
)
>
0
hasKeywords
:=
len
(
rule
.
Keywords
)
>
0
// ruleMatches
Optimized 优化的规则匹配,支持短路和延迟 body 转换
func
(
s
*
ErrorPassthroughService
)
ruleMatches
Optimized
(
rule
*
cached
PassthroughRule
,
statusCode
int
,
body
[]
byte
,
bodyLower
*
string
,
bodyLowerDone
*
bool
)
bool
{
hasErrorCodes
:=
len
(
rule
.
e
rrorCode
Set
)
>
0
hasKeywords
:=
len
(
rule
.
lower
Keywords
)
>
0
// 如果没有配置任何条件,不匹配
if
!
hasErrorCodes
&&
!
hasKeywords
{
return
false
}
codeMatch
:=
!
hasErrorCodes
||
s
.
containsInt
(
rule
.
ErrorCodes
,
statusCode
)
keywordMatch
:=
!
hasKeywords
||
s
.
containsAnyKeyword
(
bodyLower
,
rule
.
Keywords
)
codeMatch
:=
!
hasErrorCodes
||
s
.
containsIntSet
(
rule
.
errorCodeSet
,
statusCode
)
if
rule
.
MatchMode
==
model
.
MatchModeAll
{
// "all" 模式:所有配置的条件都必须满足
return
codeMatch
&&
keywordMatch
// "all" 模式:所有配置的条件都必须满足,短路
if
hasErrorCodes
&&
!
codeMatch
{
return
false
}
if
hasKeywords
{
return
s
.
containsAnyKeywordCached
(
ensureBodyLower
(
body
,
bodyLower
,
bodyLowerDone
),
rule
.
lowerKeywords
)
}
return
codeMatch
}
// "any" 模式:任一条件满足即可
// "any" 模式:任一条件满足即可
,短路
if
hasErrorCodes
&&
hasKeywords
{
return
codeMatch
||
keywordMatch
}
return
codeMatch
&&
keywordMatch
}
// containsInt 检查切片是否包含指定整数
func
(
s
*
ErrorPassthroughService
)
containsInt
(
slice
[]
int
,
val
int
)
bool
{
for
_
,
v
:=
range
slice
{
if
v
==
val
{
if
codeMatch
{
return
true
}
return
s
.
containsAnyKeywordCached
(
ensureBodyLower
(
body
,
bodyLower
,
bodyLowerDone
),
rule
.
lowerKeywords
)
}
return
false
// 只配置了一种条件
if
hasKeywords
{
return
s
.
containsAnyKeywordCached
(
ensureBodyLower
(
body
,
bodyLower
,
bodyLowerDone
),
rule
.
lowerKeywords
)
}
return
codeMatch
}
// containsIntSet 使用 map 查找替代线性扫描
func
(
s
*
ErrorPassthroughService
)
containsIntSet
(
set
map
[
int
]
struct
{},
val
int
)
bool
{
_
,
ok
:=
set
[
val
]
return
ok
}
// containsAnyKeyword
检查字符串是否包含任一关键词(不区分大小写)
func
(
s
*
ErrorPassthroughService
)
containsAnyKeyword
(
bodyLower
string
,
k
eywords
[]
string
)
bool
{
for
_
,
kw
:=
range
k
eywords
{
if
strings
.
Contains
(
bodyLower
,
strings
.
ToLower
(
kw
)
)
{
// containsAnyKeyword
Cached 使用预计算的小写关键词检查匹配
func
(
s
*
ErrorPassthroughService
)
containsAnyKeyword
Cached
(
bodyLower
string
,
lowerK
eywords
[]
string
)
bool
{
for
_
,
kw
:=
range
lowerK
eywords
{
if
strings
.
Contains
(
bodyLower
,
kw
)
{
return
true
}
}
...
...
backend/internal/service/error_passthrough_service_test.go
View file @
ae6fed15
...
...
@@ -145,32 +145,58 @@ func newTestService(rules []*model.ErrorPassthroughRule) *ErrorPassthroughServic
return
svc
}
// newCachedRuleForTest 从 model.ErrorPassthroughRule 创建 cachedPassthroughRule(测试用)
func
newCachedRuleForTest
(
rule
*
model
.
ErrorPassthroughRule
)
*
cachedPassthroughRule
{
cr
:=
&
cachedPassthroughRule
{
ErrorPassthroughRule
:
rule
}
if
len
(
rule
.
Keywords
)
>
0
{
cr
.
lowerKeywords
=
make
([]
string
,
len
(
rule
.
Keywords
))
for
j
,
kw
:=
range
rule
.
Keywords
{
cr
.
lowerKeywords
[
j
]
=
strings
.
ToLower
(
kw
)
}
}
if
len
(
rule
.
Platforms
)
>
0
{
cr
.
lowerPlatforms
=
make
([]
string
,
len
(
rule
.
Platforms
))
for
j
,
p
:=
range
rule
.
Platforms
{
cr
.
lowerPlatforms
[
j
]
=
strings
.
ToLower
(
p
)
}
}
if
len
(
rule
.
ErrorCodes
)
>
0
{
cr
.
errorCodeSet
=
make
(
map
[
int
]
struct
{},
len
(
rule
.
ErrorCodes
))
for
_
,
code
:=
range
rule
.
ErrorCodes
{
cr
.
errorCodeSet
[
code
]
=
struct
{}{}
}
}
return
cr
}
// =============================================================================
// 测试 ruleMatches 核心匹配逻辑
// 测试 ruleMatches
Optimized
核心匹配逻辑
// =============================================================================
func
TestRuleMatches_NoConditions
(
t
*
testing
.
T
)
{
// 没有配置任何条件时,不应该匹配
svc
:=
newTestService
(
nil
)
rule
:=
&
model
.
ErrorPassthroughRule
{
rule
:=
newCachedRuleForTest
(
&
model
.
ErrorPassthroughRule
{
Enabled
:
true
,
ErrorCodes
:
[]
int
{},
Keywords
:
[]
string
{},
MatchMode
:
model
.
MatchModeAny
,
}
}
)
assert
.
False
(
t
,
svc
.
ruleMatches
(
rule
,
422
,
"some error message"
),
var
bodyLower
string
var
bodyLowerDone
bool
assert
.
False
(
t
,
svc
.
ruleMatchesOptimized
(
rule
,
422
,
[]
byte
(
"some error message"
),
&
bodyLower
,
&
bodyLowerDone
),
"没有配置条件时不应该匹配"
)
}
func
TestRuleMatches_OnlyErrorCodes_AnyMode
(
t
*
testing
.
T
)
{
svc
:=
newTestService
(
nil
)
rule
:=
&
model
.
ErrorPassthroughRule
{
rule
:=
newCachedRuleForTest
(
&
model
.
ErrorPassthroughRule
{
Enabled
:
true
,
ErrorCodes
:
[]
int
{
422
,
400
},
Keywords
:
[]
string
{},
MatchMode
:
model
.
MatchModeAny
,
}
}
)
tests
:=
[]
struct
{
name
string
...
...
@@ -186,7 +212,9 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
result
:=
svc
.
ruleMatches
(
rule
,
tt
.
statusCode
,
tt
.
body
)
var
bodyLower
string
var
bodyLowerDone
bool
result
:=
svc
.
ruleMatchesOptimized
(
rule
,
tt
.
statusCode
,
[]
byte
(
tt
.
body
),
&
bodyLower
,
&
bodyLowerDone
)
assert
.
Equal
(
t
,
tt
.
expected
,
result
)
})
}
...
...
@@ -194,12 +222,12 @@ func TestRuleMatches_OnlyErrorCodes_AnyMode(t *testing.T) {
func
TestRuleMatches_OnlyKeywords_AnyMode
(
t
*
testing
.
T
)
{
svc
:=
newTestService
(
nil
)
rule
:=
&
model
.
ErrorPassthroughRule
{
rule
:=
newCachedRuleForTest
(
&
model
.
ErrorPassthroughRule
{
Enabled
:
true
,
ErrorCodes
:
[]
int
{},
Keywords
:
[]
string
{
"context limit"
,
"model not supported"
},
MatchMode
:
model
.
MatchModeAny
,
}
}
)
tests
:=
[]
struct
{
name
string
...
...
@@ -210,16 +238,14 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
{
"关键词匹配 context limit"
,
500
,
"error: context limit reached"
,
true
},
{
"关键词匹配 model not supported"
,
400
,
"the model not supported here"
,
true
},
{
"关键词不匹配"
,
422
,
"some other error"
,
false
},
// 注意:ruleMatches 接收的 body 参数应该是已经转换为小写的
// 实际使用时,MatchRule 会先将 body 转换为小写再传给 ruleMatches
{
"关键词大小写 - 输入已小写"
,
500
,
"context limit exceeded"
,
true
},
{
"关键词大小写 - 自动转换"
,
500
,
"Context Limit exceeded"
,
true
},
}
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
// 模拟 MatchRule 的行为:先转换为小写
bodyLower
:=
strings
.
ToLower
(
tt
.
body
)
result
:=
svc
.
ruleMatches
(
rule
,
tt
.
statusCode
,
bodyLower
)
var
bodyLower
string
var
bodyLower
Done
bool
result
:=
svc
.
ruleMatches
Optimized
(
rule
,
tt
.
statusCode
,
[]
byte
(
tt
.
body
),
&
bodyLower
,
&
bodyLowerDone
)
assert
.
Equal
(
t
,
tt
.
expected
,
result
)
})
}
...
...
@@ -228,12 +254,12 @@ func TestRuleMatches_OnlyKeywords_AnyMode(t *testing.T) {
func
TestRuleMatches_BothConditions_AnyMode
(
t
*
testing
.
T
)
{
// any 模式:错误码 OR 关键词
svc
:=
newTestService
(
nil
)
rule
:=
&
model
.
ErrorPassthroughRule
{
rule
:=
newCachedRuleForTest
(
&
model
.
ErrorPassthroughRule
{
Enabled
:
true
,
ErrorCodes
:
[]
int
{
422
,
400
},
Keywords
:
[]
string
{
"context limit"
},
MatchMode
:
model
.
MatchModeAny
,
}
}
)
tests
:=
[]
struct
{
name
string
...
...
@@ -274,7 +300,9 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
result
:=
svc
.
ruleMatches
(
rule
,
tt
.
statusCode
,
tt
.
body
)
var
bodyLower
string
var
bodyLowerDone
bool
result
:=
svc
.
ruleMatchesOptimized
(
rule
,
tt
.
statusCode
,
[]
byte
(
tt
.
body
),
&
bodyLower
,
&
bodyLowerDone
)
assert
.
Equal
(
t
,
tt
.
expected
,
result
,
tt
.
reason
)
})
}
...
...
@@ -283,12 +311,12 @@ func TestRuleMatches_BothConditions_AnyMode(t *testing.T) {
func
TestRuleMatches_BothConditions_AllMode
(
t
*
testing
.
T
)
{
// all 模式:错误码 AND 关键词
svc
:=
newTestService
(
nil
)
rule
:=
&
model
.
ErrorPassthroughRule
{
rule
:=
newCachedRuleForTest
(
&
model
.
ErrorPassthroughRule
{
Enabled
:
true
,
ErrorCodes
:
[]
int
{
422
,
400
},
Keywords
:
[]
string
{
"context limit"
},
MatchMode
:
model
.
MatchModeAll
,
}
}
)
tests
:=
[]
struct
{
name
string
...
...
@@ -329,14 +357,16 @@ func TestRuleMatches_BothConditions_AllMode(t *testing.T) {
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
result
:=
svc
.
ruleMatches
(
rule
,
tt
.
statusCode
,
tt
.
body
)
var
bodyLower
string
var
bodyLowerDone
bool
result
:=
svc
.
ruleMatchesOptimized
(
rule
,
tt
.
statusCode
,
[]
byte
(
tt
.
body
),
&
bodyLower
,
&
bodyLowerDone
)
assert
.
Equal
(
t
,
tt
.
expected
,
result
,
tt
.
reason
)
})
}
}
// =============================================================================
// 测试 platformMatches 平台匹配逻辑
// 测试 platformMatches
Cached
平台匹配逻辑
// =============================================================================
func
TestPlatformMatches
(
t
*
testing
.
T
)
{
...
...
@@ -394,10 +424,10 @@ func TestPlatformMatches(t *testing.T) {
for
_
,
tt
:=
range
tests
{
t
.
Run
(
tt
.
name
,
func
(
t
*
testing
.
T
)
{
rule
:=
&
model
.
ErrorPassthroughRule
{
rule
:=
newCachedRuleForTest
(
&
model
.
ErrorPassthroughRule
{
Platforms
:
tt
.
rulePlatforms
,
}
result
:=
svc
.
platformMatches
(
rule
,
tt
.
requestPlatform
)
}
)
result
:=
svc
.
platformMatches
Cached
(
rule
,
strings
.
ToLower
(
tt
.
requestPlatform
)
)
assert
.
Equal
(
t
,
tt
.
expected
,
result
)
})
}
...
...
backend/internal/service/gateway_service.go
View file @
ae6fed15
...
...
@@ -368,15 +368,31 @@ type ForwardResult struct {
// UpstreamFailoverError indicates an upstream error that should trigger account failover.
type
UpstreamFailoverError
struct
{
StatusCode
int
ResponseBody
[]
byte
// 上游响应体,用于错误透传规则匹配
ForceCacheBilling
bool
// Antigravity 粘性会话切换时设为 true
StatusCode
int
ResponseBody
[]
byte
// 上游响应体,用于错误透传规则匹配
ForceCacheBilling
bool
// Antigravity 粘性会话切换时设为 true
RetryableOnSameAccount
bool
// 临时性错误(如 Google 间歇性 400、空响应),应在同一账号上重试 N 次再切换
}
func
(
e
*
UpstreamFailoverError
)
Error
()
string
{
return
fmt
.
Sprintf
(
"upstream error: %d (failover)"
,
e
.
StatusCode
)
}
// TempUnscheduleRetryableError 对 RetryableOnSameAccount 类型的 failover 错误触发临时封禁。
// 由 handler 层在同账号重试全部用尽、切换账号时调用。
func
(
s
*
GatewayService
)
TempUnscheduleRetryableError
(
ctx
context
.
Context
,
accountID
int64
,
failoverErr
*
UpstreamFailoverError
)
{
if
failoverErr
==
nil
||
!
failoverErr
.
RetryableOnSameAccount
{
return
}
// 根据状态码选择封禁策略
switch
failoverErr
.
StatusCode
{
case
http
.
StatusBadRequest
:
tempUnscheduleGoogleConfigError
(
ctx
,
s
.
accountRepo
,
accountID
,
"[handler]"
)
case
http
.
StatusBadGateway
:
tempUnscheduleEmptyResponse
(
ctx
,
s
.
accountRepo
,
accountID
,
"[handler]"
)
}
}
// GatewayService handles API gateway operations
type
GatewayService
struct
{
accountRepo
AccountRepository
...
...
backend/internal/service/gemini_messages_compat_service.go
View file @
ae6fed15
...
...
@@ -880,6 +880,37 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
// ErrorPolicyNone → 原有逻辑
s
.
handleGeminiUpstreamError
(
ctx
,
account
,
resp
.
StatusCode
,
resp
.
Header
,
respBody
)
// 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁
if
resp
.
StatusCode
==
http
.
StatusBadRequest
{
msg400
:=
strings
.
ToLower
(
strings
.
TrimSpace
(
extractUpstreamErrorMessage
(
respBody
)))
if
isGoogleProjectConfigError
(
msg400
)
{
upstreamReqID
:=
resp
.
Header
.
Get
(
requestIDHeader
)
if
upstreamReqID
==
""
{
upstreamReqID
=
resp
.
Header
.
Get
(
"x-goog-request-id"
)
}
upstreamMsg
:=
sanitizeUpstreamErrorMessage
(
strings
.
TrimSpace
(
extractUpstreamErrorMessage
(
respBody
)))
upstreamDetail
:=
""
if
s
.
cfg
!=
nil
&&
s
.
cfg
.
Gateway
.
LogUpstreamErrorBody
{
maxBytes
:=
s
.
cfg
.
Gateway
.
LogUpstreamErrorBodyMaxBytes
if
maxBytes
<=
0
{
maxBytes
=
2048
}
upstreamDetail
=
truncateString
(
string
(
respBody
),
maxBytes
)
}
log
.
Printf
(
"[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d"
,
upstreamMsg
,
account
.
ID
)
appendOpsUpstreamError
(
c
,
OpsUpstreamErrorEvent
{
Platform
:
account
.
Platform
,
AccountID
:
account
.
ID
,
AccountName
:
account
.
Name
,
UpstreamStatusCode
:
resp
.
StatusCode
,
UpstreamRequestID
:
upstreamReqID
,
Kind
:
"failover"
,
Message
:
upstreamMsg
,
Detail
:
upstreamDetail
,
})
return
nil
,
&
UpstreamFailoverError
{
StatusCode
:
resp
.
StatusCode
,
ResponseBody
:
respBody
,
RetryableOnSameAccount
:
true
}
}
}
if
s
.
shouldFailoverGeminiUpstreamError
(
resp
.
StatusCode
)
{
upstreamReqID
:=
resp
.
Header
.
Get
(
requestIDHeader
)
if
upstreamReqID
==
""
{
...
...
@@ -1330,6 +1361,34 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
// ErrorPolicyNone → 原有逻辑
s
.
handleGeminiUpstreamError
(
ctx
,
account
,
resp
.
StatusCode
,
resp
.
Header
,
respBody
)
// 精确匹配服务端配置类 400 错误,触发 failover + 临时封禁
if
resp
.
StatusCode
==
http
.
StatusBadRequest
{
msg400
:=
strings
.
ToLower
(
strings
.
TrimSpace
(
extractUpstreamErrorMessage
(
respBody
)))
if
isGoogleProjectConfigError
(
msg400
)
{
evBody
:=
unwrapIfNeeded
(
isOAuth
,
respBody
)
upstreamMsg
:=
sanitizeUpstreamErrorMessage
(
strings
.
TrimSpace
(
extractUpstreamErrorMessage
(
evBody
)))
upstreamDetail
:=
""
if
s
.
cfg
!=
nil
&&
s
.
cfg
.
Gateway
.
LogUpstreamErrorBody
{
maxBytes
:=
s
.
cfg
.
Gateway
.
LogUpstreamErrorBodyMaxBytes
if
maxBytes
<=
0
{
maxBytes
=
2048
}
upstreamDetail
=
truncateString
(
string
(
evBody
),
maxBytes
)
}
log
.
Printf
(
"[Gemini] status=400 google_config_error failover=true upstream_message=%q account=%d"
,
upstreamMsg
,
account
.
ID
)
appendOpsUpstreamError
(
c
,
OpsUpstreamErrorEvent
{
Platform
:
account
.
Platform
,
AccountID
:
account
.
ID
,
AccountName
:
account
.
Name
,
UpstreamStatusCode
:
resp
.
StatusCode
,
UpstreamRequestID
:
requestID
,
Kind
:
"failover"
,
Message
:
upstreamMsg
,
Detail
:
upstreamDetail
,
})
return
nil
,
&
UpstreamFailoverError
{
StatusCode
:
resp
.
StatusCode
,
ResponseBody
:
evBody
,
RetryableOnSameAccount
:
true
}
}
}
if
s
.
shouldFailoverGeminiUpstreamError
(
resp
.
StatusCode
)
{
evBody
:=
unwrapIfNeeded
(
isOAuth
,
respBody
)
upstreamMsg
:=
strings
.
TrimSpace
(
extractUpstreamErrorMessage
(
evBody
))
...
...
backend/internal/service/ops_upstream_context.go
View file @
ae6fed15
...
...
@@ -20,6 +20,10 @@ const (
// retry the specific upstream attempt (not just the client request).
// This value is sanitized+trimmed before being persisted.
OpsUpstreamRequestBodyKey
=
"ops_upstream_request_body"
// OpsSkipPassthroughKey 由 applyErrorPassthroughRule 在命中 skip_monitoring=true 的规则时设置。
// ops_error_logger 中间件检查此 key,为 true 时跳过错误记录。
OpsSkipPassthroughKey
=
"ops_skip_passthrough"
)
func
setOpsUpstreamError
(
c
*
gin
.
Context
,
upstreamStatusCode
int
,
upstreamMessage
,
upstreamDetail
string
)
{
...
...
@@ -103,6 +107,37 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
evCopy
:=
ev
existing
=
append
(
existing
,
&
evCopy
)
c
.
Set
(
OpsUpstreamErrorsKey
,
existing
)
checkSkipMonitoringForUpstreamEvent
(
c
,
&
evCopy
)
}
// checkSkipMonitoringForUpstreamEvent checks whether the upstream error event
// matches a passthrough rule with skip_monitoring=true and, if so, sets the
// OpsSkipPassthroughKey on the context. This ensures intermediate retry /
// failover errors (which never go through the final applyErrorPassthroughRule
// path) can still suppress ops_error_logs recording.
func
checkSkipMonitoringForUpstreamEvent
(
c
*
gin
.
Context
,
ev
*
OpsUpstreamErrorEvent
)
{
if
ev
.
UpstreamStatusCode
==
0
{
return
}
svc
:=
getBoundErrorPassthroughService
(
c
)
if
svc
==
nil
{
return
}
// Use the best available body representation for keyword matching.
// Even when body is empty, MatchRule can still match rules that only
// specify ErrorCodes (no Keywords), so we always call it.
body
:=
ev
.
Detail
if
body
==
""
{
body
=
ev
.
Message
}
rule
:=
svc
.
MatchRule
(
ev
.
Platform
,
ev
.
UpstreamStatusCode
,
[]
byte
(
body
))
if
rule
!=
nil
&&
rule
.
SkipMonitoring
{
c
.
Set
(
OpsSkipPassthroughKey
,
true
)
}
}
func
marshalOpsUpstreamErrors
(
events
[]
*
OpsUpstreamErrorEvent
)
*
string
{
...
...
backend/migrations/053_add_skip_monitoring_to_error_passthrough.sql
0 → 100644
View file @
ae6fed15
-- Add skip_monitoring field to error_passthrough_rules table.
-- When true, errors matching this rule will not be recorded in ops_error_logs.
-- The column is NOT NULL with DEFAULT false, and IF NOT EXISTS means the
-- statement does not fail if the column is already present.
ALTER
TABLE
error_passthrough_rules
ADD
COLUMN
IF
NOT
EXISTS
skip_monitoring
BOOLEAN
NOT
NULL
DEFAULT
false
;
frontend/src/api/admin/errorPassthrough.ts
View file @
ae6fed15
...
...
@@ -21,6 +21,7 @@ export interface ErrorPassthroughRule {
response_code
:
number
|
null
passthrough_body
:
boolean
custom_message
:
string
|
null
skip_monitoring
:
boolean
description
:
string
|
null
created_at
:
string
updated_at
:
string
...
...
@@ -41,6 +42,7 @@ export interface CreateRuleRequest {
response_code
?:
number
|
null
passthrough_body
?:
boolean
custom_message
?:
string
|
null
skip_monitoring
?:
boolean
description
?:
string
|
null
}
...
...
@@ -59,6 +61,7 @@ export interface UpdateRuleRequest {
response_code
?:
number
|
null
passthrough_body
?:
boolean
custom_message
?:
string
|
null
skip_monitoring
?:
boolean
description
?:
string
|
null
}
...
...
frontend/src/components/admin/ErrorPassthroughRulesModal.vue
View file @
ae6fed15
...
...
@@ -148,6 +148,16 @@
{{
rule
.
passthrough_body
?
t
(
'
admin.errorPassthrough.passthrough
'
)
:
t
(
'
admin.errorPassthrough.custom
'
)
}}
</span>
</div>
<div
v-if=
"rule.skip_monitoring"
class=
"flex items-center gap-1"
>
<Icon
name=
"checkCircle"
size=
"xs"
class=
"text-yellow-500"
/>
<span
class=
"text-gray-600 dark:text-gray-400"
>
{{
t
(
'
admin.errorPassthrough.skipMonitoring
'
)
}}
</span>
</div>
</div>
</td>
<td
class=
"px-3 py-2"
>
...
...
@@ -366,6 +376,19 @@
</div>
</div>
<!-- Skip Monitoring -->
<div
class=
"flex items-center gap-1.5"
>
<input
type=
"checkbox"
v-model=
"form.skip_monitoring"
class=
"h-3.5 w-3.5 rounded border-gray-300 text-yellow-600 focus:ring-yellow-500"
/>
<span
class=
"text-xs font-medium text-gray-700 dark:text-gray-300"
>
{{ t('admin.errorPassthrough.form.skipMonitoring') }}
</span>
</div>
<p
class=
"input-hint text-xs -mt-3"
>
{{ t('admin.errorPassthrough.form.skipMonitoringHint') }}
</p>
<!-- Enabled -->
<div
class=
"flex items-center gap-1.5"
>
<input
...
...
@@ -453,6 +476,7 @@ const form = reactive({
response_code
:
null
as
number
|
null
,
passthrough_body
:
true
,
custom_message
:
null
as
string
|
null
,
skip_monitoring
:
false
,
description
:
null
as
string
|
null
})
...
...
@@ -497,6 +521,7 @@ const resetForm = () => {
form
.
response_code
=
null
form
.
passthrough_body
=
true
form
.
custom_message
=
null
form
.
skip_monitoring
=
false
form
.
description
=
null
errorCodesInput
.
value
=
''
keywordsInput
.
value
=
''
...
...
@@ -520,6 +545,7 @@ const handleEdit = (rule: ErrorPassthroughRule) => {
form
.
response_code
=
rule
.
response_code
form
.
passthrough_body
=
rule
.
passthrough_body
form
.
custom_message
=
rule
.
custom_message
form
.
skip_monitoring
=
rule
.
skip_monitoring
form
.
description
=
rule
.
description
errorCodesInput
.
value
=
rule
.
error_codes
.
join
(
'
,
'
)
keywordsInput
.
value
=
rule
.
keywords
.
join
(
'
\n
'
)
...
...
@@ -575,6 +601,7 @@ const handleSubmit = async () => {
response_code
:
form
.
passthrough_code
?
null
:
form
.
response_code
,
passthrough_body
:
form
.
passthrough_body
,
custom_message
:
form
.
passthrough_body
?
null
:
form
.
custom_message
,
skip_monitoring
:
form
.
skip_monitoring
,
description
:
form
.
description
?.
trim
()
||
null
}
...
...
frontend/src/i18n/locales/en.ts
View file @
ae6fed15
...
...
@@ -3353,6 +3353,7 @@ export default {
custom
:
'
Custom
'
,
code
:
'
Code
'
,
body
:
'
Body
'
,
skipMonitoring
:
'
Skip Monitoring
'
,
// Columns
columns
:
{
...
...
@@ -3397,6 +3398,8 @@ export default {
passthroughBody
:
'
Passthrough upstream error message
'
,
customMessage
:
'
Custom error message
'
,
customMessagePlaceholder
:
'
Error message to return to client...
'
,
skipMonitoring
:
'
Skip monitoring
'
,
skipMonitoringHint
:
'
When enabled, errors matching this rule will not be recorded in ops monitoring
'
,
enabled
:
'
Enable this rule
'
},
...
...
frontend/src/i18n/locales/zh.ts
View file @
ae6fed15
...
...
@@ -3527,6 +3527,7 @@ export default {
custom
:
'
自定义
'
,
code
:
'
状态码
'
,
body
:
'
消息体
'
,
skipMonitoring
:
'
跳过监控
'
,
// Columns
columns
:
{
...
...
@@ -3571,6 +3572,8 @@ export default {
passthroughBody
:
'
透传上游错误信息
'
,
customMessage
:
'
自定义错误信息
'
,
customMessagePlaceholder
:
'
返回给客户端的错误信息...
'
,
skipMonitoring
:
'
跳过运维监控记录
'
,
skipMonitoringHint
:
'
开启后,匹配此规则的错误不会被记录到运维监控中
'
,
enabled
:
'
启用此规则
'
},
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment