Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in / Register
Toggle navigation
Menu
Open sidebar
陈曦
sub2api
Commits
7f317b90
Commit
7f317b90
authored
Jan 14, 2026
by
IanShaw027
Browse files
feat(ops): 增强ops核心服务功能和重试机制
parent
7c4309ea
Changes
4
Hide whitespace changes
Inline
Side-by-side
backend/internal/service/ops_models.go
View file @
7f317b90
...
@@ -25,12 +25,12 @@ type OpsErrorLog struct {
...
@@ -25,12 +25,12 @@ type OpsErrorLog struct {
IsRetryable
bool
`json:"is_retryable"`
IsRetryable
bool
`json:"is_retryable"`
RetryCount
int
`json:"retry_count"`
RetryCount
int
`json:"retry_count"`
Resolved
bool
`json:"resolved"`
Resolved
bool
`json:"resolved"`
ResolvedAt
*
time
.
Time
`json:"resolved_at"`
ResolvedAt
*
time
.
Time
`json:"resolved_at"`
ResolvedByUserID
*
int64
`json:"resolved_by_user_id"`
ResolvedByUserID
*
int64
`json:"resolved_by_user_id"`
ResolvedByUserName
string
`json:"resolved_by_user_name"`
ResolvedByUserName
string
`json:"resolved_by_user_name"`
ResolvedRetryID
*
int64
`json:"resolved_retry_id"`
ResolvedRetryID
*
int64
`json:"resolved_retry_id"`
ResolvedStatusRaw
string
`json:"-"`
ResolvedStatusRaw
string
`json:"-"`
ClientRequestID
string
`json:"client_request_id"`
ClientRequestID
string
`json:"client_request_id"`
RequestID
string
`json:"request_id"`
RequestID
string
`json:"request_id"`
...
@@ -93,6 +93,12 @@ type OpsErrorLogFilter struct {
...
@@ -93,6 +93,12 @@ type OpsErrorLogFilter struct {
Resolved
*
bool
Resolved
*
bool
Query
string
Query
string
// View controls error categorization for list endpoints.
// - errors: show actionable errors (exclude business-limited / 429 / 529)
// - excluded: only show excluded errors
// - all: show everything
View
string
Page
int
Page
int
PageSize
int
PageSize
int
}
}
...
...
backend/internal/service/ops_retry.go
View file @
7f317b90
...
@@ -108,6 +108,10 @@ func (w *limitedResponseWriter) truncated() bool {
...
@@ -108,6 +108,10 @@ func (w *limitedResponseWriter) truncated() bool {
return
w
.
totalWritten
>
int64
(
w
.
limit
)
return
w
.
totalWritten
>
int64
(
w
.
limit
)
}
}
const (
	// OpsRetryModeUpstreamEvent is the mode value RetryUpstreamEvent passes as
	// the recorded retry mode when it replays a single captured upstream
	// attempt. The retry itself is executed with OpsRetryModeUpstream; only
	// the persisted mode differs.
	OpsRetryModeUpstreamEvent = "upstream_event"
)
func
(
s
*
OpsService
)
RetryError
(
ctx
context
.
Context
,
requestedByUserID
int64
,
errorID
int64
,
mode
string
,
pinnedAccountID
*
int64
)
(
*
OpsRetryResult
,
error
)
{
func
(
s
*
OpsService
)
RetryError
(
ctx
context
.
Context
,
requestedByUserID
int64
,
errorID
int64
,
mode
string
,
pinnedAccountID
*
int64
)
(
*
OpsRetryResult
,
error
)
{
if
err
:=
s
.
RequireMonitoringEnabled
(
ctx
);
err
!=
nil
{
if
err
:=
s
.
RequireMonitoringEnabled
(
ctx
);
err
!=
nil
{
return
nil
,
err
return
nil
,
err
...
@@ -123,6 +127,81 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
...
@@ -123,6 +127,81 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_INVALID_MODE"
,
"mode must be client or upstream"
)
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_INVALID_MODE"
,
"mode must be client or upstream"
)
}
}
errorLog
,
err
:=
s
.
GetErrorLogByID
(
ctx
,
errorID
)
if
err
!=
nil
{
return
nil
,
err
}
if
errorLog
==
nil
{
return
nil
,
infraerrors
.
NotFound
(
"OPS_ERROR_NOT_FOUND"
,
"ops error log not found"
)
}
if
strings
.
TrimSpace
(
errorLog
.
RequestBody
)
==
""
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_NO_REQUEST_BODY"
,
"No request body found to retry"
)
}
var
pinned
*
int64
if
mode
==
OpsRetryModeUpstream
{
if
pinnedAccountID
!=
nil
&&
*
pinnedAccountID
>
0
{
pinned
=
pinnedAccountID
}
else
if
errorLog
.
AccountID
!=
nil
&&
*
errorLog
.
AccountID
>
0
{
pinned
=
errorLog
.
AccountID
}
else
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_PINNED_ACCOUNT_REQUIRED"
,
"pinned_account_id is required for upstream retry"
)
}
}
return
s
.
retryWithErrorLog
(
ctx
,
requestedByUserID
,
errorID
,
mode
,
mode
,
pinned
,
errorLog
)
}
// RetryUpstreamEvent retries a specific upstream attempt captured inside ops_error_logs.upstream_errors.
// idx is 0-based. It always pins the original event account_id.
func
(
s
*
OpsService
)
RetryUpstreamEvent
(
ctx
context
.
Context
,
requestedByUserID
int64
,
errorID
int64
,
idx
int
)
(
*
OpsRetryResult
,
error
)
{
if
err
:=
s
.
RequireMonitoringEnabled
(
ctx
);
err
!=
nil
{
return
nil
,
err
}
if
s
.
opsRepo
==
nil
{
return
nil
,
infraerrors
.
ServiceUnavailable
(
"OPS_REPO_UNAVAILABLE"
,
"Ops repository not available"
)
}
if
idx
<
0
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_INVALID_UPSTREAM_IDX"
,
"invalid upstream idx"
)
}
errorLog
,
err
:=
s
.
GetErrorLogByID
(
ctx
,
errorID
)
if
err
!=
nil
{
return
nil
,
err
}
if
errorLog
==
nil
{
return
nil
,
infraerrors
.
NotFound
(
"OPS_ERROR_NOT_FOUND"
,
"ops error log not found"
)
}
events
,
err
:=
ParseOpsUpstreamErrors
(
errorLog
.
UpstreamErrors
)
if
err
!=
nil
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_UPSTREAM_EVENTS_INVALID"
,
"invalid upstream_errors"
)
}
if
idx
>=
len
(
events
)
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_UPSTREAM_IDX_OOB"
,
"upstream idx out of range"
)
}
ev
:=
events
[
idx
]
if
ev
==
nil
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_UPSTREAM_EVENT_MISSING"
,
"upstream event missing"
)
}
if
ev
.
AccountID
<=
0
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_PINNED_ACCOUNT_REQUIRED"
,
"account_id is required for upstream retry"
)
}
upstreamBody
:=
strings
.
TrimSpace
(
ev
.
UpstreamRequestBody
)
if
upstreamBody
==
""
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_UPSTREAM_NO_REQUEST_BODY"
,
"No upstream request body found to retry"
)
}
override
:=
*
errorLog
override
.
RequestBody
=
upstreamBody
pinned
:=
ev
.
AccountID
// Persist as upstream_event, execute as upstream pinned retry.
return
s
.
retryWithErrorLog
(
ctx
,
requestedByUserID
,
errorID
,
OpsRetryModeUpstreamEvent
,
OpsRetryModeUpstream
,
&
pinned
,
&
override
)
}
func
(
s
*
OpsService
)
retryWithErrorLog
(
ctx
context
.
Context
,
requestedByUserID
int64
,
errorID
int64
,
mode
string
,
execMode
string
,
pinnedAccountID
*
int64
,
errorLog
*
OpsErrorLogDetail
)
(
*
OpsRetryResult
,
error
)
{
latest
,
err
:=
s
.
opsRepo
.
GetLatestRetryAttemptForError
(
ctx
,
errorID
)
latest
,
err
:=
s
.
opsRepo
.
GetLatestRetryAttemptForError
(
ctx
,
errorID
)
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
sql
.
ErrNoRows
)
{
if
err
!=
nil
&&
!
errors
.
Is
(
err
,
sql
.
ErrNoRows
)
{
return
nil
,
infraerrors
.
InternalServer
(
"OPS_RETRY_LOAD_LATEST_FAILED"
,
"Failed to check retry status"
)
.
WithCause
(
err
)
return
nil
,
infraerrors
.
InternalServer
(
"OPS_RETRY_LOAD_LATEST_FAILED"
,
"Failed to check retry status"
)
.
WithCause
(
err
)
...
@@ -144,22 +223,18 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
...
@@ -144,22 +223,18 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
}
}
}
}
errorLog
,
err
:=
s
.
GetErrorLogByID
(
ctx
,
errorID
)
if
errorLog
==
nil
||
strings
.
TrimSpace
(
errorLog
.
RequestBody
)
==
""
{
if
err
!=
nil
{
return
nil
,
err
}
if
strings
.
TrimSpace
(
errorLog
.
RequestBody
)
==
""
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_NO_REQUEST_BODY"
,
"No request body found to retry"
)
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_NO_REQUEST_BODY"
,
"No request body found to retry"
)
}
}
var
pinned
*
int64
var
pinned
*
int64
if
m
ode
==
OpsRetryModeUpstream
{
if
execM
ode
==
OpsRetryModeUpstream
{
if
pinnedAccountID
!=
nil
&&
*
pinnedAccountID
>
0
{
if
pinnedAccountID
!=
nil
&&
*
pinnedAccountID
>
0
{
pinned
=
pinnedAccountID
pinned
=
pinnedAccountID
}
else
if
errorLog
.
AccountID
!=
nil
&&
*
errorLog
.
AccountID
>
0
{
}
else
if
errorLog
.
AccountID
!=
nil
&&
*
errorLog
.
AccountID
>
0
{
pinned
=
errorLog
.
AccountID
pinned
=
errorLog
.
AccountID
}
else
{
}
else
{
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_PINNED_ACCOUNT_REQUIRED"
,
"
pinned_
account_id is required for upstream retry"
)
return
nil
,
infraerrors
.
BadRequest
(
"OPS_RETRY_PINNED_ACCOUNT_REQUIRED"
,
"account_id is required for upstream retry"
)
}
}
}
}
...
@@ -196,7 +271,7 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
...
@@ -196,7 +271,7 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
execCtx
,
cancel
:=
context
.
WithTimeout
(
ctx
,
opsRetryTimeout
)
execCtx
,
cancel
:=
context
.
WithTimeout
(
ctx
,
opsRetryTimeout
)
defer
cancel
()
defer
cancel
()
execRes
:=
s
.
executeRetry
(
execCtx
,
errorLog
,
m
ode
,
pinned
)
execRes
:=
s
.
executeRetry
(
execCtx
,
errorLog
,
execM
ode
,
pinned
)
finishedAt
:=
time
.
Now
()
finishedAt
:=
time
.
Now
()
result
.
FinishedAt
=
finishedAt
result
.
FinishedAt
=
finishedAt
...
@@ -249,14 +324,10 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
...
@@ -249,14 +324,10 @@ func (s *OpsService) RetryError(ctx context.Context, requestedByUserID int64, er
ResultRequestID
:
resultRequestID
,
ResultRequestID
:
resultRequestID
,
ErrorMessage
:
updateErrMsg
,
ErrorMessage
:
updateErrMsg
,
});
err
!=
nil
{
});
err
!=
nil
{
// Best-effort: retry itself already executed; do not fail the API response.
log
.
Printf
(
"[Ops] UpdateRetryAttempt failed: %v"
,
err
)
log
.
Printf
(
"[Ops] UpdateRetryAttempt failed: %v"
,
err
)
}
else
{
}
else
if
success
{
// Auto-resolve the source error when a manual retry succeeds.
if
err
:=
s
.
opsRepo
.
UpdateErrorResolution
(
updateCtx
,
errorID
,
true
,
&
requestedByUserID
,
&
attemptID
,
&
finishedAt
);
err
!=
nil
{
if
success
{
log
.
Printf
(
"[Ops] UpdateErrorResolution failed: %v"
,
err
)
if
err
:=
s
.
opsRepo
.
UpdateErrorResolution
(
updateCtx
,
errorID
,
true
,
&
requestedByUserID
,
&
attemptID
,
&
finishedAt
);
err
!=
nil
{
log
.
Printf
(
"[Ops] UpdateErrorResolution failed: %v"
,
err
)
}
}
}
}
}
...
...
backend/internal/service/ops_service.go
View file @
7f317b90
...
@@ -208,6 +208,25 @@ func (s *OpsService) RecordError(ctx context.Context, entry *OpsInsertErrorLogIn
...
@@ -208,6 +208,25 @@ func (s *OpsService) RecordError(ctx context.Context, entry *OpsInsertErrorLogIn
out
.
Detail
=
""
out
.
Detail
=
""
}
}
out
.
UpstreamRequestBody
=
strings
.
TrimSpace
(
out
.
UpstreamRequestBody
)
if
out
.
UpstreamRequestBody
!=
""
{
// Reuse the same sanitization/trimming strategy as request body storage.
// Keep it small so it is safe to persist in ops_error_logs JSON.
sanitized
,
truncated
,
_
:=
sanitizeAndTrimRequestBody
([]
byte
(
out
.
UpstreamRequestBody
),
10
*
1024
)
if
sanitized
!=
""
{
out
.
UpstreamRequestBody
=
sanitized
if
truncated
{
out
.
Kind
=
strings
.
TrimSpace
(
out
.
Kind
)
if
out
.
Kind
==
""
{
out
.
Kind
=
"upstream"
}
out
.
Kind
=
out
.
Kind
+
":request_body_truncated"
}
}
else
{
out
.
UpstreamRequestBody
=
""
}
}
// Drop fully-empty events (can happen if only status code was known).
// Drop fully-empty events (can happen if only status code was known).
if
out
.
UpstreamStatusCode
==
0
&&
out
.
Message
==
""
&&
out
.
Detail
==
""
{
if
out
.
UpstreamStatusCode
==
0
&&
out
.
Message
==
""
&&
out
.
Detail
==
""
{
continue
continue
...
...
backend/internal/service/ops_upstream_context.go
View file @
7f317b90
...
@@ -15,6 +15,11 @@ const (
...
@@ -15,6 +15,11 @@ const (
OpsUpstreamErrorMessageKey
=
"ops_upstream_error_message"
OpsUpstreamErrorMessageKey
=
"ops_upstream_error_message"
OpsUpstreamErrorDetailKey
=
"ops_upstream_error_detail"
OpsUpstreamErrorDetailKey
=
"ops_upstream_error_detail"
OpsUpstreamErrorsKey
=
"ops_upstream_errors"
OpsUpstreamErrorsKey
=
"ops_upstream_errors"
// Best-effort capture of the current upstream request body so ops can
// retry the specific upstream attempt (not just the client request).
// This value is sanitized+trimmed before being persisted.
OpsUpstreamRequestBodyKey
=
"ops_upstream_request_body"
)
)
func
setOpsUpstreamError
(
c
*
gin
.
Context
,
upstreamStatusCode
int
,
upstreamMessage
,
upstreamDetail
string
)
{
func
setOpsUpstreamError
(
c
*
gin
.
Context
,
upstreamStatusCode
int
,
upstreamMessage
,
upstreamDetail
string
)
{
...
@@ -46,6 +51,10 @@ type OpsUpstreamErrorEvent struct {
...
@@ -46,6 +51,10 @@ type OpsUpstreamErrorEvent struct {
UpstreamStatusCode
int
`json:"upstream_status_code,omitempty"`
UpstreamStatusCode
int
`json:"upstream_status_code,omitempty"`
UpstreamRequestID
string
`json:"upstream_request_id,omitempty"`
UpstreamRequestID
string
`json:"upstream_request_id,omitempty"`
// Best-effort upstream request capture (sanitized+trimmed).
// Required for retrying a specific upstream attempt.
UpstreamRequestBody
string
`json:"upstream_request_body,omitempty"`
// Kind: http_error | request_error | retry_exhausted | failover
// Kind: http_error | request_error | retry_exhausted | failover
Kind
string
`json:"kind,omitempty"`
Kind
string
`json:"kind,omitempty"`
...
@@ -62,6 +71,7 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
...
@@ -62,6 +71,7 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
}
}
ev
.
Platform
=
strings
.
TrimSpace
(
ev
.
Platform
)
ev
.
Platform
=
strings
.
TrimSpace
(
ev
.
Platform
)
ev
.
UpstreamRequestID
=
strings
.
TrimSpace
(
ev
.
UpstreamRequestID
)
ev
.
UpstreamRequestID
=
strings
.
TrimSpace
(
ev
.
UpstreamRequestID
)
ev
.
UpstreamRequestBody
=
strings
.
TrimSpace
(
ev
.
UpstreamRequestBody
)
ev
.
Kind
=
strings
.
TrimSpace
(
ev
.
Kind
)
ev
.
Kind
=
strings
.
TrimSpace
(
ev
.
Kind
)
ev
.
Message
=
strings
.
TrimSpace
(
ev
.
Message
)
ev
.
Message
=
strings
.
TrimSpace
(
ev
.
Message
)
ev
.
Detail
=
strings
.
TrimSpace
(
ev
.
Detail
)
ev
.
Detail
=
strings
.
TrimSpace
(
ev
.
Detail
)
...
@@ -69,6 +79,16 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
...
@@ -69,6 +79,16 @@ func appendOpsUpstreamError(c *gin.Context, ev OpsUpstreamErrorEvent) {
ev
.
Message
=
sanitizeUpstreamErrorMessage
(
ev
.
Message
)
ev
.
Message
=
sanitizeUpstreamErrorMessage
(
ev
.
Message
)
}
}
// If the caller didn't explicitly pass upstream request body but the gateway
// stored it on the context, attach it so ops can retry this specific attempt.
if
ev
.
UpstreamRequestBody
==
""
{
if
v
,
ok
:=
c
.
Get
(
OpsUpstreamRequestBodyKey
);
ok
{
if
s
,
ok
:=
v
.
(
string
);
ok
{
ev
.
UpstreamRequestBody
=
strings
.
TrimSpace
(
s
)
}
}
}
var
existing
[]
*
OpsUpstreamErrorEvent
var
existing
[]
*
OpsUpstreamErrorEvent
if
v
,
ok
:=
c
.
Get
(
OpsUpstreamErrorsKey
);
ok
{
if
v
,
ok
:=
c
.
Get
(
OpsUpstreamErrorsKey
);
ok
{
if
arr
,
ok
:=
v
.
([]
*
OpsUpstreamErrorEvent
);
ok
{
if
arr
,
ok
:=
v
.
([]
*
OpsUpstreamErrorEvent
);
ok
{
...
@@ -93,3 +113,15 @@ func marshalOpsUpstreamErrors(events []*OpsUpstreamErrorEvent) *string {
...
@@ -93,3 +113,15 @@ func marshalOpsUpstreamErrors(events []*OpsUpstreamErrorEvent) *string {
s
:=
string
(
raw
)
s
:=
string
(
raw
)
return
&
s
return
&
s
}
}
func
ParseOpsUpstreamErrors
(
raw
string
)
([]
*
OpsUpstreamErrorEvent
,
error
)
{
raw
=
strings
.
TrimSpace
(
raw
)
if
raw
==
""
{
return
[]
*
OpsUpstreamErrorEvent
{},
nil
}
var
out
[]
*
OpsUpstreamErrorEvent
if
err
:=
json
.
Unmarshal
([]
byte
(
raw
),
&
out
);
err
!=
nil
{
return
nil
,
err
}
return
out
,
nil
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment