"git@web.lueluesay.top:chenxi/sub2api.git" did not exist on "028f8aaa973d404a3d2c58b7701601dcf885a5dc"
Commit fff1d548 authored by yangjianbo's avatar yangjianbo
Browse files

feat(log): 落地统一日志底座与系统日志运维能力

parent a5f29019
package logger
import (
"os"
"path/filepath"
"testing"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
func TestResolveLogFilePath_Default(t *testing.T) {
t.Setenv("DATA_DIR", "")
got := resolveLogFilePath("")
if got != DefaultContainerLogPath {
t.Fatalf("resolveLogFilePath() = %q, want %q", got, DefaultContainerLogPath)
}
}
func TestResolveLogFilePath_WithDataDir(t *testing.T) {
t.Setenv("DATA_DIR", "/tmp/sub2api-data")
got := resolveLogFilePath("")
want := filepath.Join("/tmp/sub2api-data", "logs", "sub2api.log")
if got != want {
t.Fatalf("resolveLogFilePath() = %q, want %q", got, want)
}
}
func TestResolveLogFilePath_ExplicitPath(t *testing.T) {
t.Setenv("DATA_DIR", "/tmp/ignore")
got := resolveLogFilePath("/var/log/custom.log")
if got != "/var/log/custom.log" {
t.Fatalf("resolveLogFilePath() = %q, want explicit path", got)
}
}
func TestNormalizedOptions_InvalidFallback(t *testing.T) {
t.Setenv("DATA_DIR", "")
opts := InitOptions{
Level: "TRACE",
Format: "TEXT",
ServiceName: "",
Environment: "",
StacktraceLevel: "panic",
Output: OutputOptions{
ToStdout: false,
ToFile: false,
},
Rotation: RotationOptions{
MaxSizeMB: 0,
MaxBackups: -1,
MaxAgeDays: -1,
},
Sampling: SamplingOptions{
Enabled: true,
Initial: 0,
Thereafter: 0,
},
}
out := opts.normalized()
if out.Level != "trace" {
// normalized 仅做 trim/lower,不做校验;校验在 config 层。
t.Fatalf("normalized level should preserve value for upstream validation, got %q", out.Level)
}
if !out.Output.ToStdout {
t.Fatalf("normalized output should fallback to stdout")
}
if out.Output.FilePath != DefaultContainerLogPath {
t.Fatalf("normalized file path = %q", out.Output.FilePath)
}
if out.Rotation.MaxSizeMB != 100 {
t.Fatalf("normalized max_size_mb = %d", out.Rotation.MaxSizeMB)
}
if out.Rotation.MaxBackups != 10 {
t.Fatalf("normalized max_backups = %d", out.Rotation.MaxBackups)
}
if out.Rotation.MaxAgeDays != 7 {
t.Fatalf("normalized max_age_days = %d", out.Rotation.MaxAgeDays)
}
if out.Sampling.Initial != 100 || out.Sampling.Thereafter != 100 {
t.Fatalf("normalized sampling defaults invalid: %+v", out.Sampling)
}
}
func TestBuildFileCore_InvalidPathFallback(t *testing.T) {
t.Setenv("DATA_DIR", "")
opts := bootstrapOptions()
opts.Output.ToFile = true
opts.Output.FilePath = filepath.Join(os.DevNull, "logs", "sub2api.log")
encoderCfg := zapcore.EncoderConfig{
TimeKey: "time",
LevelKey: "level",
MessageKey: "msg",
EncodeTime: zapcore.ISO8601TimeEncoder,
EncodeLevel: zapcore.CapitalLevelEncoder,
}
encoder := zapcore.NewJSONEncoder(encoderCfg)
_, _, err := buildFileCore(encoder, zap.NewAtomicLevel(), opts)
if err == nil {
t.Fatalf("buildFileCore() expected error for invalid path")
}
}
package logger
import (
"context"
"log/slog"
"strings"
"time"
"go.uber.org/zap"
"go.uber.org/zap/zapcore"
)
type slogZapHandler struct {
logger *zap.Logger
attrs []slog.Attr
groups []string
}
func newSlogZapHandler(logger *zap.Logger) slog.Handler {
if logger == nil {
logger = zap.NewNop()
}
return &slogZapHandler{
logger: logger,
attrs: make([]slog.Attr, 0, 8),
groups: make([]string, 0, 4),
}
}
func (h *slogZapHandler) Enabled(_ context.Context, level slog.Level) bool {
switch {
case level >= slog.LevelError:
return h.logger.Core().Enabled(LevelError)
case level >= slog.LevelWarn:
return h.logger.Core().Enabled(LevelWarn)
case level <= slog.LevelDebug:
return h.logger.Core().Enabled(LevelDebug)
default:
return h.logger.Core().Enabled(LevelInfo)
}
}
func (h *slogZapHandler) Handle(_ context.Context, record slog.Record) error {
fields := make([]zap.Field, 0, len(h.attrs)+record.NumAttrs()+4)
fields = append(fields, zap.Time("time", record.Time))
fields = append(fields, slogAttrsToZapFields(h.groups, h.attrs)...)
record.Attrs(func(attr slog.Attr) bool {
fields = append(fields, slogAttrToZapField(h.groups, attr))
return true
})
entry := h.logger.With(fields...)
switch {
case record.Level >= slog.LevelError:
entry.Error(record.Message)
case record.Level >= slog.LevelWarn:
entry.Warn(record.Message)
case record.Level <= slog.LevelDebug:
entry.Debug(record.Message)
default:
entry.Info(record.Message)
}
return nil
}
func (h *slogZapHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
next := *h
next.attrs = append(append([]slog.Attr{}, h.attrs...), attrs...)
return &next
}
func (h *slogZapHandler) WithGroup(name string) slog.Handler {
name = strings.TrimSpace(name)
if name == "" {
return h
}
next := *h
next.groups = append(append([]string{}, h.groups...), name)
return &next
}
func slogAttrsToZapFields(groups []string, attrs []slog.Attr) []zap.Field {
fields := make([]zap.Field, 0, len(attrs))
for _, attr := range attrs {
fields = append(fields, slogAttrToZapField(groups, attr))
}
return fields
}
func slogAttrToZapField(groups []string, attr slog.Attr) zap.Field {
if len(groups) > 0 {
attr.Key = strings.Join(append(append([]string{}, groups...), attr.Key), ".")
}
value := attr.Value.Resolve()
switch value.Kind() {
case slog.KindBool:
return zap.Bool(attr.Key, value.Bool())
case slog.KindInt64:
return zap.Int64(attr.Key, value.Int64())
case slog.KindUint64:
return zap.Uint64(attr.Key, value.Uint64())
case slog.KindFloat64:
return zap.Float64(attr.Key, value.Float64())
case slog.KindDuration:
return zap.Duration(attr.Key, value.Duration())
case slog.KindTime:
return zap.Time(attr.Key, value.Time())
case slog.KindString:
return zap.String(attr.Key, value.String())
case slog.KindGroup:
groupFields := make([]zap.Field, 0, len(value.Group()))
for _, nested := range value.Group() {
groupFields = append(groupFields, slogAttrToZapField(nil, nested))
}
return zap.Object(attr.Key, zapObjectFields(groupFields))
case slog.KindAny:
if t, ok := value.Any().(time.Time); ok {
return zap.Time(attr.Key, t)
}
return zap.Any(attr.Key, value.Any())
default:
return zap.String(attr.Key, value.String())
}
}
type zapObjectFields []zap.Field
func (z zapObjectFields) MarshalLogObject(enc zapcore.ObjectEncoder) error {
for _, field := range z {
field.AddTo(enc)
}
return nil
}
...@@ -3,6 +3,7 @@ package repository ...@@ -3,6 +3,7 @@ package repository
import ( import (
"context" "context"
"database/sql" "database/sql"
"encoding/json"
"fmt" "fmt"
"strings" "strings"
"time" "time"
...@@ -938,6 +939,243 @@ WHERE id = $1` ...@@ -938,6 +939,243 @@ WHERE id = $1`
return err return err
} }
func (r *opsRepository) BatchInsertSystemLogs(ctx context.Context, inputs []*service.OpsInsertSystemLogInput) (int64, error) {
if r == nil || r.db == nil {
return 0, fmt.Errorf("nil ops repository")
}
if len(inputs) == 0 {
return 0, nil
}
tx, err := r.db.BeginTx(ctx, nil)
if err != nil {
return 0, err
}
stmt, err := tx.PrepareContext(ctx, pq.CopyIn(
"ops_system_logs",
"created_at",
"level",
"component",
"message",
"request_id",
"client_request_id",
"user_id",
"account_id",
"platform",
"model",
"extra",
))
if err != nil {
_ = tx.Rollback()
return 0, err
}
var inserted int64
for _, input := range inputs {
if input == nil {
continue
}
createdAt := input.CreatedAt
if createdAt.IsZero() {
createdAt = time.Now().UTC()
}
component := strings.TrimSpace(input.Component)
level := strings.ToLower(strings.TrimSpace(input.Level))
message := strings.TrimSpace(input.Message)
if level == "" || message == "" {
continue
}
if component == "" {
component = "app"
}
extra := strings.TrimSpace(input.ExtraJSON)
if extra == "" {
extra = "{}"
}
if _, err := stmt.ExecContext(
ctx,
createdAt.UTC(),
level,
component,
message,
opsNullString(input.RequestID),
opsNullString(input.ClientRequestID),
opsNullInt64(input.UserID),
opsNullInt64(input.AccountID),
opsNullString(input.Platform),
opsNullString(input.Model),
extra,
); err != nil {
_ = stmt.Close()
_ = tx.Rollback()
return inserted, err
}
inserted++
}
if _, err := stmt.ExecContext(ctx); err != nil {
_ = stmt.Close()
_ = tx.Rollback()
return inserted, err
}
if err := stmt.Close(); err != nil {
_ = tx.Rollback()
return inserted, err
}
if err := tx.Commit(); err != nil {
return inserted, err
}
return inserted, nil
}
func (r *opsRepository) ListSystemLogs(ctx context.Context, filter *service.OpsSystemLogFilter) (*service.OpsSystemLogList, error) {
if r == nil || r.db == nil {
return nil, fmt.Errorf("nil ops repository")
}
if filter == nil {
filter = &service.OpsSystemLogFilter{}
}
page := filter.Page
if page <= 0 {
page = 1
}
pageSize := filter.PageSize
if pageSize <= 0 {
pageSize = 50
}
if pageSize > 200 {
pageSize = 200
}
where, args, _ := buildOpsSystemLogsWhere(filter)
countSQL := "SELECT COUNT(*) FROM ops_system_logs l " + where
var total int
if err := r.db.QueryRowContext(ctx, countSQL, args...).Scan(&total); err != nil {
return nil, err
}
offset := (page - 1) * pageSize
argsWithLimit := append(args, pageSize, offset)
query := `
SELECT
l.id,
l.created_at,
l.level,
COALESCE(l.component, ''),
COALESCE(l.message, ''),
COALESCE(l.request_id, ''),
COALESCE(l.client_request_id, ''),
l.user_id,
l.account_id,
COALESCE(l.platform, ''),
COALESCE(l.model, ''),
COALESCE(l.extra::text, '{}')
FROM ops_system_logs l
` + where + `
ORDER BY l.created_at DESC, l.id DESC
LIMIT $` + itoa(len(args)+1) + ` OFFSET $` + itoa(len(args)+2)
rows, err := r.db.QueryContext(ctx, query, argsWithLimit...)
if err != nil {
return nil, err
}
defer func() { _ = rows.Close() }()
logs := make([]*service.OpsSystemLog, 0, pageSize)
for rows.Next() {
item := &service.OpsSystemLog{}
var userID sql.NullInt64
var accountID sql.NullInt64
var extraRaw string
if err := rows.Scan(
&item.ID,
&item.CreatedAt,
&item.Level,
&item.Component,
&item.Message,
&item.RequestID,
&item.ClientRequestID,
&userID,
&accountID,
&item.Platform,
&item.Model,
&extraRaw,
); err != nil {
return nil, err
}
if userID.Valid {
v := userID.Int64
item.UserID = &v
}
if accountID.Valid {
v := accountID.Int64
item.AccountID = &v
}
extraRaw = strings.TrimSpace(extraRaw)
if extraRaw != "" && extraRaw != "null" && extraRaw != "{}" {
extra := make(map[string]any)
if err := json.Unmarshal([]byte(extraRaw), &extra); err == nil {
item.Extra = extra
}
}
logs = append(logs, item)
}
if err := rows.Err(); err != nil {
return nil, err
}
return &service.OpsSystemLogList{
Logs: logs,
Total: total,
Page: page,
PageSize: pageSize,
}, nil
}
func (r *opsRepository) DeleteSystemLogs(ctx context.Context, filter *service.OpsSystemLogCleanupFilter) (int64, error) {
if r == nil || r.db == nil {
return 0, fmt.Errorf("nil ops repository")
}
if filter == nil {
filter = &service.OpsSystemLogCleanupFilter{}
}
where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(filter)
if !hasConstraint {
return 0, fmt.Errorf("cleanup requires at least one filter condition")
}
query := "DELETE FROM ops_system_logs l " + where
res, err := r.db.ExecContext(ctx, query, args...)
if err != nil {
return 0, err
}
return res.RowsAffected()
}
func (r *opsRepository) InsertSystemLogCleanupAudit(ctx context.Context, input *service.OpsSystemLogCleanupAudit) error {
if r == nil || r.db == nil {
return fmt.Errorf("nil ops repository")
}
if input == nil {
return fmt.Errorf("nil input")
}
createdAt := input.CreatedAt
if createdAt.IsZero() {
createdAt = time.Now().UTC()
}
_, err := r.db.ExecContext(ctx, `
INSERT INTO ops_system_log_cleanup_audits (
created_at,
operator_id,
conditions,
deleted_rows
) VALUES ($1,$2,$3,$4)
`, createdAt.UTC(), input.OperatorID, input.Conditions, input.DeletedRows)
return err
}
func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) { func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) {
clauses := make([]string, 0, 12) clauses := make([]string, 0, 12)
args := make([]any, 0, 12) args := make([]any, 0, 12)
...@@ -1053,6 +1291,95 @@ func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) { ...@@ -1053,6 +1291,95 @@ func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) {
return "WHERE " + strings.Join(clauses, " AND "), args return "WHERE " + strings.Join(clauses, " AND "), args
} }
func buildOpsSystemLogsWhere(filter *service.OpsSystemLogFilter) (string, []any, bool) {
clauses := make([]string, 0, 10)
args := make([]any, 0, 10)
clauses = append(clauses, "1=1")
hasConstraint := false
if filter != nil && filter.StartTime != nil && !filter.StartTime.IsZero() {
args = append(args, filter.StartTime.UTC())
clauses = append(clauses, "l.created_at >= $"+itoa(len(args)))
hasConstraint = true
}
if filter != nil && filter.EndTime != nil && !filter.EndTime.IsZero() {
args = append(args, filter.EndTime.UTC())
clauses = append(clauses, "l.created_at < $"+itoa(len(args)))
hasConstraint = true
}
if filter != nil {
if v := strings.ToLower(strings.TrimSpace(filter.Level)); v != "" {
args = append(args, v)
clauses = append(clauses, "LOWER(COALESCE(l.level,'')) = $"+itoa(len(args)))
hasConstraint = true
}
if v := strings.TrimSpace(filter.Component); v != "" {
args = append(args, v)
clauses = append(clauses, "COALESCE(l.component,'') = $"+itoa(len(args)))
hasConstraint = true
}
if v := strings.TrimSpace(filter.RequestID); v != "" {
args = append(args, v)
clauses = append(clauses, "COALESCE(l.request_id,'') = $"+itoa(len(args)))
hasConstraint = true
}
if v := strings.TrimSpace(filter.ClientRequestID); v != "" {
args = append(args, v)
clauses = append(clauses, "COALESCE(l.client_request_id,'') = $"+itoa(len(args)))
hasConstraint = true
}
if filter.UserID != nil && *filter.UserID > 0 {
args = append(args, *filter.UserID)
clauses = append(clauses, "l.user_id = $"+itoa(len(args)))
hasConstraint = true
}
if filter.AccountID != nil && *filter.AccountID > 0 {
args = append(args, *filter.AccountID)
clauses = append(clauses, "l.account_id = $"+itoa(len(args)))
hasConstraint = true
}
if v := strings.TrimSpace(filter.Platform); v != "" {
args = append(args, v)
clauses = append(clauses, "COALESCE(l.platform,'') = $"+itoa(len(args)))
hasConstraint = true
}
if v := strings.TrimSpace(filter.Model); v != "" {
args = append(args, v)
clauses = append(clauses, "COALESCE(l.model,'') = $"+itoa(len(args)))
hasConstraint = true
}
if v := strings.TrimSpace(filter.Query); v != "" {
like := "%" + v + "%"
args = append(args, like)
n := itoa(len(args))
clauses = append(clauses, "(l.message ILIKE $"+n+" OR COALESCE(l.request_id,'') ILIKE $"+n+" OR COALESCE(l.client_request_id,'') ILIKE $"+n+" OR COALESCE(l.extra::text,'') ILIKE $"+n+")")
hasConstraint = true
}
}
return "WHERE " + strings.Join(clauses, " AND "), args, hasConstraint
}
func buildOpsSystemLogsCleanupWhere(filter *service.OpsSystemLogCleanupFilter) (string, []any, bool) {
if filter == nil {
filter = &service.OpsSystemLogCleanupFilter{}
}
listFilter := &service.OpsSystemLogFilter{
StartTime: filter.StartTime,
EndTime: filter.EndTime,
Level: filter.Level,
Component: filter.Component,
RequestID: filter.RequestID,
ClientRequestID: filter.ClientRequestID,
UserID: filter.UserID,
AccountID: filter.AccountID,
Platform: filter.Platform,
Model: filter.Model,
Query: filter.Query,
}
return buildOpsSystemLogsWhere(listFilter)
}
// Helpers for nullable args // Helpers for nullable args
func opsNullString(v any) any { func opsNullString(v any) any {
switch s := v.(type) { switch s := v.(type) {
......
package repository
import (
"strings"
"testing"
"time"
"github.com/Wei-Shaw/sub2api/internal/service"
)
func TestBuildOpsSystemLogsWhere_WithClientRequestIDAndUserID(t *testing.T) {
start := time.Date(2026, 2, 1, 0, 0, 0, 0, time.UTC)
end := time.Date(2026, 2, 2, 0, 0, 0, 0, time.UTC)
userID := int64(12)
accountID := int64(34)
filter := &service.OpsSystemLogFilter{
StartTime: &start,
EndTime: &end,
Level: "warn",
Component: "http.access",
RequestID: "req-1",
ClientRequestID: "creq-1",
UserID: &userID,
AccountID: &accountID,
Platform: "openai",
Model: "gpt-5",
Query: "timeout",
}
where, args, hasConstraint := buildOpsSystemLogsWhere(filter)
if !hasConstraint {
t.Fatalf("expected hasConstraint=true")
}
if where == "" {
t.Fatalf("where should not be empty")
}
if len(args) != 11 {
t.Fatalf("args len = %d, want 11", len(args))
}
if !contains(where, "COALESCE(l.client_request_id,'') = $") {
t.Fatalf("where should include client_request_id condition: %s", where)
}
if !contains(where, "l.user_id = $") {
t.Fatalf("where should include user_id condition: %s", where)
}
}
func TestBuildOpsSystemLogsCleanupWhere_RequireConstraint(t *testing.T) {
where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(&service.OpsSystemLogCleanupFilter{})
if hasConstraint {
t.Fatalf("expected hasConstraint=false")
}
if where == "" {
t.Fatalf("where should not be empty")
}
if len(args) != 0 {
t.Fatalf("args len = %d, want 0", len(args))
}
}
func TestBuildOpsSystemLogsCleanupWhere_WithClientRequestIDAndUserID(t *testing.T) {
userID := int64(9)
filter := &service.OpsSystemLogCleanupFilter{
ClientRequestID: "creq-9",
UserID: &userID,
}
where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(filter)
if !hasConstraint {
t.Fatalf("expected hasConstraint=true")
}
if len(args) != 2 {
t.Fatalf("args len = %d, want 2", len(args))
}
if !contains(where, "COALESCE(l.client_request_id,'') = $") {
t.Fatalf("where should include client_request_id condition: %s", where)
}
if !contains(where, "l.user_id = $") {
t.Fatalf("where should include user_id condition: %s", where)
}
}
func contains(s string, sub string) bool {
return strings.Contains(s, sub)
}
...@@ -2,10 +2,13 @@ package middleware ...@@ -2,10 +2,13 @@ package middleware
import ( import (
"context" "context"
"strings"
"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey" "github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"github.com/google/uuid" "github.com/google/uuid"
"go.uber.org/zap"
) )
// ClientRequestID ensures every request has a unique client_request_id in request.Context(). // ClientRequestID ensures every request has a unique client_request_id in request.Context().
...@@ -24,7 +27,10 @@ func ClientRequestID() gin.HandlerFunc { ...@@ -24,7 +27,10 @@ func ClientRequestID() gin.HandlerFunc {
} }
id := uuid.New().String() id := uuid.New().String()
c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ClientRequestID, id)) ctx := context.WithValue(c.Request.Context(), ctxkey.ClientRequestID, id)
requestLogger := logger.FromContext(ctx).With(zap.String("client_request_id", strings.TrimSpace(id)))
ctx = logger.IntoContext(ctx, requestLogger)
c.Request = c.Request.WithContext(ctx)
c.Next() c.Next()
} }
} }
package middleware package middleware
import ( import (
"log"
"time" "time"
"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/gin-gonic/gin" "github.com/gin-gonic/gin"
"go.uber.org/zap"
) )
// Logger 请求日志中间件 // Logger 请求日志中间件
...@@ -24,38 +26,41 @@ func Logger() gin.HandlerFunc { ...@@ -24,38 +26,41 @@ func Logger() gin.HandlerFunc {
return return
} }
// 结束时间
endTime := time.Now() endTime := time.Now()
// 执行时间
latency := endTime.Sub(startTime) latency := endTime.Sub(startTime)
// 请求方法
method := c.Request.Method method := c.Request.Method
// 状态码
statusCode := c.Writer.Status() statusCode := c.Writer.Status()
// 客户端IP
clientIP := c.ClientIP() clientIP := c.ClientIP()
// 协议版本
protocol := c.Request.Proto protocol := c.Request.Proto
accountID, hasAccountID := c.Request.Context().Value(ctxkey.AccountID).(int64)
platform, _ := c.Request.Context().Value(ctxkey.Platform).(string)
model, _ := c.Request.Context().Value(ctxkey.Model).(string)
fields := []zap.Field{
zap.String("component", "http.access"),
zap.Int("status_code", statusCode),
zap.Int64("latency_ms", latency.Milliseconds()),
zap.String("client_ip", clientIP),
zap.String("protocol", protocol),
zap.String("method", method),
zap.String("path", path),
}
if hasAccountID && accountID > 0 {
fields = append(fields, zap.Int64("account_id", accountID))
}
if platform != "" {
fields = append(fields, zap.String("platform", platform))
}
if model != "" {
fields = append(fields, zap.String("model", model))
}
l := logger.FromContext(c.Request.Context()).With(fields...)
l.Info("http request completed", zap.Time("completed_at", endTime))
// 日志格式: [时间] 状态码 | 延迟 | IP | 协议 | 方法 路径
log.Printf("[GIN] %v | %3d | %13v | %15s | %-6s | %-7s %s",
endTime.Format("2006/01/02 - 15:04:05"),
statusCode,
latency,
clientIP,
protocol,
method,
path,
)
// 如果有错误,额外记录错误信息
if len(c.Errors) > 0 { if len(c.Errors) > 0 {
log.Printf("[GIN] Errors: %v", c.Errors.String()) l.Warn("http request contains gin errors", zap.String("errors", c.Errors.String()))
} }
} }
} }
package middleware
import (
"context"
"net/http"
"net/http/httptest"
"sync"
"testing"
"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/gin-gonic/gin"
)
type testLogSink struct {
mu sync.Mutex
events []*logger.LogEvent
}
func (s *testLogSink) WriteLogEvent(event *logger.LogEvent) {
s.mu.Lock()
defer s.mu.Unlock()
s.events = append(s.events, event)
}
func (s *testLogSink) list() []*logger.LogEvent {
s.mu.Lock()
defer s.mu.Unlock()
out := make([]*logger.LogEvent, len(s.events))
copy(out, s.events)
return out
}
func initMiddlewareTestLogger(t *testing.T) *testLogSink {
t.Helper()
if err := logger.Init(logger.InitOptions{
Level: "debug",
Format: "json",
ServiceName: "sub2api",
Environment: "test",
Output: logger.OutputOptions{
ToStdout: false,
ToFile: false,
},
}); err != nil {
t.Fatalf("init logger: %v", err)
}
sink := &testLogSink{}
logger.SetSink(sink)
t.Cleanup(func() {
logger.SetSink(nil)
})
return sink
}
func TestRequestLogger_GenerateAndPropagateRequestID(t *testing.T) {
gin.SetMode(gin.TestMode)
r := gin.New()
r.Use(RequestLogger())
r.GET("/t", func(c *gin.Context) {
reqID, ok := c.Request.Context().Value(ctxkey.RequestID).(string)
if !ok || reqID == "" {
t.Fatalf("request_id missing in context")
}
if got := c.Writer.Header().Get(requestIDHeader); got != reqID {
t.Fatalf("response header request_id mismatch, header=%q ctx=%q", got, reqID)
}
c.Status(http.StatusOK)
})
w := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/t", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("status=%d", w.Code)
}
if w.Header().Get(requestIDHeader) == "" {
t.Fatalf("X-Request-ID should be set")
}
}
func TestRequestLogger_KeepIncomingRequestID(t *testing.T) {
gin.SetMode(gin.TestMode)
r := gin.New()
r.Use(RequestLogger())
r.GET("/t", func(c *gin.Context) {
reqID, _ := c.Request.Context().Value(ctxkey.RequestID).(string)
if reqID != "rid-fixed" {
t.Fatalf("request_id=%q, want rid-fixed", reqID)
}
c.Status(http.StatusOK)
})
w := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/t", nil)
req.Header.Set(requestIDHeader, "rid-fixed")
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("status=%d", w.Code)
}
if got := w.Header().Get(requestIDHeader); got != "rid-fixed" {
t.Fatalf("header=%q, want rid-fixed", got)
}
}
func TestLogger_AccessLogIncludesCoreFields(t *testing.T) {
gin.SetMode(gin.TestMode)
sink := initMiddlewareTestLogger(t)
r := gin.New()
r.Use(Logger())
r.Use(func(c *gin.Context) {
ctx := c.Request.Context()
ctx = context.WithValue(ctx, ctxkey.AccountID, int64(101))
ctx = context.WithValue(ctx, ctxkey.Platform, "openai")
ctx = context.WithValue(ctx, ctxkey.Model, "gpt-5")
c.Request = c.Request.WithContext(ctx)
c.Next()
})
r.GET("/api/test", func(c *gin.Context) {
c.Status(http.StatusCreated)
})
w := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/api/test", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusCreated {
t.Fatalf("status=%d", w.Code)
}
events := sink.list()
if len(events) == 0 {
t.Fatalf("expected at least one log event")
}
found := false
for _, event := range events {
if event == nil || event.Message != "http request completed" {
continue
}
found = true
switch v := event.Fields["status_code"].(type) {
case int:
if v != http.StatusCreated {
t.Fatalf("status_code field mismatch: %v", v)
}
case int64:
if v != int64(http.StatusCreated) {
t.Fatalf("status_code field mismatch: %v", v)
}
default:
t.Fatalf("status_code type mismatch: %T", v)
}
switch v := event.Fields["account_id"].(type) {
case int64:
if v != 101 {
t.Fatalf("account_id field mismatch: %v", v)
}
case int:
if v != 101 {
t.Fatalf("account_id field mismatch: %v", v)
}
default:
t.Fatalf("account_id type mismatch: %T", v)
}
if event.Fields["platform"] != "openai" || event.Fields["model"] != "gpt-5" {
t.Fatalf("platform/model mismatch: %+v", event.Fields)
}
}
if !found {
t.Fatalf("access log event not found")
}
}
func TestLogger_HealthPathSkipped(t *testing.T) {
gin.SetMode(gin.TestMode)
sink := initMiddlewareTestLogger(t)
r := gin.New()
r.Use(Logger())
r.GET("/health", func(c *gin.Context) {
c.Status(http.StatusOK)
})
w := httptest.NewRecorder()
req := httptest.NewRequest(http.MethodGet, "/health", nil)
r.ServeHTTP(w, req)
if w.Code != http.StatusOK {
t.Fatalf("status=%d", w.Code)
}
if len(sink.list()) != 0 {
t.Fatalf("health endpoint should not write access log")
}
}
package middleware
import (
"context"
"strings"
"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"go.uber.org/zap"
)
const requestIDHeader = "X-Request-ID"
// RequestLogger 在请求入口注入 request-scoped logger。
func RequestLogger() gin.HandlerFunc {
return func(c *gin.Context) {
if c.Request == nil {
c.Next()
return
}
requestID := strings.TrimSpace(c.GetHeader(requestIDHeader))
if requestID == "" {
requestID = uuid.NewString()
}
c.Header(requestIDHeader, requestID)
ctx := context.WithValue(c.Request.Context(), ctxkey.RequestID, requestID)
clientRequestID, _ := ctx.Value(ctxkey.ClientRequestID).(string)
requestLogger := logger.With(
zap.String("component", "http"),
zap.String("request_id", requestID),
zap.String("client_request_id", strings.TrimSpace(clientRequestID)),
zap.String("path", c.Request.URL.Path),
zap.String("method", c.Request.Method),
)
ctx = logger.IntoContext(ctx, requestLogger)
c.Request = c.Request.WithContext(ctx)
c.Next()
}
}
...@@ -29,6 +29,7 @@ func SetupRouter( ...@@ -29,6 +29,7 @@ func SetupRouter(
redisClient *redis.Client, redisClient *redis.Client,
) *gin.Engine { ) *gin.Engine {
// 应用中间件 // 应用中间件
r.Use(middleware2.RequestLogger())
r.Use(middleware2.Logger()) r.Use(middleware2.Logger())
r.Use(middleware2.CORS(cfg.CORS)) r.Use(middleware2.CORS(cfg.CORS))
r.Use(middleware2.SecurityHeaders(cfg.Security.CSP)) r.Use(middleware2.SecurityHeaders(cfg.Security.CSP))
......
...@@ -101,6 +101,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) { ...@@ -101,6 +101,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
{ {
runtime.GET("/alert", h.Admin.Ops.GetAlertRuntimeSettings) runtime.GET("/alert", h.Admin.Ops.GetAlertRuntimeSettings)
runtime.PUT("/alert", h.Admin.Ops.UpdateAlertRuntimeSettings) runtime.PUT("/alert", h.Admin.Ops.UpdateAlertRuntimeSettings)
runtime.GET("/logging", h.Admin.Ops.GetRuntimeLogConfig)
runtime.PUT("/logging", h.Admin.Ops.UpdateRuntimeLogConfig)
runtime.POST("/logging/reset", h.Admin.Ops.ResetRuntimeLogConfig)
} }
// Advanced settings (DB-backed) // Advanced settings (DB-backed)
...@@ -144,6 +147,11 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) { ...@@ -144,6 +147,11 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
// Request drilldown (success + error) // Request drilldown (success + error)
ops.GET("/requests", h.Admin.Ops.ListRequestDetails) ops.GET("/requests", h.Admin.Ops.ListRequestDetails)
// Indexed system logs
ops.GET("/system-logs", h.Admin.Ops.ListSystemLogs)
ops.POST("/system-logs/cleanup", h.Admin.Ops.CleanupSystemLogs)
ops.GET("/system-logs/health", h.Admin.Ops.GetSystemLogIngestionHealth)
// Dashboard (vNext - raw path for MVP) // Dashboard (vNext - raw path for MVP)
ops.GET("/dashboard/overview", h.Admin.Ops.GetDashboardOverview) ops.GET("/dashboard/overview", h.Admin.Ops.GetDashboardOverview)
ops.GET("/dashboard/throughput-trend", h.Admin.Ops.GetDashboardThroughputTrend) ops.GET("/dashboard/throughput-trend", h.Admin.Ops.GetDashboardThroughputTrend)
......
...@@ -161,6 +161,9 @@ const ( ...@@ -161,6 +161,9 @@ const (
// SettingKeyOpsAdvancedSettings stores JSON config for ops advanced settings (data retention, aggregation). // SettingKeyOpsAdvancedSettings stores JSON config for ops advanced settings (data retention, aggregation).
SettingKeyOpsAdvancedSettings = "ops_advanced_settings" SettingKeyOpsAdvancedSettings = "ops_advanced_settings"
// SettingKeyOpsRuntimeLogConfig stores JSON config for runtime log settings.
SettingKeyOpsRuntimeLogConfig = "ops_runtime_log_config"
// ========================= // =========================
// Stream Timeout Handling // Stream Timeout Handling
// ========================= // =========================
......
...@@ -157,6 +157,8 @@ type opsCleanupDeletedCounts struct { ...@@ -157,6 +157,8 @@ type opsCleanupDeletedCounts struct {
errorLogs int64 errorLogs int64
retryAttempts int64 retryAttempts int64
alertEvents int64 alertEvents int64
systemLogs int64
logAudits int64
systemMetrics int64 systemMetrics int64
hourlyPreagg int64 hourlyPreagg int64
dailyPreagg int64 dailyPreagg int64
...@@ -164,10 +166,12 @@ type opsCleanupDeletedCounts struct { ...@@ -164,10 +166,12 @@ type opsCleanupDeletedCounts struct {
func (c opsCleanupDeletedCounts) String() string { func (c opsCleanupDeletedCounts) String() string {
return fmt.Sprintf( return fmt.Sprintf(
"error_logs=%d retry_attempts=%d alert_events=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d", "error_logs=%d retry_attempts=%d alert_events=%d system_logs=%d log_audits=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d",
c.errorLogs, c.errorLogs,
c.retryAttempts, c.retryAttempts,
c.alertEvents, c.alertEvents,
c.systemLogs,
c.logAudits,
c.systemMetrics, c.systemMetrics,
c.hourlyPreagg, c.hourlyPreagg,
c.dailyPreagg, c.dailyPreagg,
...@@ -204,6 +208,18 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet ...@@ -204,6 +208,18 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet
return out, err return out, err
} }
out.alertEvents = n out.alertEvents = n
n, err = deleteOldRowsByID(ctx, s.db, "ops_system_logs", "created_at", cutoff, batchSize, false)
if err != nil {
return out, err
}
out.systemLogs = n
n, err = deleteOldRowsByID(ctx, s.db, "ops_system_log_cleanup_audits", "created_at", cutoff, batchSize, false)
if err != nil {
return out, err
}
out.logAudits = n
} }
// Minute-level metrics snapshots. // Minute-level metrics snapshots.
......
package service
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"time"
"github.com/Wei-Shaw/sub2api/internal/config"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"go.uber.org/zap"
)
func defaultOpsRuntimeLogConfig(cfg *config.Config) *OpsRuntimeLogConfig {
out := &OpsRuntimeLogConfig{
Level: "info",
EnableSampling: false,
SamplingInitial: 100,
SamplingNext: 100,
Caller: true,
StacktraceLevel: "error",
RetentionDays: 30,
}
if cfg == nil {
return out
}
out.Level = strings.ToLower(strings.TrimSpace(cfg.Log.Level))
out.EnableSampling = cfg.Log.Sampling.Enabled
out.SamplingInitial = cfg.Log.Sampling.Initial
out.SamplingNext = cfg.Log.Sampling.Thereafter
out.Caller = cfg.Log.Caller
out.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.Log.StacktraceLevel))
if cfg.Ops.Cleanup.ErrorLogRetentionDays > 0 {
out.RetentionDays = cfg.Ops.Cleanup.ErrorLogRetentionDays
}
return out
}
func normalizeOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig, defaults *OpsRuntimeLogConfig) {
if cfg == nil || defaults == nil {
return
}
cfg.Level = strings.ToLower(strings.TrimSpace(cfg.Level))
if cfg.Level == "" {
cfg.Level = defaults.Level
}
cfg.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel))
if cfg.StacktraceLevel == "" {
cfg.StacktraceLevel = defaults.StacktraceLevel
}
if cfg.SamplingInitial <= 0 {
cfg.SamplingInitial = defaults.SamplingInitial
}
if cfg.SamplingNext <= 0 {
cfg.SamplingNext = defaults.SamplingNext
}
if cfg.RetentionDays <= 0 {
cfg.RetentionDays = defaults.RetentionDays
}
}
func validateOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig) error {
if cfg == nil {
return errors.New("invalid config")
}
switch strings.ToLower(strings.TrimSpace(cfg.Level)) {
case "debug", "info", "warn", "error":
default:
return errors.New("level must be one of: debug/info/warn/error")
}
switch strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel)) {
case "none", "error", "fatal":
default:
return errors.New("stacktrace_level must be one of: none/error/fatal")
}
if cfg.SamplingInitial <= 0 {
return errors.New("sampling_initial must be positive")
}
if cfg.SamplingNext <= 0 {
return errors.New("sampling_thereafter must be positive")
}
if cfg.RetentionDays < 1 || cfg.RetentionDays > 3650 {
return errors.New("retention_days must be between 1 and 3650")
}
return nil
}
func (s *OpsService) GetRuntimeLogConfig(ctx context.Context) (*OpsRuntimeLogConfig, error) {
if s == nil || s.settingRepo == nil {
var cfg *config.Config
if s != nil {
cfg = s.cfg
}
defaultCfg := defaultOpsRuntimeLogConfig(cfg)
return defaultCfg, nil
}
defaultCfg := defaultOpsRuntimeLogConfig(s.cfg)
if ctx == nil {
ctx = context.Background()
}
raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsRuntimeLogConfig)
if err != nil {
if errors.Is(err, ErrSettingNotFound) {
b, _ := json.Marshal(defaultCfg)
_ = s.settingRepo.Set(ctx, SettingKeyOpsRuntimeLogConfig, string(b))
return defaultCfg, nil
}
return nil, err
}
cfg := &OpsRuntimeLogConfig{}
if err := json.Unmarshal([]byte(raw), cfg); err != nil {
return defaultCfg, nil
}
normalizeOpsRuntimeLogConfig(cfg, defaultCfg)
return cfg, nil
}
func (s *OpsService) UpdateRuntimeLogConfig(ctx context.Context, req *OpsRuntimeLogConfig, operatorID int64) (*OpsRuntimeLogConfig, error) {
if s == nil || s.settingRepo == nil {
return nil, errors.New("setting repository not initialized")
}
if req == nil {
return nil, errors.New("invalid config")
}
if ctx == nil {
ctx = context.Background()
}
if operatorID <= 0 {
return nil, errors.New("invalid operator id")
}
oldCfg, err := s.GetRuntimeLogConfig(ctx)
if err != nil {
return nil, err
}
next := *req
normalizeOpsRuntimeLogConfig(&next, defaultOpsRuntimeLogConfig(s.cfg))
if err := validateOpsRuntimeLogConfig(&next); err != nil {
s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "validation_failed: "+err.Error())
return nil, err
}
if err := applyOpsRuntimeLogConfig(&next); err != nil {
s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "apply_failed: "+err.Error())
return nil, err
}
next.Source = "runtime_setting"
next.UpdatedAt = time.Now().UTC().Format(time.RFC3339Nano)
next.UpdatedByUserID = operatorID
encoded, err := json.Marshal(&next)
if err != nil {
return nil, err
}
if err := s.settingRepo.Set(ctx, SettingKeyOpsRuntimeLogConfig, string(encoded)); err != nil {
// 存储失败时回滚到旧配置,避免内存状态与持久化状态不一致。
_ = applyOpsRuntimeLogConfig(oldCfg)
s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "persist_failed: "+err.Error())
return nil, err
}
s.auditRuntimeLogConfigChange(operatorID, oldCfg, &next, "updated")
return &next, nil
}
func (s *OpsService) ResetRuntimeLogConfig(ctx context.Context, operatorID int64) (*OpsRuntimeLogConfig, error) {
if s == nil || s.settingRepo == nil {
return nil, errors.New("setting repository not initialized")
}
if ctx == nil {
ctx = context.Background()
}
if operatorID <= 0 {
return nil, errors.New("invalid operator id")
}
oldCfg, err := s.GetRuntimeLogConfig(ctx)
if err != nil {
return nil, err
}
resetCfg := defaultOpsRuntimeLogConfig(s.cfg)
normalizeOpsRuntimeLogConfig(resetCfg, defaultOpsRuntimeLogConfig(s.cfg))
if err := validateOpsRuntimeLogConfig(resetCfg); err != nil {
s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_validation_failed: "+err.Error())
return nil, err
}
if err := applyOpsRuntimeLogConfig(resetCfg); err != nil {
s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_apply_failed: "+err.Error())
return nil, err
}
// 清理 runtime 覆盖配置,回退到 env/yaml baseline。
if err := s.settingRepo.Delete(ctx, SettingKeyOpsRuntimeLogConfig); err != nil && !errors.Is(err, ErrSettingNotFound) {
_ = applyOpsRuntimeLogConfig(oldCfg)
s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_persist_failed: "+err.Error())
return nil, err
}
now := time.Now().UTC().Format(time.RFC3339Nano)
resetCfg.Source = "baseline"
resetCfg.UpdatedAt = now
resetCfg.UpdatedByUserID = operatorID
s.auditRuntimeLogConfigChange(operatorID, oldCfg, resetCfg, "reset")
return resetCfg, nil
}
func applyOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig) error {
if cfg == nil {
return fmt.Errorf("nil runtime log config")
}
if err := logger.Reconfigure(func(opts *logger.InitOptions) error {
opts.Level = strings.ToLower(strings.TrimSpace(cfg.Level))
opts.Caller = cfg.Caller
opts.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel))
opts.Sampling.Enabled = cfg.EnableSampling
opts.Sampling.Initial = cfg.SamplingInitial
opts.Sampling.Thereafter = cfg.SamplingNext
return nil
}); err != nil {
return err
}
return nil
}
func (s *OpsService) applyRuntimeLogConfigOnStartup(ctx context.Context) {
if s == nil {
return
}
cfg, err := s.GetRuntimeLogConfig(ctx)
if err != nil {
return
}
_ = applyOpsRuntimeLogConfig(cfg)
}
func (s *OpsService) auditRuntimeLogConfigChange(operatorID int64, oldCfg *OpsRuntimeLogConfig, newCfg *OpsRuntimeLogConfig, action string) {
oldRaw, _ := json.Marshal(oldCfg)
newRaw, _ := json.Marshal(newCfg)
logger.With(
zap.String("component", "audit.log_config_change"),
zap.String("action", strings.TrimSpace(action)),
zap.Int64("operator_id", operatorID),
zap.String("old", string(oldRaw)),
zap.String("new", string(newRaw)),
).Info("runtime log config changed")
}
func (s *OpsService) auditRuntimeLogConfigFailure(operatorID int64, oldCfg *OpsRuntimeLogConfig, newCfg *OpsRuntimeLogConfig, reason string) {
oldRaw, _ := json.Marshal(oldCfg)
newRaw, _ := json.Marshal(newCfg)
logger.With(
zap.String("component", "audit.log_config_change"),
zap.String("action", "failed"),
zap.Int64("operator_id", operatorID),
zap.String("reason", strings.TrimSpace(reason)),
zap.String("old", string(oldRaw)),
zap.String("new", string(newRaw)),
).Warn("runtime log config change failed")
}
...@@ -2,6 +2,21 @@ package service ...@@ -2,6 +2,21 @@ package service
import "time" import "time"
type OpsSystemLog struct {
ID int64 `json:"id"`
CreatedAt time.Time `json:"created_at"`
Level string `json:"level"`
Component string `json:"component"`
Message string `json:"message"`
RequestID string `json:"request_id"`
ClientRequestID string `json:"client_request_id"`
UserID *int64 `json:"user_id"`
AccountID *int64 `json:"account_id"`
Platform string `json:"platform"`
Model string `json:"model"`
Extra map[string]any `json:"extra,omitempty"`
}
type OpsErrorLog struct { type OpsErrorLog struct {
ID int64 `json:"id"` ID int64 `json:"id"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
......
...@@ -10,6 +10,10 @@ type OpsRepository interface { ...@@ -10,6 +10,10 @@ type OpsRepository interface {
ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error) ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error)
GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error) GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error)
ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error) ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error)
BatchInsertSystemLogs(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error)
ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error)
DeleteSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error)
InsertSystemLogCleanupAudit(ctx context.Context, input *OpsSystemLogCleanupAudit) error
InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error) InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error)
UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error
...@@ -205,6 +209,69 @@ type OpsInsertSystemMetricsInput struct { ...@@ -205,6 +209,69 @@ type OpsInsertSystemMetricsInput struct {
ConcurrencyQueueDepth *int ConcurrencyQueueDepth *int
} }
type OpsInsertSystemLogInput struct {
CreatedAt time.Time
Level string
Component string
Message string
RequestID string
ClientRequestID string
UserID *int64
AccountID *int64
Platform string
Model string
ExtraJSON string
}
type OpsSystemLogFilter struct {
StartTime *time.Time
EndTime *time.Time
Level string
Component string
RequestID string
ClientRequestID string
UserID *int64
AccountID *int64
Platform string
Model string
Query string
Page int
PageSize int
}
type OpsSystemLogCleanupFilter struct {
StartTime *time.Time
EndTime *time.Time
Level string
Component string
RequestID string
ClientRequestID string
UserID *int64
AccountID *int64
Platform string
Model string
Query string
}
type OpsSystemLogList struct {
Logs []*OpsSystemLog `json:"logs"`
Total int `json:"total"`
Page int `json:"page"`
PageSize int `json:"page_size"`
}
type OpsSystemLogCleanupAudit struct {
CreatedAt time.Time
OperatorID int64
Conditions string
DeletedRows int64
}
type OpsSystemMetricsSnapshot struct { type OpsSystemMetricsSnapshot struct {
ID int64 `json:"id"` ID int64 `json:"id"`
CreatedAt time.Time `json:"created_at"` CreatedAt time.Time `json:"created_at"`
......
package service
import (
"context"
"time"
)
// opsRepoMock is a test-only OpsRepository implementation with optional function hooks.
type opsRepoMock struct {
BatchInsertSystemLogsFn func(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error)
ListSystemLogsFn func(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error)
DeleteSystemLogsFn func(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error)
InsertSystemLogCleanupAuditFn func(ctx context.Context, input *OpsSystemLogCleanupAudit) error
}
func (m *opsRepoMock) InsertErrorLog(ctx context.Context, input *OpsInsertErrorLogInput) (int64, error) {
return 0, nil
}
func (m *opsRepoMock) ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error) {
return &OpsErrorLogList{Errors: []*OpsErrorLog{}, Page: 1, PageSize: 20}, nil
}
func (m *opsRepoMock) GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error) {
return &OpsErrorLogDetail{}, nil
}
func (m *opsRepoMock) ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error) {
return []*OpsRequestDetail{}, 0, nil
}
func (m *opsRepoMock) BatchInsertSystemLogs(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) {
if m.BatchInsertSystemLogsFn != nil {
return m.BatchInsertSystemLogsFn(ctx, inputs)
}
return int64(len(inputs)), nil
}
func (m *opsRepoMock) ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) {
if m.ListSystemLogsFn != nil {
return m.ListSystemLogsFn(ctx, filter)
}
return &OpsSystemLogList{Logs: []*OpsSystemLog{}, Total: 0, Page: 1, PageSize: 50}, nil
}
func (m *opsRepoMock) DeleteSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error) {
if m.DeleteSystemLogsFn != nil {
return m.DeleteSystemLogsFn(ctx, filter)
}
return 0, nil
}
func (m *opsRepoMock) InsertSystemLogCleanupAudit(ctx context.Context, input *OpsSystemLogCleanupAudit) error {
if m.InsertSystemLogCleanupAuditFn != nil {
return m.InsertSystemLogCleanupAuditFn(ctx, input)
}
return nil
}
func (m *opsRepoMock) InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error) {
return 0, nil
}
func (m *opsRepoMock) UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error {
return nil
}
func (m *opsRepoMock) GetLatestRetryAttemptForError(ctx context.Context, sourceErrorID int64) (*OpsRetryAttempt, error) {
return nil, nil
}
func (m *opsRepoMock) ListRetryAttemptsByErrorID(ctx context.Context, sourceErrorID int64, limit int) ([]*OpsRetryAttempt, error) {
return []*OpsRetryAttempt{}, nil
}
func (m *opsRepoMock) UpdateErrorResolution(ctx context.Context, errorID int64, resolved bool, resolvedByUserID *int64, resolvedRetryID *int64, resolvedAt *time.Time) error {
return nil
}
func (m *opsRepoMock) GetWindowStats(ctx context.Context, filter *OpsDashboardFilter) (*OpsWindowStats, error) {
return &OpsWindowStats{}, nil
}
func (m *opsRepoMock) GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error) {
return &OpsRealtimeTrafficSummary{}, nil
}
func (m *opsRepoMock) GetDashboardOverview(ctx context.Context, filter *OpsDashboardFilter) (*OpsDashboardOverview, error) {
return &OpsDashboardOverview{}, nil
}
func (m *opsRepoMock) GetThroughputTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsThroughputTrendResponse, error) {
return &OpsThroughputTrendResponse{}, nil
}
func (m *opsRepoMock) GetLatencyHistogram(ctx context.Context, filter *OpsDashboardFilter) (*OpsLatencyHistogramResponse, error) {
return &OpsLatencyHistogramResponse{}, nil
}
func (m *opsRepoMock) GetErrorTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsErrorTrendResponse, error) {
return &OpsErrorTrendResponse{}, nil
}
func (m *opsRepoMock) GetErrorDistribution(ctx context.Context, filter *OpsDashboardFilter) (*OpsErrorDistributionResponse, error) {
return &OpsErrorDistributionResponse{}, nil
}
func (m *opsRepoMock) GetOpenAITokenStats(ctx context.Context, filter *OpsOpenAITokenStatsFilter) (*OpsOpenAITokenStatsResponse, error) {
return &OpsOpenAITokenStatsResponse{}, nil
}
func (m *opsRepoMock) InsertSystemMetrics(ctx context.Context, input *OpsInsertSystemMetricsInput) error {
return nil
}
func (m *opsRepoMock) GetLatestSystemMetrics(ctx context.Context, windowMinutes int) (*OpsSystemMetricsSnapshot, error) {
return &OpsSystemMetricsSnapshot{}, nil
}
func (m *opsRepoMock) UpsertJobHeartbeat(ctx context.Context, input *OpsUpsertJobHeartbeatInput) error {
return nil
}
func (m *opsRepoMock) ListJobHeartbeats(ctx context.Context) ([]*OpsJobHeartbeat, error) {
return []*OpsJobHeartbeat{}, nil
}
func (m *opsRepoMock) ListAlertRules(ctx context.Context) ([]*OpsAlertRule, error) {
return []*OpsAlertRule{}, nil
}
func (m *opsRepoMock) CreateAlertRule(ctx context.Context, input *OpsAlertRule) (*OpsAlertRule, error) {
return input, nil
}
func (m *opsRepoMock) UpdateAlertRule(ctx context.Context, input *OpsAlertRule) (*OpsAlertRule, error) {
return input, nil
}
func (m *opsRepoMock) DeleteAlertRule(ctx context.Context, id int64) error {
return nil
}
func (m *opsRepoMock) ListAlertEvents(ctx context.Context, filter *OpsAlertEventFilter) ([]*OpsAlertEvent, error) {
return []*OpsAlertEvent{}, nil
}
func (m *opsRepoMock) GetAlertEventByID(ctx context.Context, eventID int64) (*OpsAlertEvent, error) {
return &OpsAlertEvent{}, nil
}
func (m *opsRepoMock) GetActiveAlertEvent(ctx context.Context, ruleID int64) (*OpsAlertEvent, error) {
return nil, nil
}
func (m *opsRepoMock) GetLatestAlertEvent(ctx context.Context, ruleID int64) (*OpsAlertEvent, error) {
return nil, nil
}
func (m *opsRepoMock) CreateAlertEvent(ctx context.Context, event *OpsAlertEvent) (*OpsAlertEvent, error) {
return event, nil
}
func (m *opsRepoMock) UpdateAlertEventStatus(ctx context.Context, eventID int64, status string, resolvedAt *time.Time) error {
return nil
}
func (m *opsRepoMock) UpdateAlertEventEmailSent(ctx context.Context, eventID int64, emailSent bool) error {
return nil
}
func (m *opsRepoMock) CreateAlertSilence(ctx context.Context, input *OpsAlertSilence) (*OpsAlertSilence, error) {
return input, nil
}
func (m *opsRepoMock) IsAlertSilenced(ctx context.Context, ruleID int64, platform string, groupID *int64, region *string, now time.Time) (bool, error) {
return false, nil
}
func (m *opsRepoMock) UpsertHourlyMetrics(ctx context.Context, startTime, endTime time.Time) error {
return nil
}
func (m *opsRepoMock) UpsertDailyMetrics(ctx context.Context, startTime, endTime time.Time) error {
return nil
}
func (m *opsRepoMock) GetLatestHourlyBucketStart(ctx context.Context) (time.Time, bool, error) {
return time.Time{}, false, nil
}
func (m *opsRepoMock) GetLatestDailyBucketDate(ctx context.Context) (time.Time, bool, error) {
return time.Time{}, false, nil
}
var _ OpsRepository = (*opsRepoMock)(nil)
...@@ -37,6 +37,7 @@ type OpsService struct { ...@@ -37,6 +37,7 @@ type OpsService struct {
openAIGatewayService *OpenAIGatewayService openAIGatewayService *OpenAIGatewayService
geminiCompatService *GeminiMessagesCompatService geminiCompatService *GeminiMessagesCompatService
antigravityGatewayService *AntigravityGatewayService antigravityGatewayService *AntigravityGatewayService
systemLogSink *OpsSystemLogSink
} }
func NewOpsService( func NewOpsService(
...@@ -50,8 +51,9 @@ func NewOpsService( ...@@ -50,8 +51,9 @@ func NewOpsService(
openAIGatewayService *OpenAIGatewayService, openAIGatewayService *OpenAIGatewayService,
geminiCompatService *GeminiMessagesCompatService, geminiCompatService *GeminiMessagesCompatService,
antigravityGatewayService *AntigravityGatewayService, antigravityGatewayService *AntigravityGatewayService,
systemLogSink *OpsSystemLogSink,
) *OpsService { ) *OpsService {
return &OpsService{ svc := &OpsService{
opsRepo: opsRepo, opsRepo: opsRepo,
settingRepo: settingRepo, settingRepo: settingRepo,
cfg: cfg, cfg: cfg,
...@@ -64,7 +66,10 @@ func NewOpsService( ...@@ -64,7 +66,10 @@ func NewOpsService(
openAIGatewayService: openAIGatewayService, openAIGatewayService: openAIGatewayService,
geminiCompatService: geminiCompatService, geminiCompatService: geminiCompatService,
antigravityGatewayService: antigravityGatewayService, antigravityGatewayService: antigravityGatewayService,
systemLogSink: systemLogSink,
} }
svc.applyRuntimeLogConfigOnStartup(context.Background())
return svc
} }
func (s *OpsService) RequireMonitoringEnabled(ctx context.Context) error { func (s *OpsService) RequireMonitoringEnabled(ctx context.Context) error {
......
package service
import (
"context"
"database/sql"
"encoding/json"
"errors"
"log"
"strings"
"time"
infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
)
func (s *OpsService) ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) {
if err := s.RequireMonitoringEnabled(ctx); err != nil {
return nil, err
}
if s.opsRepo == nil {
return &OpsSystemLogList{
Logs: []*OpsSystemLog{},
Total: 0,
Page: 1,
PageSize: 50,
}, nil
}
if filter == nil {
filter = &OpsSystemLogFilter{}
}
if filter.Page <= 0 {
filter.Page = 1
}
if filter.PageSize <= 0 {
filter.PageSize = 50
}
if filter.PageSize > 200 {
filter.PageSize = 200
}
result, err := s.opsRepo.ListSystemLogs(ctx, filter)
if err != nil {
return nil, infraerrors.InternalServer("OPS_SYSTEM_LOG_LIST_FAILED", "Failed to list system logs").WithCause(err)
}
return result, nil
}
func (s *OpsService) CleanupSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter, operatorID int64) (int64, error) {
if err := s.RequireMonitoringEnabled(ctx); err != nil {
return 0, err
}
if s.opsRepo == nil {
return 0, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available")
}
if operatorID <= 0 {
return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_INVALID_OPERATOR", "invalid operator")
}
if filter == nil {
filter = &OpsSystemLogCleanupFilter{}
}
if filter.EndTime != nil && filter.StartTime != nil && filter.StartTime.After(*filter.EndTime) {
return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_INVALID_RANGE", "invalid time range")
}
deletedRows, err := s.opsRepo.DeleteSystemLogs(ctx, filter)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return 0, nil
}
if strings.Contains(strings.ToLower(err.Error()), "requires at least one filter") {
return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_FILTER_REQUIRED", "cleanup requires at least one filter condition")
}
return 0, infraerrors.InternalServer("OPS_SYSTEM_LOG_CLEANUP_FAILED", "Failed to cleanup system logs").WithCause(err)
}
if auditErr := s.opsRepo.InsertSystemLogCleanupAudit(ctx, &OpsSystemLogCleanupAudit{
CreatedAt: time.Now().UTC(),
OperatorID: operatorID,
Conditions: marshalSystemLogCleanupConditions(filter),
DeletedRows: deletedRows,
}); auditErr != nil {
// 审计失败不影响主流程,避免运维清理被阻塞。
log.Printf("[OpsSystemLog] cleanup audit failed: %v", auditErr)
}
return deletedRows, nil
}
func marshalSystemLogCleanupConditions(filter *OpsSystemLogCleanupFilter) string {
if filter == nil {
return "{}"
}
payload := map[string]any{
"level": strings.TrimSpace(filter.Level),
"component": strings.TrimSpace(filter.Component),
"request_id": strings.TrimSpace(filter.RequestID),
"client_request_id": strings.TrimSpace(filter.ClientRequestID),
"platform": strings.TrimSpace(filter.Platform),
"model": strings.TrimSpace(filter.Model),
"query": strings.TrimSpace(filter.Query),
}
if filter.UserID != nil {
payload["user_id"] = *filter.UserID
}
if filter.AccountID != nil {
payload["account_id"] = *filter.AccountID
}
if filter.StartTime != nil && !filter.StartTime.IsZero() {
payload["start_time"] = filter.StartTime.UTC().Format(time.RFC3339Nano)
}
if filter.EndTime != nil && !filter.EndTime.IsZero() {
payload["end_time"] = filter.EndTime.UTC().Format(time.RFC3339Nano)
}
raw, err := json.Marshal(payload)
if err != nil {
return "{}"
}
return string(raw)
}
func (s *OpsService) GetSystemLogSinkHealth() OpsSystemLogSinkHealth {
if s == nil || s.systemLogSink == nil {
return OpsSystemLogSinkHealth{}
}
return s.systemLogSink.Health()
}
package service
import (
"context"
"encoding/json"
"fmt"
"os"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
"github.com/Wei-Shaw/sub2api/internal/util/logredact"
)
type OpsSystemLogSinkHealth struct {
QueueDepth int64 `json:"queue_depth"`
QueueCapacity int64 `json:"queue_capacity"`
DroppedCount uint64 `json:"dropped_count"`
WriteFailed uint64 `json:"write_failed_count"`
WrittenCount uint64 `json:"written_count"`
AvgWriteDelayMs uint64 `json:"avg_write_delay_ms"`
LastError string `json:"last_error"`
}
type OpsSystemLogSink struct {
opsRepo OpsRepository
queue chan *logger.LogEvent
batchSize int
flushInterval time.Duration
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
droppedCount uint64
writeFailed uint64
writtenCount uint64
totalDelayNs uint64
lastError atomic.Value
}
func NewOpsSystemLogSink(opsRepo OpsRepository) *OpsSystemLogSink {
ctx, cancel := context.WithCancel(context.Background())
s := &OpsSystemLogSink{
opsRepo: opsRepo,
queue: make(chan *logger.LogEvent, 5000),
batchSize: 200,
flushInterval: time.Second,
ctx: ctx,
cancel: cancel,
}
s.lastError.Store("")
return s
}
func (s *OpsSystemLogSink) Start() {
if s == nil || s.opsRepo == nil {
return
}
s.wg.Add(1)
go s.run()
}
func (s *OpsSystemLogSink) Stop() {
if s == nil {
return
}
s.cancel()
s.wg.Wait()
}
func (s *OpsSystemLogSink) WriteLogEvent(event *logger.LogEvent) {
if s == nil || event == nil || !s.shouldIndex(event) {
return
}
select {
case s.queue <- event:
default:
atomic.AddUint64(&s.droppedCount, 1)
}
}
func (s *OpsSystemLogSink) shouldIndex(event *logger.LogEvent) bool {
level := strings.ToLower(strings.TrimSpace(event.Level))
switch level {
case "warn", "warning", "error", "fatal", "panic", "dpanic":
return true
}
component := strings.ToLower(strings.TrimSpace(event.Component))
if strings.Contains(component, "http.access") {
return true
}
if strings.Contains(component, "audit") {
return true
}
return false
}
func (s *OpsSystemLogSink) run() {
defer s.wg.Done()
ticker := time.NewTicker(s.flushInterval)
defer ticker.Stop()
batch := make([]*logger.LogEvent, 0, s.batchSize)
flush := func() {
if len(batch) == 0 {
return
}
started := time.Now()
inserted, err := s.flushBatch(batch)
delay := time.Since(started)
if err != nil {
atomic.AddUint64(&s.writeFailed, uint64(len(batch)))
s.lastError.Store(err.Error())
_, _ = fmt.Fprintf(os.Stderr, "time=%s level=WARN msg=\"ops system log sink flush failed\" err=%v batch=%d\n",
time.Now().Format(time.RFC3339Nano), err, len(batch),
)
} else {
atomic.AddUint64(&s.writtenCount, uint64(inserted))
atomic.AddUint64(&s.totalDelayNs, uint64(delay.Nanoseconds()))
s.lastError.Store("")
}
batch = batch[:0]
}
for {
select {
case <-s.ctx.Done():
flush()
return
case item := <-s.queue:
if item == nil {
continue
}
batch = append(batch, item)
if len(batch) >= s.batchSize {
flush()
}
case <-ticker.C:
flush()
}
}
}
func (s *OpsSystemLogSink) flushBatch(batch []*logger.LogEvent) (int, error) {
inputs := make([]*OpsInsertSystemLogInput, 0, len(batch))
for _, event := range batch {
if event == nil {
continue
}
createdAt := event.Time.UTC()
if createdAt.IsZero() {
createdAt = time.Now().UTC()
}
fields := copyMap(event.Fields)
requestID := asString(fields["request_id"])
clientRequestID := asString(fields["client_request_id"])
platform := asString(fields["platform"])
model := asString(fields["model"])
component := strings.TrimSpace(event.Component)
if fieldComponent := asString(fields["component"]); fieldComponent != "" {
component = fieldComponent
}
if component == "" {
component = "app"
}
userID := asInt64Ptr(fields["user_id"])
accountID := asInt64Ptr(fields["account_id"])
// 统一脱敏后写入索引。
message := logredact.RedactText(strings.TrimSpace(event.Message))
redactedExtra := logredact.RedactMap(fields)
extraJSONBytes, _ := json.Marshal(redactedExtra)
extraJSON := string(extraJSONBytes)
if strings.TrimSpace(extraJSON) == "" {
extraJSON = "{}"
}
inputs = append(inputs, &OpsInsertSystemLogInput{
CreatedAt: createdAt,
Level: strings.ToLower(strings.TrimSpace(event.Level)),
Component: component,
Message: message,
RequestID: requestID,
ClientRequestID: clientRequestID,
UserID: userID,
AccountID: accountID,
Platform: platform,
Model: model,
ExtraJSON: extraJSON,
})
}
if len(inputs) == 0 {
return 0, nil
}
ctx, cancel := context.WithTimeout(s.ctx, 5*time.Second)
defer cancel()
inserted, err := s.opsRepo.BatchInsertSystemLogs(ctx, inputs)
if err != nil {
return 0, err
}
return int(inserted), nil
}
func (s *OpsSystemLogSink) Health() OpsSystemLogSinkHealth {
if s == nil {
return OpsSystemLogSinkHealth{}
}
written := atomic.LoadUint64(&s.writtenCount)
totalDelay := atomic.LoadUint64(&s.totalDelayNs)
var avgDelay uint64
if written > 0 {
avgDelay = (totalDelay / written) / uint64(time.Millisecond)
}
lastErr, _ := s.lastError.Load().(string)
return OpsSystemLogSinkHealth{
QueueDepth: int64(len(s.queue)),
QueueCapacity: int64(cap(s.queue)),
DroppedCount: atomic.LoadUint64(&s.droppedCount),
WriteFailed: atomic.LoadUint64(&s.writeFailed),
WrittenCount: written,
AvgWriteDelayMs: avgDelay,
LastError: strings.TrimSpace(lastErr),
}
}
func copyMap(in map[string]any) map[string]any {
if len(in) == 0 {
return map[string]any{}
}
out := make(map[string]any, len(in))
for k, v := range in {
out[k] = v
}
return out
}
func asString(v any) string {
switch t := v.(type) {
case string:
return strings.TrimSpace(t)
case fmt.Stringer:
return strings.TrimSpace(t.String())
default:
return ""
}
}
func asInt64Ptr(v any) *int64 {
switch t := v.(type) {
case int:
n := int64(t)
if n <= 0 {
return nil
}
return &n
case int64:
n := t
if n <= 0 {
return nil
}
return &n
case float64:
n := int64(t)
if n <= 0 {
return nil
}
return &n
case json.Number:
if n, err := t.Int64(); err == nil {
if n <= 0 {
return nil
}
return &n
}
case string:
raw := strings.TrimSpace(t)
if raw == "" {
return nil
}
if n, err := strconv.ParseInt(raw, 10, 64); err == nil {
if n <= 0 {
return nil
}
return &n
}
}
return nil
}
package service
import (
"context"
"encoding/json"
"errors"
"fmt"
"strings"
"sync/atomic"
"testing"
"time"
"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
)
func TestOpsSystemLogSink_ShouldIndex(t *testing.T) {
sink := &OpsSystemLogSink{}
cases := []struct {
name string
event *logger.LogEvent
want bool
}{
{
name: "warn level",
event: &logger.LogEvent{Level: "warn", Component: "app"},
want: true,
},
{
name: "error level",
event: &logger.LogEvent{Level: "error", Component: "app"},
want: true,
},
{
name: "access component",
event: &logger.LogEvent{Level: "info", Component: "http.access"},
want: true,
},
{
name: "audit component",
event: &logger.LogEvent{Level: "info", Component: "audit.log_config_change"},
want: true,
},
{
name: "plain info",
event: &logger.LogEvent{Level: "info", Component: "app"},
want: false,
},
}
for _, tc := range cases {
if got := sink.shouldIndex(tc.event); got != tc.want {
t.Fatalf("%s: shouldIndex()=%v, want %v", tc.name, got, tc.want)
}
}
}
func TestOpsSystemLogSink_WriteLogEvent_ShouldDropWhenQueueFull(t *testing.T) {
sink := &OpsSystemLogSink{
queue: make(chan *logger.LogEvent, 1),
}
sink.WriteLogEvent(&logger.LogEvent{Level: "warn", Component: "app"})
sink.WriteLogEvent(&logger.LogEvent{Level: "warn", Component: "app"})
if got := len(sink.queue); got != 1 {
t.Fatalf("queue len = %d, want 1", got)
}
if dropped := atomic.LoadUint64(&sink.droppedCount); dropped != 1 {
t.Fatalf("droppedCount = %d, want 1", dropped)
}
}
func TestOpsSystemLogSink_Health(t *testing.T) {
sink := &OpsSystemLogSink{
queue: make(chan *logger.LogEvent, 10),
}
sink.lastError.Store("db timeout")
atomic.StoreUint64(&sink.droppedCount, 3)
atomic.StoreUint64(&sink.writeFailed, 2)
atomic.StoreUint64(&sink.writtenCount, 5)
atomic.StoreUint64(&sink.totalDelayNs, uint64(5000000)) // 5ms total -> avg 1ms
sink.queue <- &logger.LogEvent{Level: "warn", Component: "app"}
sink.queue <- &logger.LogEvent{Level: "warn", Component: "app"}
health := sink.Health()
if health.QueueDepth != 2 {
t.Fatalf("queue depth = %d, want 2", health.QueueDepth)
}
if health.QueueCapacity != 10 {
t.Fatalf("queue capacity = %d, want 10", health.QueueCapacity)
}
if health.DroppedCount != 3 {
t.Fatalf("dropped = %d, want 3", health.DroppedCount)
}
if health.WriteFailed != 2 {
t.Fatalf("write failed = %d, want 2", health.WriteFailed)
}
if health.WrittenCount != 5 {
t.Fatalf("written = %d, want 5", health.WrittenCount)
}
if health.AvgWriteDelayMs != 1 {
t.Fatalf("avg delay ms = %d, want 1", health.AvgWriteDelayMs)
}
if health.LastError != "db timeout" {
t.Fatalf("last error = %q, want db timeout", health.LastError)
}
}
func TestOpsSystemLogSink_StartStopAndFlushSuccess(t *testing.T) {
done := make(chan struct{}, 1)
var captured []*OpsInsertSystemLogInput
repo := &opsRepoMock{
BatchInsertSystemLogsFn: func(_ context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) {
captured = append(captured, inputs...)
select {
case done <- struct{}{}:
default:
}
return int64(len(inputs)), nil
},
}
sink := NewOpsSystemLogSink(repo)
sink.batchSize = 1
sink.flushInterval = 10 * time.Millisecond
sink.Start()
defer sink.Stop()
sink.WriteLogEvent(&logger.LogEvent{
Time: time.Now().UTC(),
Level: "warn",
Component: "http.access",
Message: `authorization="Bearer sk-test-123"`,
Fields: map[string]any{
"component": "http.access",
"request_id": "req-1",
"client_request_id": "creq-1",
"user_id": "12",
"account_id": json.Number("34"),
"platform": "openai",
"model": "gpt-5",
},
})
select {
case <-done:
case <-time.After(2 * time.Second):
t.Fatalf("timeout waiting for sink flush")
}
if len(captured) != 1 {
t.Fatalf("captured len = %d, want 1", len(captured))
}
item := captured[0]
if item.RequestID != "req-1" || item.ClientRequestID != "creq-1" {
t.Fatalf("unexpected request ids: %+v", item)
}
if item.UserID == nil || *item.UserID != 12 {
t.Fatalf("unexpected user_id: %+v", item.UserID)
}
if item.AccountID == nil || *item.AccountID != 34 {
t.Fatalf("unexpected account_id: %+v", item.AccountID)
}
if strings.TrimSpace(item.Message) == "" {
t.Fatalf("message should not be empty")
}
health := sink.Health()
if health.WrittenCount == 0 {
t.Fatalf("written_count should be >0")
}
}
func TestOpsSystemLogSink_FlushFailureUpdatesHealth(t *testing.T) {
repo := &opsRepoMock{
BatchInsertSystemLogsFn: func(_ context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) {
return 0, errors.New("db unavailable")
},
}
sink := NewOpsSystemLogSink(repo)
sink.batchSize = 1
sink.flushInterval = 10 * time.Millisecond
sink.Start()
defer sink.Stop()
sink.WriteLogEvent(&logger.LogEvent{
Time: time.Now().UTC(),
Level: "warn",
Component: "app",
Message: "boom",
Fields: map[string]any{},
})
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
health := sink.Health()
if health.WriteFailed > 0 {
if !strings.Contains(health.LastError, "db unavailable") {
t.Fatalf("unexpected last error: %s", health.LastError)
}
return
}
time.Sleep(20 * time.Millisecond)
}
t.Fatalf("write_failed_count not updated")
}
type stringerValue string
func (s stringerValue) String() string { return string(s) }
func TestOpsSystemLogSink_HelperFunctions(t *testing.T) {
src := map[string]any{"a": 1}
cloned := copyMap(src)
src["a"] = 2
v, ok := cloned["a"].(int)
if !ok || v != 1 {
t.Fatalf("copyMap should create copy")
}
if got := asString(stringerValue(" hello ")); got != "hello" {
t.Fatalf("asString stringer = %q", got)
}
if got := asString(fmt.Errorf("x")); got != "" {
t.Fatalf("asString error should be empty, got %q", got)
}
if got := asString(123); got != "" {
t.Fatalf("asString non-string should be empty, got %q", got)
}
cases := []struct {
in any
want int64
ok bool
}{
{in: 5, want: 5, ok: true},
{in: int64(6), want: 6, ok: true},
{in: float64(7), want: 7, ok: true},
{in: json.Number("8"), want: 8, ok: true},
{in: "9", want: 9, ok: true},
{in: "0", ok: false},
{in: -1, ok: false},
{in: "abc", ok: false},
}
for _, tc := range cases {
got := asInt64Ptr(tc.in)
if tc.ok {
if got == nil || *got != tc.want {
t.Fatalf("asInt64Ptr(%v) = %+v, want %d", tc.in, got, tc.want)
}
} else if got != nil {
t.Fatalf("asInt64Ptr(%v) should be nil, got %d", tc.in, *got)
}
}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment