feat(log): 落地统一日志底座与系统日志运维能力

fff1d548 · yangjianbo · a5f29019 · fff1d548 · fff1d548 · fff1d548
Commit fff1d548 authored Feb 12, 2026 by yangjianbo
--- a/backend/internal/pkg/logger/options_test.go
+++ b/backend/internal/pkg/logger/options_test.go
+package logger
+import (
+	"os"
+	"path/filepath"
+	"testing"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+)
+// TestResolveLogFilePath_Default verifies that an empty path argument combined
+// with an empty DATA_DIR resolves to DefaultContainerLogPath.
+func TestResolveLogFilePath_Default(t *testing.T) {
+	t.Setenv("DATA_DIR", "")
+	want := DefaultContainerLogPath
+	if got := resolveLogFilePath(""); got != want {
+		t.Fatalf("resolveLogFilePath() = %q, want %q", got, want)
+	}
+}
+func TestResolveLogFilePath_WithDataDir(t *testing.T) {
+	t.Setenv("DATA_DIR", "/tmp/sub2api-data")
+	got := resolveLogFilePath("")
+	want := filepath.Join("/tmp/sub2api-data", "logs", "sub2api.log")
+	if got != want {
+		t.Fatalf("resolveLogFilePath() = %q, want %q", got, want)
+	}
+}
+func TestResolveLogFilePath_ExplicitPath(t *testing.T) {
+	t.Setenv("DATA_DIR", "/tmp/ignore")
+	got := resolveLogFilePath("/var/log/custom.log")
+	if got != "/var/log/custom.log" {
+		t.Fatalf("resolveLogFilePath() = %q, want explicit path", got)
+	}
+}
+func TestNormalizedOptions_InvalidFallback(t *testing.T) {
+	t.Setenv("DATA_DIR", "")
+	opts := InitOptions{
+		Level:           "TRACE",
+		Format:          "TEXT",
+		ServiceName:     "",
+		Environment:     "",
+		StacktraceLevel: "panic",
+		Output: OutputOptions{
+			ToStdout: false,
+			ToFile:   false,
+		},
+		Rotation: RotationOptions{
+			MaxSizeMB:  0,
+			MaxBackups: -1,
+			MaxAgeDays: -1,
+		},
+		Sampling: SamplingOptions{
+			Enabled:    true,
+			Initial:    0,
+			Thereafter: 0,
+		},
+	}
+	out := opts.normalized()
+	if out.Level != "trace" {
+		// normalized 仅做 trim/lower，不做校验；校验在 config 层。
+		t.Fatalf("normalized level should preserve value for upstream validation, got %q", out.Level)
+	}
+	if !out.Output.ToStdout {
+		t.Fatalf("normalized output should fallback to stdout")
+	}
+	if out.Output.FilePath != DefaultContainerLogPath {
+		t.Fatalf("normalized file path = %q", out.Output.FilePath)
+	}
+	if out.Rotation.MaxSizeMB != 100 {
+		t.Fatalf("normalized max_size_mb = %d", out.Rotation.MaxSizeMB)
+	}
+	if out.Rotation.MaxBackups != 10 {
+		t.Fatalf("normalized max_backups = %d", out.Rotation.MaxBackups)
+	}
+	if out.Rotation.MaxAgeDays != 7 {
+		t.Fatalf("normalized max_age_days = %d", out.Rotation.MaxAgeDays)
+	}
+	if out.Sampling.Initial != 100 || out.Sampling.Thereafter != 100 {
+		t.Fatalf("normalized sampling defaults invalid: %+v", out.Sampling)
+	}
+}
+// TestBuildFileCore_InvalidPathFallback points the file output at a path nested
+// under os.DevNull and expects buildFileCore to report an error.
+func TestBuildFileCore_InvalidPathFallback(t *testing.T) {
+	t.Setenv("DATA_DIR", "")
+	badOpts := bootstrapOptions()
+	badOpts.Output.ToFile = true
+	badOpts.Output.FilePath = filepath.Join(os.DevNull, "logs", "sub2api.log")
+	jsonEncoder := zapcore.NewJSONEncoder(zapcore.EncoderConfig{
+		TimeKey:     "time",
+		LevelKey:    "level",
+		MessageKey:  "msg",
+		EncodeTime:  zapcore.ISO8601TimeEncoder,
+		EncodeLevel: zapcore.CapitalLevelEncoder,
+	})
+	if _, _, err := buildFileCore(jsonEncoder, zap.NewAtomicLevel(), badOpts); err == nil {
+		t.Fatalf("buildFileCore() expected error for invalid path")
+	}
+}
--- a/backend/internal/pkg/logger/slog_handler.go
+++ b/backend/internal/pkg/logger/slog_handler.go
+package logger
+import (
+	"context"
+	"log/slog"
+	"strings"
+	"time"
+	"go.uber.org/zap"
+	"go.uber.org/zap/zapcore"
+)
+// slogZapHandler is a slog.Handler implementation that forwards records to a
+// zap.Logger.
+type slogZapHandler struct {
+	// logger is the zap logger that receives every handled record.
+	logger *zap.Logger
+	// attrs holds the attributes accumulated through WithAttrs.
+	attrs  []slog.Attr
+	// groups holds the group names accumulated through WithGroup.
+	groups []string
+}
+// newSlogZapHandler wraps logger in a slog.Handler. A nil logger is replaced by
+// a no-op zap logger so the returned handler is always usable.
+func newSlogZapHandler(logger *zap.Logger) slog.Handler {
+	handler := &slogZapHandler{
+		logger: logger,
+		attrs:  make([]slog.Attr, 0, 8),
+		groups: make([]string, 0, 4),
+	}
+	if handler.logger == nil {
+		handler.logger = zap.NewNop()
+	}
+	return handler
+}
+// Enabled reports whether the wrapped zap core accepts the level that the slog
+// level maps to: >= slog.LevelError uses LevelError, >= slog.LevelWarn uses
+// LevelWarn, <= slog.LevelDebug uses LevelDebug, anything else uses LevelInfo.
+func (h *slogZapHandler) Enabled(_ context.Context, level slog.Level) bool {
+	core := h.logger.Core()
+	if level >= slog.LevelError {
+		return core.Enabled(LevelError)
+	}
+	if level >= slog.LevelWarn {
+		return core.Enabled(LevelWarn)
+	}
+	if level <= slog.LevelDebug {
+		return core.Enabled(LevelDebug)
+	}
+	return core.Enabled(LevelInfo)
+}
+// Handle converts a slog record into zap fields and emits it through the
+// wrapped logger at the matching zap level (Error, Warn, Debug or Info).
+//
+// Following the slog.Handler contract, a zero record time is not emitted and
+// record attributes whose key and value are both zero are skipped. Attributes
+// stored by WithAttrs already carry their group prefix, so only attributes
+// taken from the record are qualified with the handler's current groups.
+// Handle always returns nil.
+func (h *slogZapHandler) Handle(_ context.Context, record slog.Record) error {
+	fields := make([]zap.Field, 0, len(h.attrs)+record.NumAttrs()+1)
+	if !record.Time.IsZero() {
+		fields = append(fields, zap.Time("time", record.Time))
+	}
+	// Keys in h.attrs were qualified when they were added (see WithAttrs).
+	fields = append(fields, slogAttrsToZapFields(nil, h.attrs)...)
+	record.Attrs(func(attr slog.Attr) bool {
+		if attr.Equal(slog.Attr{}) {
+			return true
+		}
+		fields = append(fields, slogAttrToZapField(h.groups, attr))
+		return true
+	})
+	entry := h.logger.With(fields...)
+	switch {
+	case record.Level >= slog.LevelError:
+		entry.Error(record.Message)
+	case record.Level >= slog.LevelWarn:
+		entry.Warn(record.Message)
+	case record.Level <= slog.LevelDebug:
+		entry.Debug(record.Message)
+	default:
+		entry.Info(record.Message)
+	}
+	return nil
+}
+// WithAttrs returns a handler that additionally carries attrs. The receiver is
+// returned unchanged when attrs is empty.
+//
+// Each key is qualified with the groups that are open at the time of the call.
+// Qualifying here, rather than when a record is handled, keeps groups opened
+// later through WithGroup from being applied to attributes that were added
+// before them.
+func (h *slogZapHandler) WithAttrs(attrs []slog.Attr) slog.Handler {
+	if len(attrs) == 0 {
+		return h
+	}
+	prefix := ""
+	if len(h.groups) > 0 {
+		prefix = strings.Join(h.groups, ".") + "."
+	}
+	merged := make([]slog.Attr, 0, len(h.attrs)+len(attrs))
+	merged = append(merged, h.attrs...)
+	for _, attr := range attrs {
+		attr.Key = prefix + attr.Key
+		merged = append(merged, attr)
+	}
+	next := *h
+	next.attrs = merged
+	return &next
+}
+// WithGroup returns a handler whose group list has the trimmed name appended.
+// A name that is empty after trimming leaves the receiver unchanged.
+func (h *slogZapHandler) WithGroup(name string) slog.Handler {
+	trimmed := strings.TrimSpace(name)
+	if trimmed == "" {
+		return h
+	}
+	// Copy the group list so the receiver and the clone never share storage.
+	path := make([]string, len(h.groups), len(h.groups)+1)
+	copy(path, h.groups)
+	clone := *h
+	clone.groups = append(path, trimmed)
+	return &clone
+}
+// slogAttrsToZapFields converts every attribute with slogAttrToZapField,
+// preserving order and applying the same group prefix to each one.
+func slogAttrsToZapFields(groups []string, attrs []slog.Attr) []zap.Field {
+	converted := make([]zap.Field, len(attrs))
+	for i := range attrs {
+		converted[i] = slogAttrToZapField(groups, attrs[i])
+	}
+	return converted
+}
+// slogAttrToZapField maps one slog attribute onto the zap field of the
+// corresponding type. When groups is non-empty the key becomes the group names
+// and the attribute key joined by ".". Group values are emitted as a nested
+// object, and kinds without a dedicated case fall back to their string form.
+func slogAttrToZapField(groups []string, attr slog.Attr) zap.Field {
+	key := attr.Key
+	if len(groups) > 0 {
+		key = strings.Join(groups, ".") + "." + key
+	}
+	resolved := attr.Value.Resolve()
+	switch resolved.Kind() {
+	case slog.KindString:
+		return zap.String(key, resolved.String())
+	case slog.KindInt64:
+		return zap.Int64(key, resolved.Int64())
+	case slog.KindUint64:
+		return zap.Uint64(key, resolved.Uint64())
+	case slog.KindFloat64:
+		return zap.Float64(key, resolved.Float64())
+	case slog.KindBool:
+		return zap.Bool(key, resolved.Bool())
+	case slog.KindDuration:
+		return zap.Duration(key, resolved.Duration())
+	case slog.KindTime:
+		return zap.Time(key, resolved.Time())
+	case slog.KindGroup:
+		// Members are converted without a prefix because they live inside the nested object.
+		members := resolved.Group()
+		nestedFields := make([]zap.Field, 0, len(members))
+		for _, member := range members {
+			nestedFields = append(nestedFields, slogAttrToZapField(nil, member))
+		}
+		return zap.Object(key, zapObjectFields(nestedFields))
+	case slog.KindAny:
+		payload := resolved.Any()
+		if ts, isTime := payload.(time.Time); isTime {
+			return zap.Time(key, ts)
+		}
+		return zap.Any(key, payload)
+	}
+	return zap.String(key, resolved.String())
+}
+// zapObjectFields lets a list of zap fields be logged as one nested object.
+type zapObjectFields []zap.Field
+// MarshalLogObject adds every field to enc in order and always returns nil.
+func (z zapObjectFields) MarshalLogObject(enc zapcore.ObjectEncoder) error {
+	for i := range z {
+		z[i].AddTo(enc)
+	}
+	return nil
+}
--- a/backend/internal/repository/ops_repo.go
+++ b/backend/internal/repository/ops_repo.go
@@ -3,6 +3,7 @@ package repository
 import (
 	"context"
 	"database/sql"
+	"encoding/json"
 	"fmt"
 	"strings"
 	"time"
@@ -938,6 +939,243 @@ WHERE id = $1`
 	return err
 }
+// BatchInsertSystemLogs bulk-loads inputs into ops_system_logs with a COPY
+// statement inside a single transaction.
+//
+// Nil entries and entries whose level or message is empty after trimming are
+// skipped. A zero CreatedAt is replaced by the current time, an empty component
+// by "app" and an empty extra payload by "{}".
+//
+// It returns the number of rows written. On any failure the transaction is
+// rolled back, so the returned count is 0 alongside the error rather than a
+// partial count that never reached the table.
+func (r *opsRepository) BatchInsertSystemLogs(ctx context.Context, inputs []*service.OpsInsertSystemLogInput) (int64, error) {
+	if r == nil || r.db == nil {
+		return 0, fmt.Errorf("nil ops repository")
+	}
+	if len(inputs) == 0 {
+		return 0, nil
+	}
+	tx, err := r.db.BeginTx(ctx, nil)
+	if err != nil {
+		return 0, err
+	}
+	committed := false
+	defer func() {
+		if !committed {
+			// Best effort: the original failure is what the caller needs to see.
+			_ = tx.Rollback()
+		}
+	}()
+	stmt, err := tx.PrepareContext(ctx, pq.CopyIn(
+		"ops_system_logs",
+		"created_at",
+		"level",
+		"component",
+		"message",
+		"request_id",
+		"client_request_id",
+		"user_id",
+		"account_id",
+		"platform",
+		"model",
+		"extra",
+	))
+	if err != nil {
+		return 0, err
+	}
+	// Registered after the rollback defer, so on failure the statement is
+	// closed before the transaction is rolled back.
+	stmtOpen := true
+	defer func() {
+		if stmtOpen {
+			_ = stmt.Close()
+		}
+	}()
+	var inserted int64
+	for _, input := range inputs {
+		if input == nil {
+			continue
+		}
+		createdAt := input.CreatedAt
+		if createdAt.IsZero() {
+			createdAt = time.Now().UTC()
+		}
+		component := strings.TrimSpace(input.Component)
+		level := strings.ToLower(strings.TrimSpace(input.Level))
+		message := strings.TrimSpace(input.Message)
+		if level == "" || message == "" {
+			continue
+		}
+		if component == "" {
+			component = "app"
+		}
+		extra := strings.TrimSpace(input.ExtraJSON)
+		if extra == "" {
+			extra = "{}"
+		}
+		if _, err := stmt.ExecContext(
+			ctx,
+			createdAt.UTC(),
+			level,
+			component,
+			message,
+			opsNullString(input.RequestID),
+			opsNullString(input.ClientRequestID),
+			opsNullInt64(input.UserID),
+			opsNullInt64(input.AccountID),
+			opsNullString(input.Platform),
+			opsNullString(input.Model),
+			extra,
+		); err != nil {
+			return 0, err
+		}
+		inserted++
+	}
+	// An Exec without arguments flushes the buffered COPY data.
+	if _, err := stmt.ExecContext(ctx); err != nil {
+		return 0, err
+	}
+	stmtOpen = false
+	if err := stmt.Close(); err != nil {
+		return 0, err
+	}
+	if err := tx.Commit(); err != nil {
+		return 0, err
+	}
+	committed = true
+	return inserted, nil
+}
+// ListSystemLogs returns one page of ops_system_logs rows matching filter,
+// newest first (created_at DESC, id DESC), together with the total number of
+// matching rows.
+//
+// A nil filter means no conditions. Page defaults to 1; PageSize defaults to 50
+// and is capped at 200. The extra column is decoded into a map when it holds a
+// JSON object; a payload that cannot be decoded is left unset.
+func (r *opsRepository) ListSystemLogs(ctx context.Context, filter *service.OpsSystemLogFilter) (*service.OpsSystemLogList, error) {
+	if r == nil || r.db == nil {
+		return nil, fmt.Errorf("nil ops repository")
+	}
+	if filter == nil {
+		filter = &service.OpsSystemLogFilter{}
+	}
+	page := filter.Page
+	if page <= 0 {
+		page = 1
+	}
+	pageSize := filter.PageSize
+	switch {
+	case pageSize <= 0:
+		pageSize = 50
+	case pageSize > 200:
+		pageSize = 200
+	}
+	where, args, _ := buildOpsSystemLogsWhere(filter)
+	var total int
+	countRow := r.db.QueryRowContext(ctx, "SELECT COUNT(*) FROM ops_system_logs l "+where, args...)
+	if err := countRow.Scan(&total); err != nil {
+		return nil, err
+	}
+	const selectHead = `
+SELECT
+  l.id,
+  l.created_at,
+  l.level,
+  COALESCE(l.component, ''),
+  COALESCE(l.message, ''),
+  COALESCE(l.request_id, ''),
+  COALESCE(l.client_request_id, ''),
+  l.user_id,
+  l.account_id,
+  COALESCE(l.platform, ''),
+  COALESCE(l.model, ''),
+  COALESCE(l.extra::text, '{}')
+FROM ops_system_logs l
+`
+	const orderAndLimit = `
+ORDER BY l.created_at DESC, l.id DESC
+LIMIT $`
+	// LIMIT and OFFSET take the two placeholders that follow the filter arguments.
+	limitPos := len(args) + 1
+	query := selectHead + where + orderAndLimit + itoa(limitPos) + ` OFFSET $` + itoa(limitPos+1)
+	pagedArgs := make([]any, 0, len(args)+2)
+	pagedArgs = append(pagedArgs, args...)
+	pagedArgs = append(pagedArgs, pageSize, (page-1)*pageSize)
+	rows, err := r.db.QueryContext(ctx, query, pagedArgs...)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = rows.Close() }()
+	entries := make([]*service.OpsSystemLog, 0, pageSize)
+	for rows.Next() {
+		var (
+			entry     = &service.OpsSystemLog{}
+			uid       sql.NullInt64
+			aid       sql.NullInt64
+			extraText string
+		)
+		scanErr := rows.Scan(
+			&entry.ID,
+			&entry.CreatedAt,
+			&entry.Level,
+			&entry.Component,
+			&entry.Message,
+			&entry.RequestID,
+			&entry.ClientRequestID,
+			&uid,
+			&aid,
+			&entry.Platform,
+			&entry.Model,
+			&extraText,
+		)
+		if scanErr != nil {
+			return nil, scanErr
+		}
+		if uid.Valid {
+			id := uid.Int64
+			entry.UserID = &id
+		}
+		if aid.Valid {
+			id := aid.Int64
+			entry.AccountID = &id
+		}
+		switch extraText = strings.TrimSpace(extraText); extraText {
+		case "", "null", "{}":
+			// Nothing worth decoding.
+		default:
+			decoded := make(map[string]any)
+			if json.Unmarshal([]byte(extraText), &decoded) == nil {
+				entry.Extra = decoded
+			}
+		}
+		entries = append(entries, entry)
+	}
+	if err := rows.Err(); err != nil {
+		return nil, err
+	}
+	result := &service.OpsSystemLogList{
+		Logs:     entries,
+		Total:    total,
+		Page:     page,
+		PageSize: pageSize,
+	}
+	return result, nil
+}
+// DeleteSystemLogs deletes the ops_system_logs rows matching filter and returns
+// how many rows were affected. It refuses to run when the filter yields no
+// condition at all.
+func (r *opsRepository) DeleteSystemLogs(ctx context.Context, filter *service.OpsSystemLogCleanupFilter) (int64, error) {
+	if r == nil || r.db == nil {
+		return 0, fmt.Errorf("nil ops repository")
+	}
+	cleanup := filter
+	if cleanup == nil {
+		cleanup = &service.OpsSystemLogCleanupFilter{}
+	}
+	where, args, constrained := buildOpsSystemLogsCleanupWhere(cleanup)
+	if !constrained {
+		return 0, fmt.Errorf("cleanup requires at least one filter condition")
+	}
+	result, err := r.db.ExecContext(ctx, "DELETE FROM ops_system_logs l "+where, args...)
+	if err != nil {
+		return 0, err
+	}
+	return result.RowsAffected()
+}
+func (r *opsRepository) InsertSystemLogCleanupAudit(ctx context.Context, input *service.OpsSystemLogCleanupAudit) error {
+	if r == nil || r.db == nil {
+		return fmt.Errorf("nil ops repository")
+	}
+	if input == nil {
+		return fmt.Errorf("nil input")
+	}
+	createdAt := input.CreatedAt
+	if createdAt.IsZero() {
+		createdAt = time.Now().UTC()
+	}
+	_, err := r.db.ExecContext(ctx, `
+INSERT INTO ops_system_log_cleanup_audits (
+  created_at,
+  operator_id,
+  conditions,
+  deleted_rows
+) VALUES ($1,$2,$3,$4)
+`, createdAt.UTC(), input.OperatorID, input.Conditions, input.DeletedRows)
+	return err
+}
 func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) {
 	clauses := make([]string, 0, 12)
 	args := make([]any, 0, 12)
@@ -1053,6 +1291,95 @@ func buildOpsErrorLogsWhere(filter *service.OpsErrorLogFilter) (string, []any) {
 	return "WHERE " + strings.Join(clauses, " AND "), args
 }
+// buildOpsSystemLogsWhere turns filter into a parameterized WHERE clause for
+// queries that alias ops_system_logs as "l".
+//
+// It returns the clause (always starting with "WHERE 1=1"), the positional
+// arguments in placeholder order, and whether at least one real condition was
+// added. A nil filter yields no conditions.
+//
+// The free-text Query is matched as a literal substring: the LIKE
+// metacharacters '%' and '_' and the escape character '\' are escaped. Without
+// this, a query such as "%" would match every row and count as a constraint,
+// which defeats the constraint requirement callers rely on before deleting.
+func buildOpsSystemLogsWhere(filter *service.OpsSystemLogFilter) (string, []any, bool) {
+	clauses := make([]string, 0, 12)
+	args := make([]any, 0, 10)
+	clauses = append(clauses, "1=1")
+	if filter == nil {
+		return "WHERE " + strings.Join(clauses, " AND "), args, false
+	}
+	// add binds value to the next placeholder and appends "<expr>$N".
+	add := func(expr string, value any) {
+		args = append(args, value)
+		clauses = append(clauses, expr+itoa(len(args)))
+	}
+	if filter.StartTime != nil && !filter.StartTime.IsZero() {
+		add("l.created_at >= $", filter.StartTime.UTC())
+	}
+	if filter.EndTime != nil && !filter.EndTime.IsZero() {
+		add("l.created_at < $", filter.EndTime.UTC())
+	}
+	if v := strings.ToLower(strings.TrimSpace(filter.Level)); v != "" {
+		add("LOWER(COALESCE(l.level,'')) = $", v)
+	}
+	if v := strings.TrimSpace(filter.Component); v != "" {
+		add("COALESCE(l.component,'') = $", v)
+	}
+	if v := strings.TrimSpace(filter.RequestID); v != "" {
+		add("COALESCE(l.request_id,'') = $", v)
+	}
+	if v := strings.TrimSpace(filter.ClientRequestID); v != "" {
+		add("COALESCE(l.client_request_id,'') = $", v)
+	}
+	if filter.UserID != nil && *filter.UserID > 0 {
+		add("l.user_id = $", *filter.UserID)
+	}
+	if filter.AccountID != nil && *filter.AccountID > 0 {
+		add("l.account_id = $", *filter.AccountID)
+	}
+	if v := strings.TrimSpace(filter.Platform); v != "" {
+		add("COALESCE(l.platform,'') = $", v)
+	}
+	if v := strings.TrimSpace(filter.Model); v != "" {
+		add("COALESCE(l.model,'') = $", v)
+	}
+	if v := strings.TrimSpace(filter.Query); v != "" {
+		// Backslash is the default LIKE/ILIKE escape character in PostgreSQL.
+		escaped := strings.NewReplacer(`\`, `\\`, `%`, `\%`, `_`, `\_`).Replace(v)
+		args = append(args, "%"+escaped+"%")
+		n := itoa(len(args))
+		clauses = append(clauses, "(l.message ILIKE $"+n+" OR COALESCE(l.request_id,'') ILIKE $"+n+" OR COALESCE(l.client_request_id,'') ILIKE $"+n+" OR COALESCE(l.extra::text,'') ILIKE $"+n+")")
+	}
+	// Every condition binds exactly one argument, so any argument means a constraint.
+	return "WHERE " + strings.Join(clauses, " AND "), args, len(args) > 0
+}
+func buildOpsSystemLogsCleanupWhere(filter *service.OpsSystemLogCleanupFilter) (string, []any, bool) {
+	if filter == nil {
+		filter = &service.OpsSystemLogCleanupFilter{}
+	}
+	listFilter := &service.OpsSystemLogFilter{
+		StartTime:       filter.StartTime,
+		EndTime:         filter.EndTime,
+		Level:           filter.Level,
+		Component:       filter.Component,
+		RequestID:       filter.RequestID,
+		ClientRequestID: filter.ClientRequestID,
+		UserID:          filter.UserID,
+		AccountID:       filter.AccountID,
+		Platform:        filter.Platform,
+		Model:           filter.Model,
+		Query:           filter.Query,
+	}
+	return buildOpsSystemLogsWhere(listFilter)
+}
 // Helpers for nullable args
 func opsNullString(v any) any {
 	switch s := v.(type) {

--- a/backend/internal/repository/ops_repo_system_logs_test.go
+++ b/backend/internal/repository/ops_repo_system_logs_test.go
+package repository
+import (
+	"strings"
+	"testing"
+	"time"
+	"github.com/Wei-Shaw/sub2api/internal/service"
+)
+// TestBuildOpsSystemLogsWhere_WithClientRequestIDAndUserID populates every
+// filter field and checks that eleven arguments are bound and that the
+// client_request_id and user_id conditions appear in the clause.
+func TestBuildOpsSystemLogsWhere_WithClientRequestIDAndUserID(t *testing.T) {
+	from := time.Date(2026, 2, 1, 0, 0, 0, 0, time.UTC)
+	to := time.Date(2026, 2, 2, 0, 0, 0, 0, time.UTC)
+	uid, aid := int64(12), int64(34)
+	where, args, hasConstraint := buildOpsSystemLogsWhere(&service.OpsSystemLogFilter{
+		StartTime:       &from,
+		EndTime:         &to,
+		Level:           "warn",
+		Component:       "http.access",
+		RequestID:       "req-1",
+		ClientRequestID: "creq-1",
+		UserID:          &uid,
+		AccountID:       &aid,
+		Platform:        "openai",
+		Model:           "gpt-5",
+		Query:           "timeout",
+	})
+	if !hasConstraint {
+		t.Fatalf("expected hasConstraint=true")
+	}
+	if where == "" {
+		t.Fatalf("where should not be empty")
+	}
+	if n := len(args); n != 11 {
+		t.Fatalf("args len = %d, want 11", n)
+	}
+	if hasClientReq := contains(where, "COALESCE(l.client_request_id,'') = $"); !hasClientReq {
+		t.Fatalf("where should include client_request_id condition: %s", where)
+	}
+	if hasUser := contains(where, "l.user_id = $"); !hasUser {
+		t.Fatalf("where should include user_id condition: %s", where)
+	}
+}
+// TestBuildOpsSystemLogsCleanupWhere_RequireConstraint checks that an empty
+// cleanup filter reports no constraint, binds no arguments and still produces
+// a non-empty clause.
+func TestBuildOpsSystemLogsCleanupWhere_RequireConstraint(t *testing.T) {
+	empty := &service.OpsSystemLogCleanupFilter{}
+	where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(empty)
+	if hasConstraint {
+		t.Fatalf("expected hasConstraint=false")
+	}
+	if where == "" {
+		t.Fatalf("where should not be empty")
+	}
+	if n := len(args); n != 0 {
+		t.Fatalf("args len = %d, want 0", n)
+	}
+}
+// TestBuildOpsSystemLogsCleanupWhere_WithClientRequestIDAndUserID checks that
+// a cleanup filter with a client request id and a user id yields exactly two
+// bound arguments and both conditions.
+func TestBuildOpsSystemLogsCleanupWhere_WithClientRequestIDAndUserID(t *testing.T) {
+	uid := int64(9)
+	cleanup := service.OpsSystemLogCleanupFilter{ClientRequestID: "creq-9", UserID: &uid}
+	where, args, hasConstraint := buildOpsSystemLogsCleanupWhere(&cleanup)
+	if !hasConstraint {
+		t.Fatalf("expected hasConstraint=true")
+	}
+	if n := len(args); n != 2 {
+		t.Fatalf("args len = %d, want 2", n)
+	}
+	if hasClientReq := contains(where, "COALESCE(l.client_request_id,'') = $"); !hasClientReq {
+		t.Fatalf("where should include client_request_id condition: %s", where)
+	}
+	if hasUser := contains(where, "l.user_id = $"); !hasUser {
+		t.Fatalf("where should include user_id condition: %s", where)
+	}
+}
+// contains reports whether sub occurs within s; it is a thin wrapper around
+// strings.Contains used by the tests in this file.
+func contains(s string, sub string) bool {
+	return strings.Contains(s, sub)
+}
--- a/backend/internal/server/middleware/client_request_id.go
+++ b/backend/internal/server/middleware/client_request_id.go
@@ -2,10 +2,13 @@ package middleware
 import (
 	"context"
+	"strings"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
 	"github.com/gin-gonic/gin"
 	"github.com/google/uuid"
+	"go.uber.org/zap"
 )
 // ClientRequestID ensures every request has a unique client_request_id in request.Context().
@@ -24,7 +27,10 @@ func ClientRequestID() gin.HandlerFunc {
 		}
 		id := uuid.New().String()
-		c.Request = c.Request.WithContext(context.WithValue(c.Request.Context(), ctxkey.ClientRequestID, id))
+		ctx := context.WithValue(c.Request.Context(), ctxkey.ClientRequestID, id)
+		requestLogger := logger.FromContext(ctx).With(zap.String("client_request_id", strings.TrimSpace(id)))
+		ctx = logger.IntoContext(ctx, requestLogger)
+		c.Request = c.Request.WithContext(ctx)
 		c.Next()
 	}
 }
--- a/backend/internal/server/middleware/logger.go
+++ b/backend/internal/server/middleware/logger.go
 package middleware
 import (
-	"log"
 	"time"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
 	"github.com/gin-gonic/gin"
+	"go.uber.org/zap"
 )
 // Logger 请求日志中间件
@@ -24,38 +26,41 @@ func Logger() gin.HandlerFunc {
 			return
 		}
-		// 结束时间
 		endTime := time.Now()
-		// 执行时间
 		latency := endTime.Sub(startTime)
-		// 请求方法
 		method := c.Request.Method
-		// 状态码
 		statusCode := c.Writer.Status()
-		// 客户端IP
 		clientIP := c.ClientIP()
-		// 协议版本
 		protocol := c.Request.Proto
+		accountID, hasAccountID := c.Request.Context().Value(ctxkey.AccountID).(int64)
+		platform, _ := c.Request.Context().Value(ctxkey.Platform).(string)
+		model, _ := c.Request.Context().Value(ctxkey.Model).(string)
+		fields := []zap.Field{
+			zap.String("component", "http.access"),
+			zap.Int("status_code", statusCode),
+			zap.Int64("latency_ms", latency.Milliseconds()),
+			zap.String("client_ip", clientIP),
+			zap.String("protocol", protocol),
+			zap.String("method", method),
+			zap.String("path", path),
+		}
+		if hasAccountID && accountID > 0 {
+			fields = append(fields, zap.Int64("account_id", accountID))
+		}
+		if platform != "" {
+			fields = append(fields, zap.String("platform", platform))
+		}
+		if model != "" {
+			fields = append(fields, zap.String("model", model))
+		}
+		l := logger.FromContext(c.Request.Context()).With(fields...)
+		l.Info("http request completed", zap.Time("completed_at", endTime))
-		// 日志格式: [时间] 状态码 | 延迟 | IP | 协议 | 方法 路径
-		log.Printf("[GIN] %v | %3d | %13v | %15s | %-6s | %-7s %s",
-			endTime.Format("2006/01/02 - 15:04:05"),
-			statusCode,
-			latency,
-			clientIP,
-			protocol,
-			method,
-			path,
-		)
-		// 如果有错误，额外记录错误信息
 		if len(c.Errors) > 0 {
-			log.Printf("[GIN] Errors: %v", c.Errors.String())
+			l.Warn("http request contains gin errors", zap.String("errors", c.Errors.String()))
 		}
 	}
 }
--- a/backend/internal/server/middleware/request_access_logger_test.go
+++ b/backend/internal/server/middleware/request_access_logger_test.go
+package middleware
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"sync"
+	"testing"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+	"github.com/gin-gonic/gin"
+)
+// testLogSink is a test double that records every log event it receives.
+type testLogSink struct {
+	// mu guards events.
+	mu     sync.Mutex
+	// events holds the received log events in arrival order.
+	events []*logger.LogEvent
+}
+// WriteLogEvent appends event to the recorded list while holding the mutex.
+func (s *testLogSink) WriteLogEvent(event *logger.LogEvent) {
+	s.mu.Lock()
+	s.events = append(s.events, event)
+	s.mu.Unlock()
+}
+// list returns a copy of the recorded events, so callers can inspect it
+// without holding the sink's mutex.
+func (s *testLogSink) list() []*logger.LogEvent {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	snapshot := make([]*logger.LogEvent, 0, len(s.events))
+	snapshot = append(snapshot, s.events...)
+	return snapshot
+}
+// initMiddlewareTestLogger initializes the package logger for a test (debug
+// level, JSON format) and installs a recording sink, which is removed again
+// when the test finishes. The test fails immediately if initialization fails.
+func initMiddlewareTestLogger(t *testing.T) *testLogSink {
+	t.Helper()
+	opts := logger.InitOptions{
+		Level:       "debug",
+		Format:      "json",
+		ServiceName: "sub2api",
+		Environment: "test",
+	}
+	opts.Output = logger.OutputOptions{ToStdout: false, ToFile: false}
+	if err := logger.Init(opts); err != nil {
+		t.Fatalf("init logger: %v", err)
+	}
+	recorder := &testLogSink{}
+	logger.SetSink(recorder)
+	t.Cleanup(func() { logger.SetSink(nil) })
+	return recorder
+}
+// TestRequestLogger_GenerateAndPropagateRequestID checks that a request without
+// an X-Request-ID header gets a generated id that is visible both in the
+// request context and in the response header.
+func TestRequestLogger_GenerateAndPropagateRequestID(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	engine := gin.New()
+	engine.Use(RequestLogger())
+	engine.GET("/t", func(c *gin.Context) {
+		ctxID, ok := c.Request.Context().Value(ctxkey.RequestID).(string)
+		if !(ok && ctxID != "") {
+			t.Fatalf("request_id missing in context")
+		}
+		headerID := c.Writer.Header().Get(requestIDHeader)
+		if headerID != ctxID {
+			t.Fatalf("response header request_id mismatch, header=%q ctx=%q", headerID, ctxID)
+		}
+		c.Status(http.StatusOK)
+	})
+	rec := httptest.NewRecorder()
+	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/t", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	if rec.Header().Get(requestIDHeader) == "" {
+		t.Fatalf("X-Request-ID should be set")
+	}
+}
+// TestRequestLogger_KeepIncomingRequestID checks that an X-Request-ID sent by
+// the client is kept in the request context and echoed in the response.
+func TestRequestLogger_KeepIncomingRequestID(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	engine := gin.New()
+	engine.Use(RequestLogger())
+	engine.GET("/t", func(c *gin.Context) {
+		ctxID, _ := c.Request.Context().Value(ctxkey.RequestID).(string)
+		if ctxID != "rid-fixed" {
+			t.Fatalf("request_id=%q, want rid-fixed", ctxID)
+		}
+		c.Status(http.StatusOK)
+	})
+	rec := httptest.NewRecorder()
+	incoming := httptest.NewRequest(http.MethodGet, "/t", nil)
+	incoming.Header.Set(requestIDHeader, "rid-fixed")
+	engine.ServeHTTP(rec, incoming)
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	if echoed := rec.Header().Get(requestIDHeader); echoed != "rid-fixed" {
+		t.Fatalf("header=%q, want rid-fixed", echoed)
+	}
+}
+// TestLogger_AccessLogIncludesCoreFields runs a request through Logger with
+// account, platform and model values placed in the request context, then checks
+// that the "http request completed" event carries the matching status_code,
+// account_id, platform and model fields.
+func TestLogger_AccessLogIncludesCoreFields(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	sink := initMiddlewareTestLogger(t)
+	engine := gin.New()
+	engine.Use(Logger())
+	engine.Use(func(c *gin.Context) {
+		enriched := context.WithValue(c.Request.Context(), ctxkey.AccountID, int64(101))
+		enriched = context.WithValue(enriched, ctxkey.Platform, "openai")
+		enriched = context.WithValue(enriched, ctxkey.Model, "gpt-5")
+		c.Request = c.Request.WithContext(enriched)
+		c.Next()
+	})
+	engine.GET("/api/test", func(c *gin.Context) {
+		c.Status(http.StatusCreated)
+	})
+	rec := httptest.NewRecorder()
+	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/api/test", nil))
+	if rec.Code != http.StatusCreated {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	recorded := sink.list()
+	if len(recorded) == 0 {
+		t.Fatalf("expected at least one log event")
+	}
+	// asInt64 accepts the two integer representations a field may arrive in.
+	asInt64 := func(v any) (int64, bool) {
+		switch n := v.(type) {
+		case int:
+			return int64(n), true
+		case int64:
+			return n, true
+		}
+		return 0, false
+	}
+	seen := false
+	for _, ev := range recorded {
+		if ev == nil || ev.Message != "http request completed" {
+			continue
+		}
+		seen = true
+		status, ok := asInt64(ev.Fields["status_code"])
+		if !ok {
+			t.Fatalf("status_code type mismatch: %T", ev.Fields["status_code"])
+		}
+		if status != http.StatusCreated {
+			t.Fatalf("status_code field mismatch: %v", status)
+		}
+		account, ok := asInt64(ev.Fields["account_id"])
+		if !ok {
+			t.Fatalf("account_id type mismatch: %T", ev.Fields["account_id"])
+		}
+		if account != 101 {
+			t.Fatalf("account_id field mismatch: %v", account)
+		}
+		if !(ev.Fields["platform"] == "openai" && ev.Fields["model"] == "gpt-5") {
+			t.Fatalf("platform/model mismatch: %+v", ev.Fields)
+		}
+	}
+	if !seen {
+		t.Fatalf("access log event not found")
+	}
+}
+// TestLogger_HealthPathSkipped checks that a request to /health produces no
+// log events in the sink.
+func TestLogger_HealthPathSkipped(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+	sink := initMiddlewareTestLogger(t)
+	engine := gin.New()
+	engine.Use(Logger())
+	engine.GET("/health", func(c *gin.Context) {
+		c.Status(http.StatusOK)
+	})
+	rec := httptest.NewRecorder()
+	engine.ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/health", nil))
+	if rec.Code != http.StatusOK {
+		t.Fatalf("status=%d", rec.Code)
+	}
+	if recorded := sink.list(); len(recorded) != 0 {
+		t.Fatalf("health endpoint should not write access log")
+	}
+}
--- a/backend/internal/server/middleware/request_logger.go
+++ b/backend/internal/server/middleware/request_logger.go
+package middleware
+import (
+	"context"
+	"strings"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/ctxkey"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+	"github.com/gin-gonic/gin"
+	"github.com/google/uuid"
+	"go.uber.org/zap"
+)
+// requestIDHeader is the HTTP header used to receive and echo the request id.
+const requestIDHeader = "X-Request-ID"
+// RequestLogger injects a request-scoped logger at the entry of the request.
+//
+// The request id is taken from the X-Request-ID header (trimmed), or generated
+// when the header is absent or blank. It is echoed in the response header and
+// stored in the request context under ctxkey.RequestID. The context also
+// receives a logger carrying component, request_id, path and method.
+//
+// client_request_id is attached only when the context already holds a
+// non-blank value. Attaching an empty value would emit a meaningless field, and
+// the same key would appear twice once a later middleware adds the real id.
+func RequestLogger() gin.HandlerFunc {
+	return func(c *gin.Context) {
+		if c.Request == nil {
+			c.Next()
+			return
+		}
+		requestID := strings.TrimSpace(c.GetHeader(requestIDHeader))
+		if requestID == "" {
+			requestID = uuid.NewString()
+		}
+		c.Header(requestIDHeader, requestID)
+		ctx := context.WithValue(c.Request.Context(), ctxkey.RequestID, requestID)
+		fields := make([]zap.Field, 0, 5)
+		fields = append(fields,
+			zap.String("component", "http"),
+			zap.String("request_id", requestID),
+		)
+		rawClientRequestID, _ := ctx.Value(ctxkey.ClientRequestID).(string)
+		if clientRequestID := strings.TrimSpace(rawClientRequestID); clientRequestID != "" {
+			fields = append(fields, zap.String("client_request_id", clientRequestID))
+		}
+		fields = append(fields,
+			zap.String("path", c.Request.URL.Path),
+			zap.String("method", c.Request.Method),
+		)
+		ctx = logger.IntoContext(ctx, logger.With(fields...))
+		c.Request = c.Request.WithContext(ctx)
+		c.Next()
+	}
+}
--- a/backend/internal/server/router.go
+++ b/backend/internal/server/router.go
@@ -29,6 +29,7 @@ func SetupRouter(
 	redisClient *redis.Client,
 ) *gin.Engine {
 	// 应用中间件
+	r.Use(middleware2.RequestLogger())
 	r.Use(middleware2.Logger())
 	r.Use(middleware2.CORS(cfg.CORS))
 	r.Use(middleware2.SecurityHeaders(cfg.Security.CSP))

--- a/backend/internal/server/routes/admin.go
+++ b/backend/internal/server/routes/admin.go
@@ -101,6 +101,9 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		{
 			runtime.GET("/alert", h.Admin.Ops.GetAlertRuntimeSettings)
 			runtime.PUT("/alert", h.Admin.Ops.UpdateAlertRuntimeSettings)
+			runtime.GET("/logging", h.Admin.Ops.GetRuntimeLogConfig)
+			runtime.PUT("/logging", h.Admin.Ops.UpdateRuntimeLogConfig)
+			runtime.POST("/logging/reset", h.Admin.Ops.ResetRuntimeLogConfig)
 		}
 		// Advanced settings (DB-backed)
@@ -144,6 +147,11 @@ func registerOpsRoutes(admin *gin.RouterGroup, h *handler.Handlers) {
 		// Request drilldown (success + error)
 		ops.GET("/requests", h.Admin.Ops.ListRequestDetails)
+		// Indexed system logs
+		ops.GET("/system-logs", h.Admin.Ops.ListSystemLogs)
+		ops.POST("/system-logs/cleanup", h.Admin.Ops.CleanupSystemLogs)
+		ops.GET("/system-logs/health", h.Admin.Ops.GetSystemLogIngestionHealth)
 		// Dashboard (vNext - raw path for MVP)
 		ops.GET("/dashboard/overview", h.Admin.Ops.GetDashboardOverview)
 		ops.GET("/dashboard/throughput-trend", h.Admin.Ops.GetDashboardThroughputTrend)

--- a/backend/internal/service/domain_constants.go
+++ b/backend/internal/service/domain_constants.go
@@ -161,6 +161,9 @@ const (
 	// SettingKeyOpsAdvancedSettings stores JSON config for ops advanced settings (data retention, aggregation).
 	SettingKeyOpsAdvancedSettings = "ops_advanced_settings"
+	// SettingKeyOpsRuntimeLogConfig stores JSON config for runtime log settings.
+	SettingKeyOpsRuntimeLogConfig = "ops_runtime_log_config"
 	// =========================
 	// Stream Timeout Handling
 	// =========================

--- a/backend/internal/service/ops_cleanup_service.go
+++ b/backend/internal/service/ops_cleanup_service.go
@@ -157,6 +157,8 @@ type opsCleanupDeletedCounts struct {
 	errorLogs     int64
 	retryAttempts int64
 	alertEvents   int64
+	systemLogs    int64
+	logAudits     int64
 	systemMetrics int64
 	hourlyPreagg  int64
 	dailyPreagg   int64
@@ -164,10 +166,12 @@ type opsCleanupDeletedCounts struct {
 func (c opsCleanupDeletedCounts) String() string {
 	return fmt.Sprintf(
-		"error_logs=%d retry_attempts=%d alert_events=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d",
+		"error_logs=%d retry_attempts=%d alert_events=%d system_logs=%d log_audits=%d system_metrics=%d hourly_preagg=%d daily_preagg=%d",
 		c.errorLogs,
 		c.retryAttempts,
 		c.alertEvents,
+		c.systemLogs,
+		c.logAudits,
 		c.systemMetrics,
 		c.hourlyPreagg,
 		c.dailyPreagg,
@@ -204,6 +208,18 @@ func (s *OpsCleanupService) runCleanupOnce(ctx context.Context) (opsCleanupDelet
 			return out, err
 		}
 		out.alertEvents = n
+		n, err = deleteOldRowsByID(ctx, s.db, "ops_system_logs", "created_at", cutoff, batchSize, false)
+		if err != nil {
+			return out, err
+		}
+		out.systemLogs = n
+		n, err = deleteOldRowsByID(ctx, s.db, "ops_system_log_cleanup_audits", "created_at", cutoff, batchSize, false)
+		if err != nil {
+			return out, err
+		}
+		out.logAudits = n
 	}
 	// Minute-level metrics snapshots.

--- a/backend/internal/service/ops_log_runtime.go
+++ b/backend/internal/service/ops_log_runtime.go
+package service
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"strings"
+	"time"
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+	"go.uber.org/zap"
+)
+// defaultOpsRuntimeLogConfig builds the baseline runtime log configuration.
+//
+// It starts from built-in defaults (info level, sampling disabled with 100/100
+// thresholds, caller enabled, stacktraces at error, 30-day retention) and
+// overlays the values found in cfg when cfg is non-nil. Blank level strings and
+// non-positive sampling thresholds or retention days in cfg are ignored so the
+// built-in value is kept; otherwise an incomplete static config would yield a
+// baseline that validateOpsRuntimeLogConfig rejects.
+func defaultOpsRuntimeLogConfig(cfg *config.Config) *OpsRuntimeLogConfig {
+	out := &OpsRuntimeLogConfig{
+		Level:           "info",
+		EnableSampling:  false,
+		SamplingInitial: 100,
+		SamplingNext:    100,
+		Caller:          true,
+		StacktraceLevel: "error",
+		RetentionDays:   30,
+	}
+	if cfg == nil {
+		return out
+	}
+	if level := strings.ToLower(strings.TrimSpace(cfg.Log.Level)); level != "" {
+		out.Level = level
+	}
+	out.EnableSampling = cfg.Log.Sampling.Enabled
+	if cfg.Log.Sampling.Initial > 0 {
+		out.SamplingInitial = cfg.Log.Sampling.Initial
+	}
+	if cfg.Log.Sampling.Thereafter > 0 {
+		out.SamplingNext = cfg.Log.Sampling.Thereafter
+	}
+	out.Caller = cfg.Log.Caller
+	if stack := strings.ToLower(strings.TrimSpace(cfg.Log.StacktraceLevel)); stack != "" {
+		out.StacktraceLevel = stack
+	}
+	// Retention reuses the error-log retention setting from the cleanup config.
+	if cfg.Ops.Cleanup.ErrorLogRetentionDays > 0 {
+		out.RetentionDays = cfg.Ops.Cleanup.ErrorLogRetentionDays
+	}
+	return out
+}
+// normalizeOpsRuntimeLogConfig canonicalizes cfg in place, filling gaps from defaults.
+//
+// Level strings are trimmed and lower-cased; a blank result is replaced by the
+// corresponding default. Non-positive sampling thresholds and retention days
+// are replaced by the defaults as well. It does nothing when either argument
+// is nil.
+func normalizeOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig, defaults *OpsRuntimeLogConfig) {
+	if cfg == nil || defaults == nil {
+		return
+	}
+	// canonical trims and lower-cases value, falling back when nothing is left.
+	canonical := func(value, fallback string) string {
+		if cleaned := strings.ToLower(strings.TrimSpace(value)); cleaned != "" {
+			return cleaned
+		}
+		return fallback
+	}
+	cfg.Level = canonical(cfg.Level, defaults.Level)
+	cfg.StacktraceLevel = canonical(cfg.StacktraceLevel, defaults.StacktraceLevel)
+	if cfg.SamplingInitial <= 0 {
+		cfg.SamplingInitial = defaults.SamplingInitial
+	}
+	if cfg.SamplingNext <= 0 {
+		cfg.SamplingNext = defaults.SamplingNext
+	}
+	if cfg.RetentionDays <= 0 {
+		cfg.RetentionDays = defaults.RetentionDays
+	}
+}
+// validateOpsRuntimeLogConfig reports the first rule that cfg violates.
+//
+// Checks run in this order: cfg is non-nil, level is debug/info/warn/error,
+// stacktrace level is none/error/fatal, both sampling thresholds are positive,
+// and retention days lie in [1, 3650]. Level strings are compared after
+// trimming and lower-casing. It returns nil when every check passes.
+func validateOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig) error {
+	if cfg == nil {
+		return errors.New("invalid config")
+	}
+	level := strings.ToLower(strings.TrimSpace(cfg.Level))
+	if level != "debug" && level != "info" && level != "warn" && level != "error" {
+		return errors.New("level must be one of: debug/info/warn/error")
+	}
+	stack := strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel))
+	if stack != "none" && stack != "error" && stack != "fatal" {
+		return errors.New("stacktrace_level must be one of: none/error/fatal")
+	}
+	switch {
+	case cfg.SamplingInitial <= 0:
+		return errors.New("sampling_initial must be positive")
+	case cfg.SamplingNext <= 0:
+		return errors.New("sampling_thereafter must be positive")
+	case cfg.RetentionDays < 1 || cfg.RetentionDays > 3650:
+		return errors.New("retention_days must be between 1 and 3650")
+	}
+	return nil
+}
+// GetRuntimeLogConfig returns the effective runtime log configuration.
+//
+// When the service or its setting repository is nil it returns the baseline
+// built by defaultOpsRuntimeLogConfig. Otherwise it reads the
+// SettingKeyOpsRuntimeLogConfig value: a missing setting is seeded with the
+// baseline and the baseline is returned; a value that is not valid JSON also
+// yields the baseline; any other repository error is returned to the caller.
+// A successfully decoded config is normalized against the baseline first.
+func (s *OpsService) GetRuntimeLogConfig(ctx context.Context) (*OpsRuntimeLogConfig, error) {
+	if s == nil || s.settingRepo == nil {
+		var cfg *config.Config
+		if s != nil {
+			cfg = s.cfg
+		}
+		defaultCfg := defaultOpsRuntimeLogConfig(cfg)
+		return defaultCfg, nil
+	}
+	defaultCfg := defaultOpsRuntimeLogConfig(s.cfg)
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	raw, err := s.settingRepo.GetValue(ctx, SettingKeyOpsRuntimeLogConfig)
+	if err != nil {
+		if errors.Is(err, ErrSettingNotFound) {
+			// Seed the setting with the baseline; marshal and store errors are ignored.
+			b, _ := json.Marshal(defaultCfg)
+			_ = s.settingRepo.Set(ctx, SettingKeyOpsRuntimeLogConfig, string(b))
+			return defaultCfg, nil
+		}
+		return nil, err
+	}
+	cfg := &OpsRuntimeLogConfig{}
+	if err := json.Unmarshal([]byte(raw), cfg); err != nil {
+		// A corrupt stored value falls back to the baseline instead of failing.
+		return defaultCfg, nil
+	}
+	normalizeOpsRuntimeLogConfig(cfg, defaultCfg)
+	return cfg, nil
+}
+// UpdateRuntimeLogConfig validates req, applies it to the live logger and persists it.
+//
+// The request is copied, normalized against the baseline and validated. It is
+// then applied through applyOpsRuntimeLogConfig and stored under
+// SettingKeyOpsRuntimeLogConfig with Source "runtime_setting", the current UTC
+// timestamp and operatorID. Validation, apply and persistence failures are
+// written to the audit log and returned. It also returns an error when the
+// service or repository is nil, req is nil, or operatorID is not positive.
+func (s *OpsService) UpdateRuntimeLogConfig(ctx context.Context, req *OpsRuntimeLogConfig, operatorID int64) (*OpsRuntimeLogConfig, error) {
+	if s == nil || s.settingRepo == nil {
+		return nil, errors.New("setting repository not initialized")
+	}
+	if req == nil {
+		return nil, errors.New("invalid config")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if operatorID <= 0 {
+		return nil, errors.New("invalid operator id")
+	}
+	// Capture the current config first; it is the rollback target and the audit "old" value.
+	oldCfg, err := s.GetRuntimeLogConfig(ctx)
+	if err != nil {
+		return nil, err
+	}
+	// Work on a copy so the caller's request is never mutated.
+	next := *req
+	normalizeOpsRuntimeLogConfig(&next, defaultOpsRuntimeLogConfig(s.cfg))
+	if err := validateOpsRuntimeLogConfig(&next); err != nil {
+		s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "validation_failed: "+err.Error())
+		return nil, err
+	}
+	if err := applyOpsRuntimeLogConfig(&next); err != nil {
+		s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "apply_failed: "+err.Error())
+		return nil, err
+	}
+	next.Source = "runtime_setting"
+	next.UpdatedAt = time.Now().UTC().Format(time.RFC3339Nano)
+	next.UpdatedByUserID = operatorID
+	encoded, err := json.Marshal(&next)
+	if err != nil {
+		return nil, err
+	}
+	if err := s.settingRepo.Set(ctx, SettingKeyOpsRuntimeLogConfig, string(encoded)); err != nil {
+		// On persistence failure, roll back to the old config so the in-memory
+		// state does not diverge from the persisted state.
+		_ = applyOpsRuntimeLogConfig(oldCfg)
+		s.auditRuntimeLogConfigFailure(operatorID, oldCfg, &next, "persist_failed: "+err.Error())
+		return nil, err
+	}
+	s.auditRuntimeLogConfigChange(operatorID, oldCfg, &next, "updated")
+	return &next, nil
+}
+// ResetRuntimeLogConfig discards the runtime override and re-applies the baseline.
+//
+// The baseline from defaultOpsRuntimeLogConfig is validated and applied to the
+// live logger, then the SettingKeyOpsRuntimeLogConfig setting is deleted. A
+// "not found" result from the delete is treated as success. If the delete
+// fails for another reason the previous config is re-applied. Every outcome is
+// written to the audit log. The returned config carries Source "baseline",
+// the current UTC timestamp and operatorID.
+func (s *OpsService) ResetRuntimeLogConfig(ctx context.Context, operatorID int64) (*OpsRuntimeLogConfig, error) {
+	if s == nil || s.settingRepo == nil {
+		return nil, errors.New("setting repository not initialized")
+	}
+	if ctx == nil {
+		ctx = context.Background()
+	}
+	if operatorID <= 0 {
+		return nil, errors.New("invalid operator id")
+	}
+	oldCfg, err := s.GetRuntimeLogConfig(ctx)
+	if err != nil {
+		return nil, err
+	}
+	resetCfg := defaultOpsRuntimeLogConfig(s.cfg)
+	normalizeOpsRuntimeLogConfig(resetCfg, defaultOpsRuntimeLogConfig(s.cfg))
+	if err := validateOpsRuntimeLogConfig(resetCfg); err != nil {
+		s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_validation_failed: "+err.Error())
+		return nil, err
+	}
+	if err := applyOpsRuntimeLogConfig(resetCfg); err != nil {
+		s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_apply_failed: "+err.Error())
+		return nil, err
+	}
+	// Remove the runtime override so the env/yaml baseline applies again.
+	if err := s.settingRepo.Delete(ctx, SettingKeyOpsRuntimeLogConfig); err != nil && !errors.Is(err, ErrSettingNotFound) {
+		_ = applyOpsRuntimeLogConfig(oldCfg)
+		s.auditRuntimeLogConfigFailure(operatorID, oldCfg, resetCfg, "reset_persist_failed: "+err.Error())
+		return nil, err
+	}
+	now := time.Now().UTC().Format(time.RFC3339Nano)
+	resetCfg.Source = "baseline"
+	resetCfg.UpdatedAt = now
+	resetCfg.UpdatedByUserID = operatorID
+	s.auditRuntimeLogConfigChange(operatorID, oldCfg, resetCfg, "reset")
+	return resetCfg, nil
+}
+// applyOpsRuntimeLogConfig pushes cfg into the process logger via logger.Reconfigure.
+//
+// Only the level, caller flag, stacktrace level and sampling options are
+// overwritten. RetentionDays is not part of the logger options and is not
+// touched here. It returns an error when cfg is nil or when Reconfigure fails.
+func applyOpsRuntimeLogConfig(cfg *OpsRuntimeLogConfig) error {
+	if cfg == nil {
+		return fmt.Errorf("nil runtime log config")
+	}
+	mutate := func(opts *logger.InitOptions) error {
+		opts.Sampling.Enabled = cfg.EnableSampling
+		opts.Sampling.Initial = cfg.SamplingInitial
+		opts.Sampling.Thereafter = cfg.SamplingNext
+		opts.Caller = cfg.Caller
+		opts.Level = strings.ToLower(strings.TrimSpace(cfg.Level))
+		opts.StacktraceLevel = strings.ToLower(strings.TrimSpace(cfg.StacktraceLevel))
+		return nil
+	}
+	err := logger.Reconfigure(mutate)
+	if err != nil {
+		return err
+	}
+	return nil
+}
+// applyRuntimeLogConfigOnStartup loads the effective runtime log config and
+// applies it to the logger. It is best effort: load and apply errors are
+// dropped, leaving the logger as it was configured before the call.
+func (s *OpsService) applyRuntimeLogConfigOnStartup(ctx context.Context) {
+	if s == nil {
+		return
+	}
+	cfg, err := s.GetRuntimeLogConfig(ctx)
+	if err != nil {
+		return
+	}
+	_ = applyOpsRuntimeLogConfig(cfg)
+}
+// auditRuntimeLogConfigChange writes an info-level audit entry for a successful
+// config change. The entry uses component "audit.log_config_change" and carries
+// the action, the operator and the JSON-encoded old and new configs. JSON
+// encoding errors are ignored.
+func (s *OpsService) auditRuntimeLogConfigChange(operatorID int64, oldCfg *OpsRuntimeLogConfig, newCfg *OpsRuntimeLogConfig, action string) {
+	oldRaw, _ := json.Marshal(oldCfg)
+	newRaw, _ := json.Marshal(newCfg)
+	logger.With(
+		zap.String("component", "audit.log_config_change"),
+		zap.String("action", strings.TrimSpace(action)),
+		zap.Int64("operator_id", operatorID),
+		zap.String("old", string(oldRaw)),
+		zap.String("new", string(newRaw)),
+	).Info("runtime log config changed")
+}
+// auditRuntimeLogConfigFailure writes a warn-level audit entry for a rejected
+// or failed config change. The entry uses component "audit.log_config_change"
+// with action "failed" and carries the operator, the reason and the
+// JSON-encoded old and attempted configs. JSON encoding errors are ignored.
+func (s *OpsService) auditRuntimeLogConfigFailure(operatorID int64, oldCfg *OpsRuntimeLogConfig, newCfg *OpsRuntimeLogConfig, reason string) {
+	oldRaw, _ := json.Marshal(oldCfg)
+	newRaw, _ := json.Marshal(newCfg)
+	logger.With(
+		zap.String("component", "audit.log_config_change"),
+		zap.String("action", "failed"),
+		zap.Int64("operator_id", operatorID),
+		zap.String("reason", strings.TrimSpace(reason)),
+		zap.String("old", string(oldRaw)),
+		zap.String("new", string(newRaw)),
+	).Warn("runtime log config change failed")
+}
--- a/backend/internal/service/ops_models.go
+++ b/backend/internal/service/ops_models.go
@@ -2,6 +2,21 @@ package service
 import "time"
+// OpsSystemLog is one indexed system log entry as returned by the ops API.
+// UserID and AccountID are nil when the entry has no such association.
+// Extra is omitted from JSON when empty.
+type OpsSystemLog struct {
+	ID              int64          `json:"id"`
+	CreatedAt       time.Time      `json:"created_at"`
+	Level           string         `json:"level"`
+	Component       string         `json:"component"`
+	Message         string         `json:"message"`
+	RequestID       string         `json:"request_id"`
+	ClientRequestID string         `json:"client_request_id"`
+	UserID          *int64         `json:"user_id"`
+	AccountID       *int64         `json:"account_id"`
+	Platform        string         `json:"platform"`
+	Model           string         `json:"model"`
+	Extra           map[string]any `json:"extra,omitempty"`
+}
 type OpsErrorLog struct {
 	ID        int64     `json:"id"`
 	CreatedAt time.Time `json:"created_at"`

--- a/backend/internal/service/ops_port.go
+++ b/backend/internal/service/ops_port.go
@@ -10,6 +10,10 @@ type OpsRepository interface {
 	ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error)
 	GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error)
 	ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error)
+	BatchInsertSystemLogs(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error)
+	ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error)
+	DeleteSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error)
+	InsertSystemLogCleanupAudit(ctx context.Context, input *OpsSystemLogCleanupAudit) error
 	InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error)
 	UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error
@@ -205,6 +209,69 @@ type OpsInsertSystemMetricsInput struct {
 	ConcurrencyQueueDepth *int
 }
+// OpsInsertSystemLogInput is one row passed to OpsRepository.BatchInsertSystemLogs.
+// ExtraJSON holds the remaining structured fields as a JSON-encoded string.
+type OpsInsertSystemLogInput struct {
+	CreatedAt       time.Time
+	Level           string
+	Component       string
+	Message         string
+	RequestID       string
+	ClientRequestID string
+	UserID          *int64
+	AccountID       *int64
+	Platform        string
+	Model           string
+	ExtraJSON       string
+}
+// OpsSystemLogFilter selects and paginates system logs for OpsRepository.ListSystemLogs.
+// Nil pointers mean the criterion is not set.
+// NOTE(review): how empty strings and Query are matched is decided by the
+// repository implementation, which is not shown here.
+type OpsSystemLogFilter struct {
+	StartTime *time.Time
+	EndTime   *time.Time
+	Level     string
+	Component string
+	RequestID       string
+	ClientRequestID string
+	UserID          *int64
+	AccountID       *int64
+	Platform        string
+	Model           string
+	Query           string
+	// Page and PageSize control pagination.
+	Page     int
+	PageSize int
+}
+// OpsSystemLogCleanupFilter selects the system logs removed by
+// OpsRepository.DeleteSystemLogs. It has the same criteria as
+// OpsSystemLogFilter, without pagination.
+type OpsSystemLogCleanupFilter struct {
+	StartTime *time.Time
+	EndTime   *time.Time
+	Level     string
+	Component string
+	RequestID       string
+	ClientRequestID string
+	UserID          *int64
+	AccountID       *int64
+	Platform        string
+	Model           string
+	Query           string
+}
+// OpsSystemLogList is one page of system logs plus the pagination values
+// reported alongside it.
+type OpsSystemLogList struct {
+	Logs     []*OpsSystemLog `json:"logs"`
+	Total    int             `json:"total"`
+	Page     int             `json:"page"`
+	PageSize int             `json:"page_size"`
+}
+// OpsSystemLogCleanupAudit records one manual system-log cleanup: who ran it,
+// the filter conditions (a JSON-encoded string) and how many rows were deleted.
+type OpsSystemLogCleanupAudit struct {
+	CreatedAt   time.Time
+	OperatorID  int64
+	Conditions  string
+	DeletedRows int64
+}
 type OpsSystemMetricsSnapshot struct {
 	ID            int64     `json:"id"`
 	CreatedAt     time.Time `json:"created_at"`

--- a/backend/internal/service/ops_repo_mock_test.go
+++ b/backend/internal/service/ops_repo_mock_test.go
+package service
+import (
+	"context"
+	"time"
+)
+// opsRepoMock is a test-only OpsRepository implementation with optional function hooks.
+// Only the system-log methods are hookable; when a hook is nil the method
+// returns a fixed default result. All other methods are no-op stubs.
+type opsRepoMock struct {
+	BatchInsertSystemLogsFn       func(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error)
+	ListSystemLogsFn              func(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error)
+	DeleteSystemLogsFn            func(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error)
+	InsertSystemLogCleanupAuditFn func(ctx context.Context, input *OpsSystemLogCleanupAudit) error
+}
+// InsertErrorLog and the error-log / request-detail methods that follow are
+// stubs: they ignore their arguments and return empty results with a nil error.
+func (m *opsRepoMock) InsertErrorLog(ctx context.Context, input *OpsInsertErrorLogInput) (int64, error) {
+	return 0, nil
+}
+func (m *opsRepoMock) ListErrorLogs(ctx context.Context, filter *OpsErrorLogFilter) (*OpsErrorLogList, error) {
+	return &OpsErrorLogList{Errors: []*OpsErrorLog{}, Page: 1, PageSize: 20}, nil
+}
+func (m *opsRepoMock) GetErrorLogByID(ctx context.Context, id int64) (*OpsErrorLogDetail, error) {
+	return &OpsErrorLogDetail{}, nil
+}
+func (m *opsRepoMock) ListRequestDetails(ctx context.Context, filter *OpsRequestDetailFilter) ([]*OpsRequestDetail, int64, error) {
+	return []*OpsRequestDetail{}, 0, nil
+}
+// BatchInsertSystemLogs delegates to BatchInsertSystemLogsFn when set;
+// otherwise it reports every input as inserted.
+func (m *opsRepoMock) BatchInsertSystemLogs(ctx context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) {
+	if m.BatchInsertSystemLogsFn != nil {
+		return m.BatchInsertSystemLogsFn(ctx, inputs)
+	}
+	return int64(len(inputs)), nil
+}
+// ListSystemLogs delegates to ListSystemLogsFn when set; otherwise it returns
+// an empty first page with a page size of 50.
+func (m *opsRepoMock) ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) {
+	if m.ListSystemLogsFn != nil {
+		return m.ListSystemLogsFn(ctx, filter)
+	}
+	return &OpsSystemLogList{Logs: []*OpsSystemLog{}, Total: 0, Page: 1, PageSize: 50}, nil
+}
+// DeleteSystemLogs delegates to DeleteSystemLogsFn when set; otherwise it
+// reports zero deleted rows.
+func (m *opsRepoMock) DeleteSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter) (int64, error) {
+	if m.DeleteSystemLogsFn != nil {
+		return m.DeleteSystemLogsFn(ctx, filter)
+	}
+	return 0, nil
+}
+// InsertSystemLogCleanupAudit delegates to InsertSystemLogCleanupAuditFn when
+// set; otherwise it succeeds without recording anything.
+func (m *opsRepoMock) InsertSystemLogCleanupAudit(ctx context.Context, input *OpsSystemLogCleanupAudit) error {
+	if m.InsertSystemLogCleanupAuditFn != nil {
+		return m.InsertSystemLogCleanupAuditFn(ctx, input)
+	}
+	return nil
+}
+// InsertRetryAttempt and the retry / error-resolution methods that follow are
+// stubs that return zero values or empty slices with a nil error.
+func (m *opsRepoMock) InsertRetryAttempt(ctx context.Context, input *OpsInsertRetryAttemptInput) (int64, error) {
+	return 0, nil
+}
+func (m *opsRepoMock) UpdateRetryAttempt(ctx context.Context, input *OpsUpdateRetryAttemptInput) error {
+	return nil
+}
+func (m *opsRepoMock) GetLatestRetryAttemptForError(ctx context.Context, sourceErrorID int64) (*OpsRetryAttempt, error) {
+	return nil, nil
+}
+func (m *opsRepoMock) ListRetryAttemptsByErrorID(ctx context.Context, sourceErrorID int64, limit int) ([]*OpsRetryAttempt, error) {
+	return []*OpsRetryAttempt{}, nil
+}
+func (m *opsRepoMock) UpdateErrorResolution(ctx context.Context, errorID int64, resolved bool, resolvedByUserID *int64, resolvedRetryID *int64, resolvedAt *time.Time) error {
+	return nil
+}
+// GetWindowStats and the dashboard / statistics methods that follow are stubs
+// that return an empty, non-nil response with a nil error.
+func (m *opsRepoMock) GetWindowStats(ctx context.Context, filter *OpsDashboardFilter) (*OpsWindowStats, error) {
+	return &OpsWindowStats{}, nil
+}
+func (m *opsRepoMock) GetRealtimeTrafficSummary(ctx context.Context, filter *OpsDashboardFilter) (*OpsRealtimeTrafficSummary, error) {
+	return &OpsRealtimeTrafficSummary{}, nil
+}
+func (m *opsRepoMock) GetDashboardOverview(ctx context.Context, filter *OpsDashboardFilter) (*OpsDashboardOverview, error) {
+	return &OpsDashboardOverview{}, nil
+}
+func (m *opsRepoMock) GetThroughputTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsThroughputTrendResponse, error) {
+	return &OpsThroughputTrendResponse{}, nil
+}
+func (m *opsRepoMock) GetLatencyHistogram(ctx context.Context, filter *OpsDashboardFilter) (*OpsLatencyHistogramResponse, error) {
+	return &OpsLatencyHistogramResponse{}, nil
+}
+func (m *opsRepoMock) GetErrorTrend(ctx context.Context, filter *OpsDashboardFilter, bucketSeconds int) (*OpsErrorTrendResponse, error) {
+	return &OpsErrorTrendResponse{}, nil
+}
+func (m *opsRepoMock) GetErrorDistribution(ctx context.Context, filter *OpsDashboardFilter) (*OpsErrorDistributionResponse, error) {
+	return &OpsErrorDistributionResponse{}, nil
+}
+func (m *opsRepoMock) GetOpenAITokenStats(ctx context.Context, filter *OpsOpenAITokenStatsFilter) (*OpsOpenAITokenStatsResponse, error) {
+	return &OpsOpenAITokenStatsResponse{}, nil
+}
+// InsertSystemMetrics and the system-metrics / job-heartbeat methods that
+// follow are stubs that succeed and return empty values.
+func (m *opsRepoMock) InsertSystemMetrics(ctx context.Context, input *OpsInsertSystemMetricsInput) error {
+	return nil
+}
+func (m *opsRepoMock) GetLatestSystemMetrics(ctx context.Context, windowMinutes int) (*OpsSystemMetricsSnapshot, error) {
+	return &OpsSystemMetricsSnapshot{}, nil
+}
+func (m *opsRepoMock) UpsertJobHeartbeat(ctx context.Context, input *OpsUpsertJobHeartbeatInput) error {
+	return nil
+}
+func (m *opsRepoMock) ListJobHeartbeats(ctx context.Context) ([]*OpsJobHeartbeat, error) {
+	return []*OpsJobHeartbeat{}, nil
+}
+// ListAlertRules and the alert rule / event / silence methods that follow are
+// stubs. Create and update calls echo their input back, lookups return empty
+// or nil values, and IsAlertSilenced always reports false.
+func (m *opsRepoMock) ListAlertRules(ctx context.Context) ([]*OpsAlertRule, error) {
+	return []*OpsAlertRule{}, nil
+}
+func (m *opsRepoMock) CreateAlertRule(ctx context.Context, input *OpsAlertRule) (*OpsAlertRule, error) {
+	return input, nil
+}
+func (m *opsRepoMock) UpdateAlertRule(ctx context.Context, input *OpsAlertRule) (*OpsAlertRule, error) {
+	return input, nil
+}
+func (m *opsRepoMock) DeleteAlertRule(ctx context.Context, id int64) error {
+	return nil
+}
+func (m *opsRepoMock) ListAlertEvents(ctx context.Context, filter *OpsAlertEventFilter) ([]*OpsAlertEvent, error) {
+	return []*OpsAlertEvent{}, nil
+}
+func (m *opsRepoMock) GetAlertEventByID(ctx context.Context, eventID int64) (*OpsAlertEvent, error) {
+	return &OpsAlertEvent{}, nil
+}
+func (m *opsRepoMock) GetActiveAlertEvent(ctx context.Context, ruleID int64) (*OpsAlertEvent, error) {
+	return nil, nil
+}
+func (m *opsRepoMock) GetLatestAlertEvent(ctx context.Context, ruleID int64) (*OpsAlertEvent, error) {
+	return nil, nil
+}
+func (m *opsRepoMock) CreateAlertEvent(ctx context.Context, event *OpsAlertEvent) (*OpsAlertEvent, error) {
+	return event, nil
+}
+func (m *opsRepoMock) UpdateAlertEventStatus(ctx context.Context, eventID int64, status string, resolvedAt *time.Time) error {
+	return nil
+}
+func (m *opsRepoMock) UpdateAlertEventEmailSent(ctx context.Context, eventID int64, emailSent bool) error {
+	return nil
+}
+func (m *opsRepoMock) CreateAlertSilence(ctx context.Context, input *OpsAlertSilence) (*OpsAlertSilence, error) {
+	return input, nil
+}
+func (m *opsRepoMock) IsAlertSilenced(ctx context.Context, ruleID int64, platform string, groupID *int64, region *string, now time.Time) (bool, error) {
+	return false, nil
+}
+// UpsertHourlyMetrics and the pre-aggregation methods that follow are stubs.
+// Upserts succeed without doing anything and the "latest bucket" lookups
+// report that no bucket exists.
+func (m *opsRepoMock) UpsertHourlyMetrics(ctx context.Context, startTime, endTime time.Time) error {
+	return nil
+}
+func (m *opsRepoMock) UpsertDailyMetrics(ctx context.Context, startTime, endTime time.Time) error {
+	return nil
+}
+func (m *opsRepoMock) GetLatestHourlyBucketStart(ctx context.Context) (time.Time, bool, error) {
+	return time.Time{}, false, nil
+}
+func (m *opsRepoMock) GetLatestDailyBucketDate(ctx context.Context) (time.Time, bool, error) {
+	return time.Time{}, false, nil
+}
+// Compile-time check that opsRepoMock satisfies OpsRepository.
+var _ OpsRepository = (*opsRepoMock)(nil)
--- a/backend/internal/service/ops_service.go
+++ b/backend/internal/service/ops_service.go
@@ -37,6 +37,7 @@ type OpsService struct {
 	openAIGatewayService      *OpenAIGatewayService
 	geminiCompatService       *GeminiMessagesCompatService
 	antigravityGatewayService *AntigravityGatewayService
+	systemLogSink             *OpsSystemLogSink
 }
 func NewOpsService(
@@ -50,8 +51,9 @@ func NewOpsService(
 	openAIGatewayService *OpenAIGatewayService,
 	geminiCompatService *GeminiMessagesCompatService,
 	antigravityGatewayService *AntigravityGatewayService,
+	systemLogSink *OpsSystemLogSink,
 ) *OpsService {
-	return &OpsService{
+	svc := &OpsService{
 		opsRepo:     opsRepo,
 		settingRepo: settingRepo,
 		cfg:         cfg,
@@ -64,7 +66,10 @@ func NewOpsService(
 		openAIGatewayService:      openAIGatewayService,
 		geminiCompatService:       geminiCompatService,
 		antigravityGatewayService: antigravityGatewayService,
+		systemLogSink:             systemLogSink,
 	}
+	svc.applyRuntimeLogConfigOnStartup(context.Background())
+	return svc
 }
 func (s *OpsService) RequireMonitoringEnabled(ctx context.Context) error {

--- a/backend/internal/service/ops_system_log_service.go
+++ b/backend/internal/service/ops_system_log_service.go
+package service
+import (
+	"context"
+	"database/sql"
+	"encoding/json"
+	"errors"
+	"log"
+	"strings"
+	"time"
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
+)
+// ListSystemLogs returns one page of indexed system logs matching filter.
+//
+// It first requires monitoring to be enabled. With no repository configured it
+// returns an empty first page. A nil filter is treated as an empty one.
+// Page defaults to 1 and PageSize defaults to 50, capped at 200. These
+// defaults are written back into the caller's filter. Repository errors are
+// wrapped as OPS_SYSTEM_LOG_LIST_FAILED.
+func (s *OpsService) ListSystemLogs(ctx context.Context, filter *OpsSystemLogFilter) (*OpsSystemLogList, error) {
+	if err := s.RequireMonitoringEnabled(ctx); err != nil {
+		return nil, err
+	}
+	if s.opsRepo == nil {
+		return &OpsSystemLogList{
+			Logs:     []*OpsSystemLog{},
+			Total:    0,
+			Page:     1,
+			PageSize: 50,
+		}, nil
+	}
+	if filter == nil {
+		filter = &OpsSystemLogFilter{}
+	}
+	if filter.Page <= 0 {
+		filter.Page = 1
+	}
+	if filter.PageSize <= 0 {
+		filter.PageSize = 50
+	}
+	if filter.PageSize > 200 {
+		filter.PageSize = 200
+	}
+	result, err := s.opsRepo.ListSystemLogs(ctx, filter)
+	if err != nil {
+		return nil, infraerrors.InternalServer("OPS_SYSTEM_LOG_LIST_FAILED", "Failed to list system logs").WithCause(err)
+	}
+	return result, nil
+}
+// CleanupSystemLogs deletes the system logs matching filter and records an audit row.
+//
+// It requires monitoring to be enabled, a configured repository and a positive
+// operatorID, and it rejects a time range whose start is after its end. It
+// returns the number of deleted rows. sql.ErrNoRows from the repository is
+// reported as zero rows deleted. A repository error whose text contains
+// "requires at least one filter" becomes a bad-request error. Any other error
+// is wrapped as OPS_SYSTEM_LOG_CLEANUP_FAILED.
+func (s *OpsService) CleanupSystemLogs(ctx context.Context, filter *OpsSystemLogCleanupFilter, operatorID int64) (int64, error) {
+	if err := s.RequireMonitoringEnabled(ctx); err != nil {
+		return 0, err
+	}
+	if s.opsRepo == nil {
+		return 0, infraerrors.ServiceUnavailable("OPS_REPO_UNAVAILABLE", "Ops repository not available")
+	}
+	if operatorID <= 0 {
+		return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_INVALID_OPERATOR", "invalid operator")
+	}
+	if filter == nil {
+		filter = &OpsSystemLogCleanupFilter{}
+	}
+	if filter.EndTime != nil && filter.StartTime != nil && filter.StartTime.After(*filter.EndTime) {
+		return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_INVALID_RANGE", "invalid time range")
+	}
+	deletedRows, err := s.opsRepo.DeleteSystemLogs(ctx, filter)
+	if err != nil {
+		if errors.Is(err, sql.ErrNoRows) {
+			return 0, nil
+		}
+		// NOTE(review): the "filter required" case is detected by matching the
+		// error text; this depends on the repository's exact wording.
+		if strings.Contains(strings.ToLower(err.Error()), "requires at least one filter") {
+			return 0, infraerrors.BadRequest("OPS_SYSTEM_LOG_CLEANUP_FILTER_REQUIRED", "cleanup requires at least one filter condition")
+		}
+		return 0, infraerrors.InternalServer("OPS_SYSTEM_LOG_CLEANUP_FAILED", "Failed to cleanup system logs").WithCause(err)
+	}
+	if auditErr := s.opsRepo.InsertSystemLogCleanupAudit(ctx, &OpsSystemLogCleanupAudit{
+		CreatedAt:   time.Now().UTC(),
+		OperatorID:  operatorID,
+		Conditions:  marshalSystemLogCleanupConditions(filter),
+		DeletedRows: deletedRows,
+	}); auditErr != nil {
+		// An audit failure must not fail the main flow, so that operational
+		// cleanup is never blocked by it.
+		log.Printf("[OpsSystemLog] cleanup audit failed: %v", auditErr)
+	}
+	return deletedRows, nil
+}
+// marshalSystemLogCleanupConditions encodes filter as a JSON object for the
+// cleanup audit record. String criteria are always present (trimmed, possibly
+// empty). user_id, account_id, start_time and end_time appear only when set,
+// with times formatted as UTC RFC 3339 with nanoseconds. It returns "{}" for a
+// nil filter or when encoding fails.
+func marshalSystemLogCleanupConditions(filter *OpsSystemLogCleanupFilter) string {
+	if filter == nil {
+		return "{}"
+	}
+	payload := map[string]any{
+		"level":             strings.TrimSpace(filter.Level),
+		"component":         strings.TrimSpace(filter.Component),
+		"request_id":        strings.TrimSpace(filter.RequestID),
+		"client_request_id": strings.TrimSpace(filter.ClientRequestID),
+		"platform":          strings.TrimSpace(filter.Platform),
+		"model":             strings.TrimSpace(filter.Model),
+		"query":             strings.TrimSpace(filter.Query),
+	}
+	if filter.UserID != nil {
+		payload["user_id"] = *filter.UserID
+	}
+	if filter.AccountID != nil {
+		payload["account_id"] = *filter.AccountID
+	}
+	if filter.StartTime != nil && !filter.StartTime.IsZero() {
+		payload["start_time"] = filter.StartTime.UTC().Format(time.RFC3339Nano)
+	}
+	if filter.EndTime != nil && !filter.EndTime.IsZero() {
+		payload["end_time"] = filter.EndTime.UTC().Format(time.RFC3339Nano)
+	}
+	raw, err := json.Marshal(payload)
+	if err != nil {
+		return "{}"
+	}
+	return string(raw)
+}
+// GetSystemLogSinkHealth returns the sink's health counters, or a zero-valued
+// snapshot when the service or its sink is nil.
+func (s *OpsService) GetSystemLogSinkHealth() OpsSystemLogSinkHealth {
+	if s == nil || s.systemLogSink == nil {
+		return OpsSystemLogSinkHealth{}
+	}
+	return s.systemLogSink.Health()
+}
--- a/backend/internal/service/ops_system_log_sink.go
+++ b/backend/internal/service/ops_system_log_sink.go
+package service
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+	"sync"
+	"sync/atomic"
+	"time"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+	"github.com/Wei-Shaw/sub2api/internal/util/logredact"
+)
+// OpsSystemLogSinkHealth is a point-in-time snapshot of the sink's queue and
+// write counters, as produced by OpsSystemLogSink.Health.
+type OpsSystemLogSinkHealth struct {
+	QueueDepth      int64  `json:"queue_depth"`
+	QueueCapacity   int64  `json:"queue_capacity"`
+	DroppedCount    uint64 `json:"dropped_count"`
+	WriteFailed     uint64 `json:"write_failed_count"`
+	WrittenCount    uint64 `json:"written_count"`
+	AvgWriteDelayMs uint64 `json:"avg_write_delay_ms"`
+	LastError       string `json:"last_error"`
+}
+// OpsSystemLogSink buffers selected log events in a bounded queue and writes
+// them to the ops repository in batches from a background goroutine.
+type OpsSystemLogSink struct {
+	opsRepo OpsRepository
+	// queue holds accepted events until the background goroutine batches them.
+	queue chan *logger.LogEvent
+	// batchSize triggers a flush once reached; flushInterval triggers one periodically.
+	batchSize     int
+	flushInterval time.Duration
+	// ctx/cancel stop the background goroutine; wg lets Stop wait for it.
+	ctx    context.Context
+	cancel context.CancelFunc
+	wg     sync.WaitGroup
+	// Counters below are updated and read with sync/atomic.
+	droppedCount uint64
+	writeFailed  uint64
+	writtenCount uint64
+	totalDelayNs uint64
+	// lastError stores the most recent flush error text ("" after a success).
+	lastError atomic.Value
+}
+// NewOpsSystemLogSink creates a sink with a 5000-event queue, a batch size of
+// 200 and a one-second flush interval. The background goroutine is not
+// started until Start is called.
+func NewOpsSystemLogSink(opsRepo OpsRepository) *OpsSystemLogSink {
+	ctx, cancel := context.WithCancel(context.Background())
+	s := &OpsSystemLogSink{
+		opsRepo:       opsRepo,
+		queue:         make(chan *logger.LogEvent, 5000),
+		batchSize:     200,
+		flushInterval: time.Second,
+		ctx:           ctx,
+		cancel:        cancel,
+	}
+	// Seed lastError with an empty string so it always holds a string value.
+	s.lastError.Store("")
+	return s
+}
+// Start launches the background flush goroutine. It does nothing when the
+// sink or its repository is nil.
+// NOTE(review): nothing here guards against calling Start more than once;
+// each call starts another goroutine.
+func (s *OpsSystemLogSink) Start() {
+	if s == nil || s.opsRepo == nil {
+		return
+	}
+	s.wg.Add(1)
+	go s.run()
+}
+// Stop cancels the sink's context and blocks until the background goroutine
+// has exited. It does nothing on a nil sink.
+func (s *OpsSystemLogSink) Stop() {
+	if s == nil {
+		return
+	}
+	s.cancel()
+	s.wg.Wait()
+}
+// WriteLogEvent enqueues event for indexing if shouldIndex accepts it.
+// It never blocks: when the queue is full the event is discarded and
+// droppedCount is incremented. Nil sinks and nil events are ignored.
+func (s *OpsSystemLogSink) WriteLogEvent(event *logger.LogEvent) {
+	if s == nil || event == nil || !s.shouldIndex(event) {
+		return
+	}
+	select {
+	case s.queue <- event:
+	default:
+		atomic.AddUint64(&s.droppedCount, 1)
+	}
+}
+// shouldIndex reports whether event belongs in the system-log index.
+//
+// Events at warn level or above (warn, warning, error, fatal, panic, dpanic)
+// are always indexed. Lower-level events are indexed only when their component
+// name contains "http.access" or "audit". Both values are compared after
+// trimming and lower-casing.
+func (s *OpsSystemLogSink) shouldIndex(event *logger.LogEvent) bool {
+	switch strings.ToLower(strings.TrimSpace(event.Level)) {
+	case "warn", "warning", "error", "fatal", "panic", "dpanic":
+		return true
+	}
+	name := strings.ToLower(strings.TrimSpace(event.Component))
+	return strings.Contains(name, "http.access") || strings.Contains(name, "audit")
+}
+// run is the background loop that batches queued events and flushes them.
+//
+// A flush happens when the batch reaches batchSize, on every flushInterval
+// tick, and once more on shutdown. On shutdown the events already sitting in
+// the queue are drained into batches and flushed before returning, so that
+// events accepted before Stop are either written or counted as failed rather
+// than silently abandoned. Flush failures increment writeFailed, record
+// lastError and are reported on stderr.
+func (s *OpsSystemLogSink) run() {
+	defer s.wg.Done()
+	ticker := time.NewTicker(s.flushInterval)
+	defer ticker.Stop()
+	batch := make([]*logger.LogEvent, 0, s.batchSize)
+	flush := func() {
+		if len(batch) == 0 {
+			return
+		}
+		started := time.Now()
+		inserted, err := s.flushBatch(batch)
+		delay := time.Since(started)
+		if err != nil {
+			atomic.AddUint64(&s.writeFailed, uint64(len(batch)))
+			s.lastError.Store(err.Error())
+			// Report on stderr directly instead of going through the logger.
+			_, _ = fmt.Fprintf(os.Stderr, "time=%s level=WARN msg=\"ops system log sink flush failed\" err=%v batch=%d\n",
+				time.Now().Format(time.RFC3339Nano), err, len(batch),
+			)
+		} else {
+			atomic.AddUint64(&s.writtenCount, uint64(inserted))
+			atomic.AddUint64(&s.totalDelayNs, uint64(delay.Nanoseconds()))
+			s.lastError.Store("")
+		}
+		// Reuse the backing array for the next batch.
+		batch = batch[:0]
+	}
+	// collect appends one event to the batch and flushes when it is full.
+	collect := func(item *logger.LogEvent) {
+		if item == nil {
+			return
+		}
+		batch = append(batch, item)
+		if len(batch) >= s.batchSize {
+			flush()
+		}
+	}
+	for {
+		select {
+		case <-s.ctx.Done():
+			// Drain only what is queued right now; bounding by the current
+			// length keeps shutdown finite even if producers keep writing.
+		drain:
+			for remaining := len(s.queue); remaining > 0; remaining-- {
+				select {
+				case item := <-s.queue:
+					collect(item)
+				default:
+					break drain
+				}
+			}
+			flush()
+			return
+		case item := <-s.queue:
+			collect(item)
+		case <-ticker.C:
+			flush()
+		}
+	}
+}
+// flushBatch converts batch into repository inputs and inserts them in one call.
+//
+// For each non-nil event it takes request_id, client_request_id, platform,
+// model, user_id and account_id from the event fields. A "component" field
+// overrides the event's component, and "app" is used when both are empty. The
+// message and the fields are redacted before being stored. It returns the
+// number of rows the repository reports as inserted, or (0, nil) when the
+// batch holds no usable events. The insert is bounded by a 5-second timeout.
+func (s *OpsSystemLogSink) flushBatch(batch []*logger.LogEvent) (int, error) {
+	inputs := make([]*OpsInsertSystemLogInput, 0, len(batch))
+	for _, event := range batch {
+		if event == nil {
+			continue
+		}
+		createdAt := event.Time.UTC()
+		if createdAt.IsZero() {
+			createdAt = time.Now().UTC()
+		}
+		// Work on a copy so the event's own field map is never modified.
+		fields := copyMap(event.Fields)
+		requestID := asString(fields["request_id"])
+		clientRequestID := asString(fields["client_request_id"])
+		platform := asString(fields["platform"])
+		model := asString(fields["model"])
+		component := strings.TrimSpace(event.Component)
+		if fieldComponent := asString(fields["component"]); fieldComponent != "" {
+			component = fieldComponent
+		}
+		if component == "" {
+			component = "app"
+		}
+		userID := asInt64Ptr(fields["user_id"])
+		accountID := asInt64Ptr(fields["account_id"])
+		// Redact uniformly before writing to the index.
+		message := logredact.RedactText(strings.TrimSpace(event.Message))
+		redactedExtra := logredact.RedactMap(fields)
+		// A marshal failure leaves extraJSON empty, which falls back to "{}" below.
+		extraJSONBytes, _ := json.Marshal(redactedExtra)
+		extraJSON := string(extraJSONBytes)
+		if strings.TrimSpace(extraJSON) == "" {
+			extraJSON = "{}"
+		}
+		inputs = append(inputs, &OpsInsertSystemLogInput{
+			CreatedAt:       createdAt,
+			Level:           strings.ToLower(strings.TrimSpace(event.Level)),
+			Component:       component,
+			Message:         message,
+			RequestID:       requestID,
+			ClientRequestID: clientRequestID,
+			UserID:          userID,
+			AccountID:       accountID,
+			Platform:        platform,
+			Model:           model,
+			ExtraJSON:       extraJSON,
+		})
+	}
+	if len(inputs) == 0 {
+		return 0, nil
+	}
+	// run performs a final flush after the sink context has been cancelled.
+	// Deriving the timeout from that cancelled context would make the insert
+	// fail immediately, so fall back to a fresh background context. A nil
+	// context (a sink built without the constructor) is handled the same way.
+	parent := s.ctx
+	if parent == nil || parent.Err() != nil {
+		parent = context.Background()
+	}
+	ctx, cancel := context.WithTimeout(parent, 5*time.Second)
+	defer cancel()
+	inserted, err := s.opsRepo.BatchInsertSystemLogs(ctx, inputs)
+	if err != nil {
+		return 0, err
+	}
+	return int(inserted), nil
+}
+// Health returns a snapshot of the sink's queue usage and counters.
+// AvgWriteDelayMs is the accumulated successful-flush time divided by the
+// number of written rows, in whole milliseconds; it is 0 until a row has been
+// written. A nil sink yields a zero-valued snapshot.
+func (s *OpsSystemLogSink) Health() OpsSystemLogSinkHealth {
+	if s == nil {
+		return OpsSystemLogSinkHealth{}
+	}
+	written := atomic.LoadUint64(&s.writtenCount)
+	totalDelay := atomic.LoadUint64(&s.totalDelayNs)
+	var avgDelay uint64
+	if written > 0 {
+		avgDelay = (totalDelay / written) / uint64(time.Millisecond)
+	}
+	// The comma-ok assertion tolerates a lastError that was never stored.
+	lastErr, _ := s.lastError.Load().(string)
+	return OpsSystemLogSinkHealth{
+		QueueDepth:      int64(len(s.queue)),
+		QueueCapacity:   int64(cap(s.queue)),
+		DroppedCount:    atomic.LoadUint64(&s.droppedCount),
+		WriteFailed:     atomic.LoadUint64(&s.writeFailed),
+		WrittenCount:    written,
+		AvgWriteDelayMs: avgDelay,
+		LastError:       strings.TrimSpace(lastErr),
+	}
+}
+// copyMap returns a shallow copy of in. The result is always a non-nil map,
+// including when in is nil or empty; values are copied by assignment, not
+// deep-cloned.
+func copyMap(in map[string]any) map[string]any {
+	dup := make(map[string]any, len(in))
+	for key, value := range in {
+		dup[key] = value
+	}
+	return dup
+}
+// asString extracts a trimmed string from v. Plain strings are trimmed,
+// fmt.Stringer values are rendered with String() and trimmed, and any other
+// value (including nil) yields "".
+func asString(v any) string {
+	if text, ok := v.(string); ok {
+		return strings.TrimSpace(text)
+	}
+	if stringer, ok := v.(fmt.Stringer); ok {
+		return strings.TrimSpace(stringer.String())
+	}
+	return ""
+}
+// asInt64Ptr converts v into a pointer to a strictly positive int64.
+//
+// It accepts every built-in signed and unsigned integer kind, float64
+// (truncated toward zero), json.Number and decimal strings (surrounding
+// whitespace is ignored). It returns nil for nil, unsupported types,
+// unparsable input, values that are zero or negative, and values that do not
+// fit in an int64.
+func asInt64Ptr(v any) *int64 {
+	// positive returns &n for n > 0 and nil otherwise.
+	positive := func(n int64) *int64 {
+		if n <= 0 {
+			return nil
+		}
+		return &n
+	}
+	// fromUnsigned rejects values above the int64 range before converting.
+	fromUnsigned := func(u uint64) *int64 {
+		if u > 1<<63-1 {
+			return nil
+		}
+		return positive(int64(u))
+	}
+	switch t := v.(type) {
+	case int:
+		return positive(int64(t))
+	case int8:
+		return positive(int64(t))
+	case int16:
+		return positive(int64(t))
+	case int32:
+		return positive(int64(t))
+	case int64:
+		return positive(t)
+	case uint:
+		return fromUnsigned(uint64(t))
+	case uint8:
+		return fromUnsigned(uint64(t))
+	case uint16:
+		return fromUnsigned(uint64(t))
+	case uint32:
+		return fromUnsigned(uint64(t))
+	case uint64:
+		return fromUnsigned(t)
+	case float64:
+		// Converting NaN or an out-of-range float to int64 is
+		// implementation-defined, so only convert values inside [1, 2^63).
+		// The negated form is deliberate: NaN fails every comparison.
+		if !(t >= 1 && t < 1<<63) {
+			return nil
+		}
+		return positive(int64(t))
+	case json.Number:
+		if n, err := t.Int64(); err == nil {
+			return positive(n)
+		}
+	case string:
+		if n, err := strconv.ParseInt(strings.TrimSpace(t), 10, 64); err == nil {
+			return positive(n)
+		}
+	}
+	return nil
+}
--- a/backend/internal/service/ops_system_log_sink_test.go
+++ b/backend/internal/service/ops_system_log_sink_test.go
+package service
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"fmt"
+	"strings"
+	"sync/atomic"
+	"testing"
+	"time"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/logger"
+)
+// TestOpsSystemLogSink_ShouldIndex checks the indexing rule: warn/error levels
+// and the http.access / audit components are indexed, plain info logs are not.
+func TestOpsSystemLogSink_ShouldIndex(t *testing.T) {
+	sink := &OpsSystemLogSink{}
+	cases := []struct {
+		name  string
+		event *logger.LogEvent
+		want  bool
+	}{
+		{
+			name:  "warn level",
+			event: &logger.LogEvent{Level: "warn", Component: "app"},
+			want:  true,
+		},
+		{
+			name:  "error level",
+			event: &logger.LogEvent{Level: "error", Component: "app"},
+			want:  true,
+		},
+		{
+			name:  "access component",
+			event: &logger.LogEvent{Level: "info", Component: "http.access"},
+			want:  true,
+		},
+		{
+			name:  "audit component",
+			event: &logger.LogEvent{Level: "info", Component: "audit.log_config_change"},
+			want:  true,
+		},
+		{
+			name:  "plain info",
+			event: &logger.LogEvent{Level: "info", Component: "app"},
+			want:  false,
+		},
+	}
+	for _, tc := range cases {
+		if got := sink.shouldIndex(tc.event); got != tc.want {
+			t.Fatalf("%s: shouldIndex()=%v, want %v", tc.name, got, tc.want)
+		}
+	}
+}
+func TestOpsSystemLogSink_WriteLogEvent_ShouldDropWhenQueueFull(t *testing.T) {
+	sink := &OpsSystemLogSink{
+		queue: make(chan *logger.LogEvent, 1),
+	}
+	sink.WriteLogEvent(&logger.LogEvent{Level: "warn", Component: "app"})
+	sink.WriteLogEvent(&logger.LogEvent{Level: "warn", Component: "app"})
+	if got := len(sink.queue); got != 1 {
+		t.Fatalf("queue len = %d, want 1", got)
+	}
+	if dropped := atomic.LoadUint64(&sink.droppedCount); dropped != 1 {
+		t.Fatalf("droppedCount = %d, want 1", dropped)
+	}
+}
+// TestOpsSystemLogSink_Health seeds the sink's counters, last error and queue
+// directly and verifies that Health reports each of them, including the
+// average write delay converted to milliseconds.
+func TestOpsSystemLogSink_Health(t *testing.T) {
+	sink := &OpsSystemLogSink{queue: make(chan *logger.LogEvent, 10)}
+	// Two pending events out of a capacity of ten.
+	for i := 0; i < 2; i++ {
+		sink.queue <- &logger.LogEvent{Level: "warn", Component: "app"}
+	}
+	// 5ms of accumulated delay over 5 writes -> 1ms average.
+	atomic.StoreUint64(&sink.writtenCount, 5)
+	atomic.StoreUint64(&sink.totalDelayNs, uint64(5*time.Millisecond))
+	atomic.StoreUint64(&sink.writeFailed, 2)
+	atomic.StoreUint64(&sink.droppedCount, 3)
+	sink.lastError.Store("db timeout")
+	got := sink.Health()
+	// Cases are evaluated top to bottom; the first mismatch aborts the test.
+	switch {
+	case got.QueueDepth != 2:
+		t.Fatalf("queue depth = %d, want 2", got.QueueDepth)
+	case got.QueueCapacity != 10:
+		t.Fatalf("queue capacity = %d, want 10", got.QueueCapacity)
+	case got.DroppedCount != 3:
+		t.Fatalf("dropped = %d, want 3", got.DroppedCount)
+	case got.WriteFailed != 2:
+		t.Fatalf("write failed = %d, want 2", got.WriteFailed)
+	case got.WrittenCount != 5:
+		t.Fatalf("written = %d, want 5", got.WrittenCount)
+	case got.AvgWriteDelayMs != 1:
+		t.Fatalf("avg delay ms = %d, want 1", got.AvgWriteDelayMs)
+	case got.LastError != "db timeout":
+		t.Fatalf("last error = %q, want db timeout", got.LastError)
+	}
+}
+// TestOpsSystemLogSink_StartStopAndFlushSuccess runs a started sink against a
+// mock repository and checks that a written event reaches
+// BatchInsertSystemLogs with request, user and account identifiers taken from
+// the event fields, and that Health eventually reports a non-zero written
+// count.
+func TestOpsSystemLogSink_StartStopAndFlushSuccess(t *testing.T) {
+	// The first batch is handed to the test through a channel, so the test
+	// never reads a slice that the repository callback may still append to.
+	flushed := make(chan []*OpsInsertSystemLogInput, 1)
+	repo := &opsRepoMock{
+		BatchInsertSystemLogsFn: func(_ context.Context, inputs []*OpsInsertSystemLogInput) (int64, error) {
+			batch := append([]*OpsInsertSystemLogInput(nil), inputs...)
+			select {
+			case flushed <- batch:
+			default: // only the first batch is inspected
+			}
+			return int64(len(inputs)), nil
+		},
+	}
+	sink := NewOpsSystemLogSink(repo)
+	sink.batchSize = 1
+	sink.flushInterval = 10 * time.Millisecond
+	sink.Start()
+	defer sink.Stop()
+	sink.WriteLogEvent(&logger.LogEvent{
+		Time:      time.Now().UTC(),
+		Level:     "warn",
+		Component: "http.access",
+		Message:   `authorization="Bearer sk-test-123"`,
+		Fields: map[string]any{
+			"component":         "http.access",
+			"request_id":        "req-1",
+			"client_request_id": "creq-1",
+			"user_id":           "12",
+			"account_id":        json.Number("34"),
+			"platform":          "openai",
+			"model":             "gpt-5",
+		},
+	})
+	var captured []*OpsInsertSystemLogInput
+	select {
+	case captured = <-flushed:
+	case <-time.After(2 * time.Second):
+		t.Fatalf("timeout waiting for sink flush")
+	}
+	if len(captured) != 1 {
+		t.Fatalf("captured len = %d, want 1", len(captured))
+	}
+	item := captured[0]
+	if item.RequestID != "req-1" || item.ClientRequestID != "creq-1" {
+		t.Fatalf("unexpected request ids: %+v", item)
+	}
+	if item.UserID == nil || *item.UserID != 12 {
+		t.Fatalf("unexpected user_id: %+v", item.UserID)
+	}
+	if item.AccountID == nil || *item.AccountID != 34 {
+		t.Fatalf("unexpected account_id: %+v", item.AccountID)
+	}
+	if strings.TrimSpace(item.Message) == "" {
+		t.Fatalf("message should not be empty")
+	}
+	// The batch is published from inside the repository call, i.e. before that
+	// call has returned. The sink presumably records the written count only
+	// after the call returns, so poll Health instead of reading it once.
+	deadline := time.Now().Add(2 * time.Second)
+	for sink.Health().WrittenCount == 0 {
+		if time.Now().After(deadline) {
+			t.Fatalf("written_count should be >0")
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+}
+// TestOpsSystemLogSink_FlushFailureUpdatesHealth uses a repository that always
+// fails and polls Health for up to two seconds until WriteFailed becomes
+// non-zero, then checks that LastError carries the repository's message.
+func TestOpsSystemLogSink_FlushFailureUpdatesHealth(t *testing.T) {
+	failingRepo := &opsRepoMock{
+		BatchInsertSystemLogsFn: func(context.Context, []*OpsInsertSystemLogInput) (int64, error) {
+			return 0, errors.New("db unavailable")
+		},
+	}
+	sink := NewOpsSystemLogSink(failingRepo)
+	sink.batchSize = 1
+	sink.flushInterval = 10 * time.Millisecond
+	sink.Start()
+	defer sink.Stop()
+	event := &logger.LogEvent{
+		Time:      time.Now().UTC(),
+		Level:     "warn",
+		Component: "app",
+		Message:   "boom",
+		Fields:    map[string]any{},
+	}
+	sink.WriteLogEvent(event)
+	// Check, then sleep 20ms between attempts until the deadline passes.
+	for deadline := time.Now().Add(2 * time.Second); time.Now().Before(deadline); time.Sleep(20 * time.Millisecond) {
+		snapshot := sink.Health()
+		if snapshot.WriteFailed == 0 {
+			continue
+		}
+		if !strings.Contains(snapshot.LastError, "db unavailable") {
+			t.Fatalf("unexpected last error: %s", snapshot.LastError)
+		}
+		return
+	}
+	t.Fatalf("write_failed_count not updated")
+}
+// stringerValue is a string-based type that implements fmt.Stringer; the
+// tests use it to exercise the Stringer branch of asString.
+type stringerValue string
+
+// String returns the underlying string unchanged.
+func (s stringerValue) String() string {
+	return string(s)
+}
+// TestOpsSystemLogSink_HelperFunctions covers the small conversion helpers
+// used by the sink: copyMap, asString and asInt64Ptr.
+//
+// Each helper gets its own subtest, and the table rows report with t.Errorf,
+// so a failure in one helper or one row does not hide the others.
+func TestOpsSystemLogSink_HelperFunctions(t *testing.T) {
+	t.Run("copyMap", func(t *testing.T) {
+		src := map[string]any{"a": 1}
+		cloned := copyMap(src)
+		// Mutating the source afterwards must not leak into the copy.
+		src["a"] = 2
+		if v, ok := cloned["a"].(int); !ok || v != 1 {
+			t.Errorf("copyMap should create copy")
+		}
+	})
+	t.Run("asString", func(t *testing.T) {
+		if got := asString(stringerValue(" hello ")); got != "hello" {
+			t.Errorf("asString stringer = %q", got)
+		}
+		// An error value is not a fmt.Stringer, so it must not be stringified.
+		if got := asString(fmt.Errorf("x")); got != "" {
+			t.Errorf("asString error should be empty, got %q", got)
+		}
+		if got := asString(123); got != "" {
+			t.Errorf("asString non-string should be empty, got %q", got)
+		}
+	})
+	t.Run("asInt64Ptr", func(t *testing.T) {
+		cases := []struct {
+			in   any
+			want int64
+			ok   bool
+		}{
+			{in: 5, want: 5, ok: true},
+			{in: int64(6), want: 6, ok: true},
+			{in: float64(7), want: 7, ok: true},
+			{in: json.Number("8"), want: 8, ok: true},
+			{in: "9", want: 9, ok: true},
+			{in: "0", ok: false},
+			{in: -1, ok: false},
+			{in: "abc", ok: false},
+		}
+		for _, tc := range cases {
+			got := asInt64Ptr(tc.in)
+			switch {
+			case tc.ok && (got == nil || *got != tc.want):
+				t.Errorf("asInt64Ptr(%v) = %+v, want %d", tc.in, got, tc.want)
+			case !tc.ok && got != nil:
+				t.Errorf("asInt64Ptr(%v) should be nil, got %d", tc.in, *got)
+			}
+		}
+	})
+}