Merge remote-tracking branch 'upstream/main'

# Conflicts: # frontend/src/components/account/CreateAccountModal.vue

Merge remote-tracking branch 'upstream/main'
# Conflicts: # frontend/src/components/account/CreateAccountModal.vue
7331220e · Edric Li · fb86002e · 4f13c8de · 7331220e · 7331220e
Commit 7331220e authored Jan 01, 2026 by Edric Li
--- a/backend/internal/service/gateway_request_test.go
+++ b/backend/internal/service/gateway_request_test.go
+package service
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestParseGatewayRequest(t *testing.T) {
+	body := []byte(`{"model":"claude-3-7-sonnet","stream":true,"metadata":{"user_id":"session_123e4567-e89b-12d3-a456-426614174000"},"system":[{"type":"text","text":"hello","cache_control":{"type":"ephemeral"}}],"messages":[{"content":"hi"}]}`)
+	parsed, err := ParseGatewayRequest(body)
+	require.NoError(t, err)
+	require.Equal(t, "claude-3-7-sonnet", parsed.Model)
+	require.True(t, parsed.Stream)
+	require.Equal(t, "session_123e4567-e89b-12d3-a456-426614174000", parsed.MetadataUserID)
+	require.True(t, parsed.HasSystem)
+	require.NotNil(t, parsed.System)
+	require.Len(t, parsed.Messages, 1)
+}
+
+func TestParseGatewayRequest_SystemNull(t *testing.T) {
+	body := []byte(`{"model":"claude-3","system":null}`)
+	parsed, err := ParseGatewayRequest(body)
+	require.NoError(t, err)
+	// 显式传入 system:null 也应视为“字段已存在”，避免默认 system 被注入。
+	require.True(t, parsed.HasSystem)
+	require.Nil(t, parsed.System)
+}
+
+func TestParseGatewayRequest_InvalidModelType(t *testing.T) {
+	body := []byte(`{"model":123}`)
+	_, err := ParseGatewayRequest(body)
+	require.Error(t, err)
+}
+
+func TestParseGatewayRequest_InvalidStreamType(t *testing.T) {
+	body := []byte(`{"stream":"true"}`)
+	_, err := ParseGatewayRequest(body)
+	require.Error(t, err)
+}
--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
@@ -13,6 +13,7 @@ import (
 	"log"
 	"net/http"
 	"regexp"
+	"sort"
 	"strings"
 	"time"

@@ -33,7 +34,10 @@ const (

 // sseDataRe matches SSE data lines with optional whitespace after colon.
 // Some upstream APIs return non-standard "data:" without space (should be "data: ").
-var sseDataRe = regexp.MustCompile(`^data:\s*`)
+var (
+	sseDataRe      = regexp.MustCompile(`^data:\s*`)
+	sessionIDRegex = regexp.MustCompile(`session_([a-f0-9-]{36})`)
+)

 // allowedHeaders 白名单headers（参考CRS项目）
 var allowedHeaders = map[string]bool{
@@ -64,6 +68,20 @@ type GatewayCache interface {
 	RefreshSessionTTL(ctx context.Context, sessionHash string, ttl time.Duration) error
 }

+type AccountWaitPlan struct {
+	AccountID      int64
+	MaxConcurrency int
+	Timeout        time.Duration
+	MaxWaiting     int
+}
+
+type AccountSelectionResult struct {
+	Account     *Account
+	Acquired    bool
+	ReleaseFunc func()
+	WaitPlan    *AccountWaitPlan // nil means no wait allowed
+}
+
 // ClaudeUsage 表示Claude API返回的usage信息
 type ClaudeUsage struct {
 	InputTokens              int `json:"input_tokens"`
@@ -106,6 +124,7 @@ type GatewayService struct {
 	identityService     *IdentityService
 	httpUpstream        HTTPUpstream
 	deferredService     *DeferredService
+	concurrencyService  *ConcurrencyService
 }

 // NewGatewayService creates a new GatewayService
@@ -117,6 +136,7 @@ func NewGatewayService(
 	userSubRepo UserSubscriptionRepository,
 	cache GatewayCache,
 	cfg *config.Config,
+	concurrencyService *ConcurrencyService,
 	billingService *BillingService,
 	rateLimitService *RateLimitService,
 	billingCacheService *BillingCacheService,
@@ -132,6 +152,7 @@ func NewGatewayService(
 		userSubRepo:         userSubRepo,
 		cache:               cache,
 		cfg:                 cfg,
+		concurrencyService:  concurrencyService,
 		billingService:      billingService,
 		rateLimitService:    rateLimitService,
 		billingCacheService: billingCacheService,
@@ -141,40 +162,36 @@ func NewGatewayService(
 	}
 }

-// GenerateSessionHash 从请求体计算粘性会话hash
-func (s *GatewayService) GenerateSessionHash(body []byte) string {
-	var req map[string]any
-	if err := json.Unmarshal(body, &req); err != nil {
+// GenerateSessionHash 从预解析请求计算粘性会话 hash
+func (s *GatewayService) GenerateSessionHash(parsed *ParsedRequest) string {
+	if parsed == nil {
 		return ""
 	}

-	// 1. 最高优先级：从metadata.user_id提取session_xxx
-	if metadata, ok := req["metadata"].(map[string]any); ok {
-		if userID, ok := metadata["user_id"].(string); ok {
-			re := regexp.MustCompile(`session_([a-f0-9-]{36})`)
-			if match := re.FindStringSubmatch(userID); len(match) > 1 {
+	// 1. 最高优先级：从 metadata.user_id 提取 session_xxx
+	if parsed.MetadataUserID != "" {
+		if match := sessionIDRegex.FindStringSubmatch(parsed.MetadataUserID); len(match) > 1 {
 			return match[1]
 		}
 	}
-	}

-	// 2. 提取带cache_control: {type: "ephemeral"}的内容
-	cacheableContent := s.extractCacheableContent(req)
+	// 2. 提取带 cache_control: {type: "ephemeral"} 的内容
+	cacheableContent := s.extractCacheableContent(parsed)
 	if cacheableContent != "" {
 		return s.hashContent(cacheableContent)
 	}

-	// 3. Fallback: 使用system内容
-	if system := req["system"]; system != nil {
-		systemText := s.extractTextFromSystem(system)
+	// 3. Fallback: 使用 system 内容
+	if parsed.System != nil {
+		systemText := s.extractTextFromSystem(parsed.System)
 		if systemText != "" {
 			return s.hashContent(systemText)
 		}
 	}

-	// 4. 最后fallback: 使用第一条消息
-	if messages, ok := req["messages"].([]any); ok && len(messages) > 0 {
-		if firstMsg, ok := messages[0].(map[string]any); ok {
+	// 4. 最后 fallback: 使用第一条消息
+	if len(parsed.Messages) > 0 {
+		if firstMsg, ok := parsed.Messages[0].(map[string]any); ok {
 			msgText := s.extractTextFromContent(firstMsg["content"])
 			if msgText != "" {
 				return s.hashContent(msgText)
@@ -185,34 +202,45 @@ func (s *GatewayService) GenerateSessionHash(body []byte) string {
 	return ""
 }

-func (s *GatewayService) extractCacheableContent(req map[string]any) string {
-	var content string
+// BindStickySession sets session -> account binding with standard TTL.
+func (s *GatewayService) BindStickySession(ctx context.Context, sessionHash string, accountID int64) error {
+	if sessionHash == "" || accountID <= 0 {
+		return nil
+	}
+	return s.cache.SetSessionAccountID(ctx, sessionHash, accountID, stickySessionTTL)
+}
+
+func (s *GatewayService) extractCacheableContent(parsed *ParsedRequest) string {
+	if parsed == nil {
+		return ""
+	}

-	// 检查system中的cacheable内容
-	if system, ok := req["system"].([]any); ok {
+	var builder strings.Builder
+
+	// 检查 system 中的 cacheable 内容
+	if system, ok := parsed.System.([]any); ok {
 		for _, part := range system {
 			if partMap, ok := part.(map[string]any); ok {
 				if cc, ok := partMap["cache_control"].(map[string]any); ok {
 					if cc["type"] == "ephemeral" {
 						if text, ok := partMap["text"].(string); ok {
-							content += text
+							_, _ = builder.WriteString(text)
 						}
 					}
 				}
 			}
 		}
 	}
+	systemText := builder.String()

-	// 检查messages中的cacheable内容
-	if messages, ok := req["messages"].([]any); ok {
-		for _, msg := range messages {
+	// 检查 messages 中的 cacheable 内容
+	for _, msg := range parsed.Messages {
 		if msgMap, ok := msg.(map[string]any); ok {
 			if msgContent, ok := msgMap["content"].([]any); ok {
 				for _, part := range msgContent {
 					if partMap, ok := part.(map[string]any); ok {
 						if cc, ok := partMap["cache_control"].(map[string]any); ok {
 							if cc["type"] == "ephemeral" {
-									// 找到cacheable内容，提取第一条消息的文本
 								return s.extractTextFromContent(msgMap["content"])
 							}
 						}
@@ -221,9 +249,8 @@ func (s *GatewayService) extractCacheableContent(req map[string]any) string {
 			}
 		}
 	}
-	}

-	return content
+	return systemText
 }

 func (s *GatewayService) extractTextFromSystem(system any) string {
@@ -332,8 +359,354 @@ func (s *GatewayService) SelectAccountForModelWithExclusions(ctx context.Context
 	return s.selectAccountForModelWithPlatform(ctx, groupID, sessionHash, requestedModel, excludedIDs, platform)
 }

+// SelectAccountWithLoadAwareness selects account with load-awareness and wait plan.
+func (s *GatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
+	cfg := s.schedulingConfig()
+	var stickyAccountID int64
+	if sessionHash != "" && s.cache != nil {
+		if accountID, err := s.cache.GetSessionAccountID(ctx, sessionHash); err == nil {
+			stickyAccountID = accountID
+		}
+	}
+	if s.concurrencyService == nil || !cfg.LoadBatchEnabled {
+		account, err := s.SelectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs)
+		if err != nil {
+			return nil, err
+		}
+		result, err := s.tryAcquireAccountSlot(ctx, account.ID, account.Concurrency)
+		if err == nil && result.Acquired {
+			return &AccountSelectionResult{
+				Account:     account,
+				Acquired:    true,
+				ReleaseFunc: result.ReleaseFunc,
+			}, nil
+		}
+		if stickyAccountID > 0 && stickyAccountID == account.ID && s.concurrencyService != nil {
+			waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, account.ID)
+			if waitingCount < cfg.StickySessionMaxWaiting {
+				return &AccountSelectionResult{
+					Account: account,
+					WaitPlan: &AccountWaitPlan{
+						AccountID:      account.ID,
+						MaxConcurrency: account.Concurrency,
+						Timeout:        cfg.StickySessionWaitTimeout,
+						MaxWaiting:     cfg.StickySessionMaxWaiting,
+					},
+				}, nil
+			}
+		}
+		return &AccountSelectionResult{
+			Account: account,
+			WaitPlan: &AccountWaitPlan{
+				AccountID:      account.ID,
+				MaxConcurrency: account.Concurrency,
+				Timeout:        cfg.FallbackWaitTimeout,
+				MaxWaiting:     cfg.FallbackMaxWaiting,
+			},
+		}, nil
+	}
+
+	platform, hasForcePlatform, err := s.resolvePlatform(ctx, groupID)
+	if err != nil {
+		return nil, err
+	}
+	preferOAuth := platform == PlatformGemini
+
+	accounts, useMixed, err := s.listSchedulableAccounts(ctx, groupID, platform, hasForcePlatform)
+	if err != nil {
+		return nil, err
+	}
+	if len(accounts) == 0 {
+		return nil, errors.New("no available accounts")
+	}
+
+	isExcluded := func(accountID int64) bool {
+		if excludedIDs == nil {
+			return false
+		}
+		_, excluded := excludedIDs[accountID]
+		return excluded
+	}
+
+	// ============ Layer 1: 粘性会话优先 ============
+	if sessionHash != "" {
+		accountID, err := s.cache.GetSessionAccountID(ctx, sessionHash)
+		if err == nil && accountID > 0 && !isExcluded(accountID) {
+			account, err := s.accountRepo.GetByID(ctx, accountID)
+			if err == nil && s.isAccountAllowedForPlatform(account, platform, useMixed) &&
+				account.IsSchedulable() &&
+				(requestedModel == "" || s.isModelSupportedByAccount(account, requestedModel)) {
+				result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
+				if err == nil && result.Acquired {
+					_ = s.cache.RefreshSessionTTL(ctx, sessionHash, stickySessionTTL)
+					return &AccountSelectionResult{
+						Account:     account,
+						Acquired:    true,
+						ReleaseFunc: result.ReleaseFunc,
+					}, nil
+				}
+
+				waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, accountID)
+				if waitingCount < cfg.StickySessionMaxWaiting {
+					return &AccountSelectionResult{
+						Account: account,
+						WaitPlan: &AccountWaitPlan{
+							AccountID:      accountID,
+							MaxConcurrency: account.Concurrency,
+							Timeout:        cfg.StickySessionWaitTimeout,
+							MaxWaiting:     cfg.StickySessionMaxWaiting,
+						},
+					}, nil
+				}
+			}
+		}
+	}
+
+	// ============ Layer 2: 负载感知选择 ============
+	candidates := make([]*Account, 0, len(accounts))
+	for i := range accounts {
+		acc := &accounts[i]
+		if isExcluded(acc.ID) {
+			continue
+		}
+		if !s.isAccountAllowedForPlatform(acc, platform, useMixed) {
+			continue
+		}
+		if requestedModel != "" && !s.isModelSupportedByAccount(acc, requestedModel) {
+			continue
+		}
+		candidates = append(candidates, acc)
+	}
+
+	if len(candidates) == 0 {
+		return nil, errors.New("no available accounts")
+	}
+
+	accountLoads := make([]AccountWithConcurrency, 0, len(candidates))
+	for _, acc := range candidates {
+		accountLoads = append(accountLoads, AccountWithConcurrency{
+			ID:             acc.ID,
+			MaxConcurrency: acc.Concurrency,
+		})
+	}
+
+	loadMap, err := s.concurrencyService.GetAccountsLoadBatch(ctx, accountLoads)
+	if err != nil {
+		if result, ok := s.tryAcquireByLegacyOrder(ctx, candidates, sessionHash, preferOAuth); ok {
+			return result, nil
+		}
+	} else {
+		type accountWithLoad struct {
+			account  *Account
+			loadInfo *AccountLoadInfo
+		}
+		var available []accountWithLoad
+		for _, acc := range candidates {
+			loadInfo := loadMap[acc.ID]
+			if loadInfo == nil {
+				loadInfo = &AccountLoadInfo{AccountID: acc.ID}
+			}
+			if loadInfo.LoadRate < 100 {
+				available = append(available, accountWithLoad{
+					account:  acc,
+					loadInfo: loadInfo,
+				})
+			}
+		}
+
+		if len(available) > 0 {
+			sort.SliceStable(available, func(i, j int) bool {
+				a, b := available[i], available[j]
+				if a.account.Priority != b.account.Priority {
+					return a.account.Priority < b.account.Priority
+				}
+				if a.loadInfo.LoadRate != b.loadInfo.LoadRate {
+					return a.loadInfo.LoadRate < b.loadInfo.LoadRate
+				}
+				switch {
+				case a.account.LastUsedAt == nil && b.account.LastUsedAt != nil:
+					return true
+				case a.account.LastUsedAt != nil && b.account.LastUsedAt == nil:
+					return false
+				case a.account.LastUsedAt == nil && b.account.LastUsedAt == nil:
+					if preferOAuth && a.account.Type != b.account.Type {
+						return a.account.Type == AccountTypeOAuth
+					}
+					return false
+				default:
+					return a.account.LastUsedAt.Before(*b.account.LastUsedAt)
+				}
+			})
+
+			for _, item := range available {
+				result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency)
+				if err == nil && result.Acquired {
+					if sessionHash != "" {
+						_ = s.cache.SetSessionAccountID(ctx, sessionHash, item.account.ID, stickySessionTTL)
+					}
+					return &AccountSelectionResult{
+						Account:     item.account,
+						Acquired:    true,
+						ReleaseFunc: result.ReleaseFunc,
+					}, nil
+				}
+			}
+		}
+	}
+
+	// ============ Layer 3: 兜底排队 ============
+	sortAccountsByPriorityAndLastUsed(candidates, preferOAuth)
+	for _, acc := range candidates {
+		return &AccountSelectionResult{
+			Account: acc,
+			WaitPlan: &AccountWaitPlan{
+				AccountID:      acc.ID,
+				MaxConcurrency: acc.Concurrency,
+				Timeout:        cfg.FallbackWaitTimeout,
+				MaxWaiting:     cfg.FallbackMaxWaiting,
+			},
+		}, nil
+	}
+	return nil, errors.New("no available accounts")
+}
+
+func (s *GatewayService) tryAcquireByLegacyOrder(ctx context.Context, candidates []*Account, sessionHash string, preferOAuth bool) (*AccountSelectionResult, bool) {
+	ordered := append([]*Account(nil), candidates...)
+	sortAccountsByPriorityAndLastUsed(ordered, preferOAuth)
+
+	for _, acc := range ordered {
+		result, err := s.tryAcquireAccountSlot(ctx, acc.ID, acc.Concurrency)
+		if err == nil && result.Acquired {
+			if sessionHash != "" {
+				_ = s.cache.SetSessionAccountID(ctx, sessionHash, acc.ID, stickySessionTTL)
+			}
+			return &AccountSelectionResult{
+				Account:     acc,
+				Acquired:    true,
+				ReleaseFunc: result.ReleaseFunc,
+			}, true
+		}
+	}
+
+	return nil, false
+}
+
+func (s *GatewayService) schedulingConfig() config.GatewaySchedulingConfig {
+	if s.cfg != nil {
+		return s.cfg.Gateway.Scheduling
+	}
+	return config.GatewaySchedulingConfig{
+		StickySessionMaxWaiting:  3,
+		StickySessionWaitTimeout: 45 * time.Second,
+		FallbackWaitTimeout:      30 * time.Second,
+		FallbackMaxWaiting:       100,
+		LoadBatchEnabled:         true,
+		SlotCleanupInterval:      30 * time.Second,
+	}
+}
+
+func (s *GatewayService) resolvePlatform(ctx context.Context, groupID *int64) (string, bool, error) {
+	forcePlatform, hasForcePlatform := ctx.Value(ctxkey.ForcePlatform).(string)
+	if hasForcePlatform && forcePlatform != "" {
+		return forcePlatform, true, nil
+	}
+	if groupID != nil {
+		group, err := s.groupRepo.GetByID(ctx, *groupID)
+		if err != nil {
+			return "", false, fmt.Errorf("get group failed: %w", err)
+		}
+		return group.Platform, false, nil
+	}
+	return PlatformAnthropic, false, nil
+}
+
+func (s *GatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64, platform string, hasForcePlatform bool) ([]Account, bool, error) {
+	useMixed := (platform == PlatformAnthropic || platform == PlatformGemini) && !hasForcePlatform
+	if useMixed {
+		platforms := []string{platform, PlatformAntigravity}
+		var accounts []Account
+		var err error
+		if groupID != nil {
+			accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatforms(ctx, *groupID, platforms)
+		} else {
+			accounts, err = s.accountRepo.ListSchedulableByPlatforms(ctx, platforms)
+		}
+		if err != nil {
+			return nil, useMixed, err
+		}
+		filtered := make([]Account, 0, len(accounts))
+		for _, acc := range accounts {
+			if acc.Platform == PlatformAntigravity && !acc.IsMixedSchedulingEnabled() {
+				continue
+			}
+			filtered = append(filtered, acc)
+		}
+		return filtered, useMixed, nil
+	}
+
+	var accounts []Account
+	var err error
+	if s.cfg != nil && s.cfg.RunMode == config.RunModeSimple {
+		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
+	} else if groupID != nil {
+		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, platform)
+		if err == nil && len(accounts) == 0 && hasForcePlatform {
+			accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
+		}
+	} else {
+		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, platform)
+	}
+	if err != nil {
+		return nil, useMixed, err
+	}
+	return accounts, useMixed, nil
+}
+
+func (s *GatewayService) isAccountAllowedForPlatform(account *Account, platform string, useMixed bool) bool {
+	if account == nil {
+		return false
+	}
+	if useMixed {
+		if account.Platform == platform {
+			return true
+		}
+		return account.Platform == PlatformAntigravity && account.IsMixedSchedulingEnabled()
+	}
+	return account.Platform == platform
+}
+
+func (s *GatewayService) tryAcquireAccountSlot(ctx context.Context, accountID int64, maxConcurrency int) (*AcquireResult, error) {
+	if s.concurrencyService == nil {
+		return &AcquireResult{Acquired: true, ReleaseFunc: func() {}}, nil
+	}
+	return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
+}
+
+func sortAccountsByPriorityAndLastUsed(accounts []*Account, preferOAuth bool) {
+	sort.SliceStable(accounts, func(i, j int) bool {
+		a, b := accounts[i], accounts[j]
+		if a.Priority != b.Priority {
+			return a.Priority < b.Priority
+		}
+		switch {
+		case a.LastUsedAt == nil && b.LastUsedAt != nil:
+			return true
+		case a.LastUsedAt != nil && b.LastUsedAt == nil:
+			return false
+		case a.LastUsedAt == nil && b.LastUsedAt == nil:
+			if preferOAuth && a.Type != b.Type {
+				return a.Type == AccountTypeOAuth
+			}
+			return false
+		default:
+			return a.LastUsedAt.Before(*b.LastUsedAt)
+		}
+	})
+}
+
 // selectAccountForModelWithPlatform 选择单平台账户（完全隔离）
 func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, platform string) (*Account, error) {
+	preferOAuth := platform == PlatformGemini
 	// 1. 查询粘性会话
 	if sessionHash != "" {
 		accountID, err := s.cache.GetSessionAccountID(ctx, sessionHash)
@@ -389,7 +762,9 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 			case acc.LastUsedAt != nil && selected.LastUsedAt == nil:
 				// keep selected (never used is preferred)
 			case acc.LastUsedAt == nil && selected.LastUsedAt == nil:
-				// keep selected (both never used)
+				if preferOAuth && acc.Type != selected.Type && acc.Type == AccountTypeOAuth {
+					selected = acc
+				}
 			default:
 				if acc.LastUsedAt.Before(*selected.LastUsedAt) {
 					selected = acc
@@ -419,6 +794,7 @@ func (s *GatewayService) selectAccountForModelWithPlatform(ctx context.Context,
 // 查询原生平台账户 + 启用 mixed_scheduling 的 antigravity 账户
 func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}, nativePlatform string) (*Account, error) {
 	platforms := []string{nativePlatform, PlatformAntigravity}
+	preferOAuth := nativePlatform == PlatformGemini

 	// 1. 查询粘性会话
 	if sessionHash != "" {
@@ -478,7 +854,9 @@ func (s *GatewayService) selectAccountWithMixedScheduling(ctx context.Context, g
 			case acc.LastUsedAt != nil && selected.LastUsedAt == nil:
 				// keep selected (never used is preferred)
 			case acc.LastUsedAt == nil && selected.LastUsedAt == nil:
-				// keep selected (both never used)
+				if preferOAuth && acc.Platform == PlatformGemini && selected.Platform == PlatformGemini && acc.Type != selected.Type && acc.Type == AccountTypeOAuth {
+					selected = acc
+				}
 			default:
 				if acc.LastUsedAt.Before(*selected.LastUsedAt) {
 					selected = acc
@@ -515,24 +893,10 @@ func (s *GatewayService) isModelSupportedByAccount(account *Account, requestedMo
 }

 // IsAntigravityModelSupported 检查 Antigravity 平台是否支持指定模型
+// 所有 claude- 和 gemini- 前缀的模型都能通过映射或透传支持
 func IsAntigravityModelSupported(requestedModel string) bool {
-	// 直接支持的模型
-	if antigravitySupportedModels[requestedModel] {
-		return true
-	}
-	// 可映射的模型
-	if _, ok := antigravityModelMapping[requestedModel]; ok {
-		return true
-	}
-	// Gemini 前缀透传
-	if strings.HasPrefix(requestedModel, "gemini-") {
-		return true
-	}
-	// Claude 模型支持（通过默认映射到 claude-sonnet-4-5）
-	if strings.HasPrefix(requestedModel, "claude-") {
-		return true
-	}
-	return false
+	return strings.HasPrefix(requestedModel, "claude-") ||
+		strings.HasPrefix(requestedModel, "gemini-")
 }

 // GetAccessToken 获取账号凭证
@@ -588,19 +952,17 @@ func (s *GatewayService) shouldFailoverUpstreamError(statusCode int) bool {
 }

 // Forward 转发请求到Claude API
-func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *Account, body []byte) (*ForwardResult, error) {
+func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *Account, parsed *ParsedRequest) (*ForwardResult, error) {
 	startTime := time.Now()
-
-	// 解析请求获取model和stream
-	var req struct {
-		Model  string `json:"model"`
-		Stream bool   `json:"stream"`
-	}
-	if err := json.Unmarshal(body, &req); err != nil {
-		return nil, fmt.Errorf("parse request: %w", err)
+	if parsed == nil {
+		return nil, fmt.Errorf("parse request: empty request")
 	}

-	if !gjson.GetBytes(body, "system").Exists() {
+	body := parsed.Body
+	reqModel := parsed.Model
+	reqStream := parsed.Stream
+
+	if !parsed.HasSystem {
 		body, _ = sjson.SetBytes(body, "system", []any{
 			map[string]any{
 				"type": "text",
@@ -613,13 +975,13 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 	}

 	// 应用模型映射（仅对apikey类型账号）
-	originalModel := req.Model
+	originalModel := reqModel
 	if account.Type == AccountTypeApiKey {
-		mappedModel := account.GetMappedModel(req.Model)
-		if mappedModel != req.Model {
+		mappedModel := account.GetMappedModel(reqModel)
+		if mappedModel != reqModel {
 			// 替换请求体中的模型名
 			body = s.replaceModelInBody(body, mappedModel)
-			req.Model = mappedModel
+			reqModel = mappedModel
 			log.Printf("Model mapping applied: %s -> %s (account: %s)", originalModel, mappedModel, account.Name)
 		}
 	}
@@ -640,13 +1002,13 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 	var resp *http.Response
 	for attempt := 1; attempt <= maxRetries; attempt++ {
 		// 构建上游请求（每次重试需要重新构建，因为请求体需要重新读取）
-		upstreamReq, err := s.buildUpstreamRequest(ctx, c, account, body, token, tokenType)
+		upstreamReq, err := s.buildUpstreamRequest(ctx, c, account, body, token, tokenType, reqModel)
 		if err != nil {
 			return nil, err
 		}

 		// 发送请求
-		resp, err = s.httpUpstream.Do(upstreamReq, proxyURL)
+		resp, err = s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
 		if err != nil {
 			return nil, fmt.Errorf("upstream request failed: %w", err)
 		}
@@ -686,14 +1048,38 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A

 	// 处理错误响应（不可重试的错误）
 	if resp.StatusCode >= 400 {
+		// 可选：对部分 400 触发 failover（默认关闭以保持语义）
+		if resp.StatusCode == 400 && s.cfg != nil && s.cfg.Gateway.FailoverOn400 {
+			respBody, readErr := io.ReadAll(resp.Body)
+			if readErr != nil {
+				// ReadAll failed, fall back to normal error handling without consuming the stream
+				return s.handleErrorResponse(ctx, resp, c, account)
+			}
+			_ = resp.Body.Close()
+			resp.Body = io.NopCloser(bytes.NewReader(respBody))
+
+			if s.shouldFailoverOn400(respBody) {
+				if s.cfg.Gateway.LogUpstreamErrorBody {
+					log.Printf(
+						"Account %d: 400 error, attempting failover: %s",
+						account.ID,
+						truncateForLog(respBody, s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes),
+					)
+				} else {
+					log.Printf("Account %d: 400 error, attempting failover", account.ID)
+				}
+				s.handleFailoverSideEffects(ctx, resp, account)
+				return nil, &UpstreamFailoverError{StatusCode: resp.StatusCode}
+			}
+		}
 		return s.handleErrorResponse(ctx, resp, c, account)
 	}

 	// 处理正常响应
 	var usage *ClaudeUsage
 	var firstTokenMs *int
-	if req.Stream {
-		streamResult, err := s.handleStreamingResponse(ctx, resp, c, account, startTime, originalModel, req.Model)
+	if reqStream {
+		streamResult, err := s.handleStreamingResponse(ctx, resp, c, account, startTime, originalModel, reqModel)
 		if err != nil {
 			if err.Error() == "have error in stream" {
 				return nil, &UpstreamFailoverError{
@@ -705,7 +1091,7 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 		usage = streamResult.usage
 		firstTokenMs = streamResult.firstTokenMs
 	} else {
-		usage, err = s.handleNonStreamingResponse(ctx, resp, c, account, originalModel, req.Model)
+		usage, err = s.handleNonStreamingResponse(ctx, resp, c, account, originalModel, reqModel)
 		if err != nil {
 			return nil, err
 		}
@@ -715,13 +1101,13 @@ func (s *GatewayService) Forward(ctx context.Context, c *gin.Context, account *A
 		RequestID:    resp.Header.Get("x-request-id"),
 		Usage:        *usage,
 		Model:        originalModel, // 使用原始模型用于计费和日志
-		Stream:       req.Stream,
+		Stream:       reqStream,
 		Duration:     time.Since(startTime),
 		FirstTokenMs: firstTokenMs,
 	}, nil
 }

-func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token, tokenType string) (*http.Request, error) {
+func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token, tokenType, modelID string) (*http.Request, error) {
 	// 确定目标URL
 	targetURL := claudeAPIURL
 	if account.Type == AccountTypeApiKey {
@@ -787,7 +1173,14 @@ func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Contex

 	// 处理anthropic-beta header（OAuth账号需要特殊处理）
 	if tokenType == "oauth" {
-		req.Header.Set("anthropic-beta", s.getBetaHeader(body, c.GetHeader("anthropic-beta")))
+		req.Header.Set("anthropic-beta", s.getBetaHeader(modelID, c.GetHeader("anthropic-beta")))
+	} else if s.cfg != nil && s.cfg.Gateway.InjectBetaForApiKey && req.Header.Get("anthropic-beta") == "" {
+		// API-key：仅在请求显式使用 beta 特性且客户端未提供时，按需补齐（默认关闭）
+		if requestNeedsBetaFeatures(body) {
+			if beta := defaultApiKeyBetaHeader(body); beta != "" {
+				req.Header.Set("anthropic-beta", beta)
+			}
+		}
 	}

 	return req, nil
@@ -795,7 +1188,7 @@ func (s *GatewayService) buildUpstreamRequest(ctx context.Context, c *gin.Contex

 // getBetaHeader 处理anthropic-beta header
 // 对于OAuth账号，需要确保包含oauth-2025-04-20
-func (s *GatewayService) getBetaHeader(body []byte, clientBetaHeader string) string {
+func (s *GatewayService) getBetaHeader(modelID string, clientBetaHeader string) string {
 	// 如果客户端传了anthropic-beta
 	if clientBetaHeader != "" {
 		// 已包含oauth beta则直接返回
@@ -832,20 +1225,89 @@ func (s *GatewayService) getBetaHeader(body []byte, clientBetaHeader string) str
 	}

 	// 客户端没传，根据模型生成
-	var modelID string
-	var reqMap map[string]any
-	if json.Unmarshal(body, &reqMap) == nil {
-		if m, ok := reqMap["model"].(string); ok {
-			modelID = m
+	// haiku 模型不需要 claude-code beta
+	if strings.Contains(strings.ToLower(modelID), "haiku") {
+		return claude.HaikuBetaHeader
+	}
+
+	return claude.DefaultBetaHeader
+}
+
+func requestNeedsBetaFeatures(body []byte) bool {
+	tools := gjson.GetBytes(body, "tools")
+	if tools.Exists() && tools.IsArray() && len(tools.Array()) > 0 {
+		return true
 	}
+	if strings.EqualFold(gjson.GetBytes(body, "thinking.type").String(), "enabled") {
+		return true
 	}
+	return false
+}

-	// haiku模型不需要claude-code beta
+func defaultApiKeyBetaHeader(body []byte) string {
+	modelID := gjson.GetBytes(body, "model").String()
 	if strings.Contains(strings.ToLower(modelID), "haiku") {
-		return claude.HaikuBetaHeader
+		return claude.ApiKeyHaikuBetaHeader
 	}
+	return claude.ApiKeyBetaHeader
+}

-	return claude.DefaultBetaHeader
+func truncateForLog(b []byte, maxBytes int) string {
+	if maxBytes <= 0 {
+		maxBytes = 2048
+	}
+	if len(b) > maxBytes {
+		b = b[:maxBytes]
+	}
+	s := string(b)
+	// 保持一行，避免污染日志格式
+	s = strings.ReplaceAll(s, "\n", "\\n")
+	s = strings.ReplaceAll(s, "\r", "\\r")
+	return s
+}
+
+func (s *GatewayService) shouldFailoverOn400(respBody []byte) bool {
+	// 只对“可能是兼容性差异导致”的 400 允许切换，避免无意义重试。
+	// 默认保守：无法识别则不切换。
+	msg := strings.ToLower(strings.TrimSpace(extractUpstreamErrorMessage(respBody)))
+	if msg == "" {
+		return false
+	}
+
+	// 缺少/错误的 beta header：换账号/链路可能成功（尤其是混合调度时）。
+	// 更精确匹配 beta 相关的兼容性问题，避免误触发切换。
+	if strings.Contains(msg, "anthropic-beta") ||
+		strings.Contains(msg, "beta feature") ||
+		strings.Contains(msg, "requires beta") {
+		return true
+	}
+
+	// thinking/tool streaming 等兼容性约束（常见于中间转换链路）
+	if strings.Contains(msg, "thinking") || strings.Contains(msg, "thought_signature") || strings.Contains(msg, "signature") {
+		return true
+	}
+	if strings.Contains(msg, "tool_use") || strings.Contains(msg, "tool_result") || strings.Contains(msg, "tools") {
+		return true
+	}
+
+	return false
+}
+
+func extractUpstreamErrorMessage(body []byte) string {
+	// Claude 风格：{"type":"error","error":{"type":"...","message":"..."}}
+	if m := gjson.GetBytes(body, "error.message").String(); strings.TrimSpace(m) != "" {
+		inner := strings.TrimSpace(m)
+		// 有些上游会把完整 JSON 作为字符串塞进 message
+		if strings.HasPrefix(inner, "{") {
+			if innerMsg := gjson.Get(inner, "error.message").String(); strings.TrimSpace(innerMsg) != "" {
+				return innerMsg
+			}
+		}
+		return m
+	}
+
+	// 兜底：尝试顶层 message
+	return gjson.GetBytes(body, "message").String()
 }

 func (s *GatewayService) handleErrorResponse(ctx context.Context, resp *http.Response, c *gin.Context, account *Account) (*ForwardResult, error) {
@@ -860,6 +1322,16 @@ func (s *GatewayService) handleErrorResponse(ctx context.Context, resp *http.Res

 	switch resp.StatusCode {
 	case 400:
+		// 仅记录上游错误摘要（避免输出请求内容）；需要时可通过配置打开
+		if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
+			log.Printf(
+				"Upstream 400 error (account=%d platform=%s type=%s): %s",
+				account.ID,
+				account.Platform,
+				account.Type,
+				truncateForLog(body, s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes),
+			)
+		}
 		c.Data(http.StatusBadRequest, "application/json", body)
 		return nil, fmt.Errorf("upstream error: %d", resp.StatusCode)
 	case 401:
@@ -1248,13 +1720,7 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
 				log.Printf("Increment subscription usage failed: %v", err)
 			}
 			// 异步更新订阅缓存
-			go func() {
-				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				if err := s.billingCacheService.UpdateSubscriptionUsage(cacheCtx, user.ID, *apiKey.GroupID, cost.TotalCost); err != nil {
-					log.Printf("Update subscription cache failed: %v", err)
-				}
-			}()
+			s.billingCacheService.QueueUpdateSubscriptionUsage(user.ID, *apiKey.GroupID, cost.TotalCost)
 		}
 	} else {
 		// 余额模式：扣除用户余额（使用 ActualCost 考虑倍率后的费用）
@@ -1263,13 +1729,7 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu
 				log.Printf("Deduct balance failed: %v", err)
 			}
 			// 异步更新余额缓存
-			go func() {
-				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				if err := s.billingCacheService.DeductBalanceCache(cacheCtx, user.ID, cost.ActualCost); err != nil {
-					log.Printf("Update balance cache failed: %v", err)
-				}
-			}()
+			s.billingCacheService.QueueDeductBalance(user.ID, cost.ActualCost)
 		}
 	}

@@ -1281,7 +1741,15 @@ func (s *GatewayService) RecordUsage(ctx context.Context, input *RecordUsageInpu

 // ForwardCountTokens 转发 count_tokens 请求到上游 API
 // 特点：不记录使用量、仅支持非流式响应
-func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context, account *Account, body []byte) error {
+func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context, account *Account, parsed *ParsedRequest) error {
+	if parsed == nil {
+		s.countTokensError(c, http.StatusBadRequest, "invalid_request_error", "Request body is empty")
+		return fmt.Errorf("parse request: empty request")
+	}
+
+	body := parsed.Body
+	reqModel := parsed.Model
+
 	// Antigravity 账户不支持 count_tokens 转发，返回估算值
 	// 参考 Antigravity-Manager 和 proxycast 实现
 	if account.Platform == PlatformAntigravity {
@@ -1291,14 +1759,12 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,

 	// 应用模型映射（仅对 apikey 类型账号）
 	if account.Type == AccountTypeApiKey {
-		var req struct {
-			Model string `json:"model"`
-		}
-		if err := json.Unmarshal(body, &req); err == nil && req.Model != "" {
-			mappedModel := account.GetMappedModel(req.Model)
-			if mappedModel != req.Model {
+		if reqModel != "" {
+			mappedModel := account.GetMappedModel(reqModel)
+			if mappedModel != reqModel {
 				body = s.replaceModelInBody(body, mappedModel)
-				log.Printf("CountTokens model mapping applied: %s -> %s (account: %s)", req.Model, mappedModel, account.Name)
+				reqModel = mappedModel
+				log.Printf("CountTokens model mapping applied: %s -> %s (account: %s)", parsed.Model, mappedModel, account.Name)
 			}
 		}
 	}
@@ -1311,7 +1777,7 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
 	}

 	// 构建上游请求
-	upstreamReq, err := s.buildCountTokensRequest(ctx, c, account, body, token, tokenType)
+	upstreamReq, err := s.buildCountTokensRequest(ctx, c, account, body, token, tokenType, reqModel)
 	if err != nil {
 		s.countTokensError(c, http.StatusInternalServerError, "api_error", "Failed to build request")
 		return err
@@ -1324,7 +1790,7 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
 	}

 	// 发送请求
-	resp, err := s.httpUpstream.Do(upstreamReq, proxyURL)
+	resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
 	if err != nil {
 		s.countTokensError(c, http.StatusBadGateway, "upstream_error", "Request failed")
 		return fmt.Errorf("upstream request failed: %w", err)
@@ -1345,6 +1811,18 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
 		// 标记账号状态（429/529等）
 		s.rateLimitService.HandleUpstreamError(ctx, account, resp.StatusCode, resp.Header, respBody)

+		// 记录上游错误摘要便于排障（不回显请求内容）
+		if s.cfg != nil && s.cfg.Gateway.LogUpstreamErrorBody {
+			log.Printf(
+				"count_tokens upstream error %d (account=%d platform=%s type=%s): %s",
+				resp.StatusCode,
+				account.ID,
+				account.Platform,
+				account.Type,
+				truncateForLog(respBody, s.cfg.Gateway.LogUpstreamErrorBodyMaxBytes),
+			)
+		}
+
 		// 返回简化的错误响应
 		errMsg := "Upstream request failed"
 		switch resp.StatusCode {
@@ -1363,7 +1841,7 @@ func (s *GatewayService) ForwardCountTokens(ctx context.Context, c *gin.Context,
 }

 // buildCountTokensRequest 构建 count_tokens 上游请求
-func (s *GatewayService) buildCountTokensRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token, tokenType string) (*http.Request, error) {
+func (s *GatewayService) buildCountTokensRequest(ctx context.Context, c *gin.Context, account *Account, body []byte, token, tokenType, modelID string) (*http.Request, error) {
 	// 确定目标 URL
 	targetURL := claudeAPICountTokensURL
 	if account.Type == AccountTypeApiKey {
@@ -1424,7 +1902,14 @@ func (s *GatewayService) buildCountTokensRequest(ctx context.Context, c *gin.Con

 	// OAuth 账号：处理 anthropic-beta header
 	if tokenType == "oauth" {
-		req.Header.Set("anthropic-beta", s.getBetaHeader(body, c.GetHeader("anthropic-beta")))
+		req.Header.Set("anthropic-beta", s.getBetaHeader(modelID, c.GetHeader("anthropic-beta")))
+	} else if s.cfg != nil && s.cfg.Gateway.InjectBetaForApiKey && req.Header.Get("anthropic-beta") == "" {
+		// API-key：与 messages 同步的按需 beta 注入（默认关闭）
+		if requestNeedsBetaFeatures(body) {
+			if beta := defaultApiKeyBetaHeader(body); beta != "" {
+				req.Header.Set("anthropic-beta", beta)
+			}
+		}
 	}

 	return req, nil

--- a/backend/internal/service/gateway_service_benchmark_test.go
+++ b/backend/internal/service/gateway_service_benchmark_test.go
+package service
+
+import (
+	"strconv"
+	"testing"
+)
+
+var benchmarkStringSink string
+
+// BenchmarkGenerateSessionHash_Metadata 关注 JSON 解析与正则匹配开销。
+func BenchmarkGenerateSessionHash_Metadata(b *testing.B) {
+	svc := &GatewayService{}
+	body := []byte(`{"metadata":{"user_id":"session_123e4567-e89b-12d3-a456-426614174000"},"messages":[{"content":"hello"}]}`)
+
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		parsed, err := ParseGatewayRequest(body)
+		if err != nil {
+			b.Fatalf("解析请求失败: %v", err)
+		}
+		benchmarkStringSink = svc.GenerateSessionHash(parsed)
+	}
+}
+
+// BenchmarkExtractCacheableContent_System 关注字符串拼接路径的性能。
+func BenchmarkExtractCacheableContent_System(b *testing.B) {
+	svc := &GatewayService{}
+	req := buildSystemCacheableRequest(12)
+
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		benchmarkStringSink = svc.extractCacheableContent(req)
+	}
+}
+
+func buildSystemCacheableRequest(parts int) *ParsedRequest {
+	systemParts := make([]any, 0, parts)
+	for i := 0; i < parts; i++ {
+		systemParts = append(systemParts, map[string]any{
+			"text": "system_part_" + strconv.Itoa(i),
+			"cache_control": map[string]any{
+				"type": "ephemeral",
+			},
+		})
+	}
+	return &ParsedRequest{
+		System:    systemParts,
+		HasSystem: true,
+	}
+}
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
@@ -116,6 +116,17 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 						valid = true
 					}
 					if valid {
+						usable := true
+						if s.rateLimitService != nil && requestedModel != "" {
+							ok, err := s.rateLimitService.PreCheckUsage(ctx, account, requestedModel)
+							if err != nil {
+								log.Printf("[Gemini PreCheck] Account %d precheck error: %v", account.ID, err)
+							}
+							if !ok {
+								usable = false
+							}
+						}
+						if usable {
 							_ = s.cache.RefreshSessionTTL(ctx, cacheKey, geminiStickySessionTTL)
 							return account, nil
 						}
@@ -123,6 +134,7 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 				}
 			}
 		}
+	}

 	// 查询可调度账户（强制平台模式：优先按分组查找，找不到再查全部）
 	var accounts []Account
@@ -157,6 +169,15 @@ func (s *GeminiMessagesCompatService) SelectAccountForModelWithExclusions(ctx co
 		if requestedModel != "" && !s.isModelSupportedByAccount(acc, requestedModel) {
 			continue
 		}
+		if s.rateLimitService != nil && requestedModel != "" {
+			ok, err := s.rateLimitService.PreCheckUsage(ctx, acc, requestedModel)
+			if err != nil {
+				log.Printf("[Gemini PreCheck] Account %d precheck error: %v", acc.ID, err)
+			}
+			if !ok {
+				continue
+			}
+		}
 		if selected == nil {
 			selected = acc
 			continue
@@ -472,7 +493,7 @@ func (s *GeminiMessagesCompatService) Forward(ctx context.Context, c *gin.Contex
 		}
 		requestIDHeader = idHeader

-		resp, err = s.httpUpstream.Do(upstreamReq, proxyURL)
+		resp, err = s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
 		if err != nil {
 			if attempt < geminiMaxRetries {
 				log.Printf("Gemini account %d: upstream request failed, retry %d/%d: %v", account.ID, attempt, geminiMaxRetries, err)
@@ -725,7 +746,7 @@ func (s *GeminiMessagesCompatService) ForwardNative(ctx context.Context, c *gin.
 		}
 		requestIDHeader = idHeader

-		resp, err = s.httpUpstream.Do(upstreamReq, proxyURL)
+		resp, err = s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
 		if err != nil {
 			if attempt < geminiMaxRetries {
 				log.Printf("Gemini account %d: upstream request failed, retry %d/%d: %v", account.ID, attempt, geminiMaxRetries, err)
@@ -921,7 +942,10 @@ func sleepGeminiBackoff(attempt int) {
 	time.Sleep(sleepFor)
 }

-var sensitiveQueryParamRegex = regexp.MustCompile(`(?i)([?&](?:key|client_secret|access_token|refresh_token)=)[^&"\s]+`)
+var (
+	sensitiveQueryParamRegex = regexp.MustCompile(`(?i)([?&](?:key|client_secret|access_token|refresh_token)=)[^&"\s]+`)
+	retryInRegex             = regexp.MustCompile(`Please retry in ([0-9.]+)s`)
+)

 func sanitizeUpstreamErrorMessage(msg string) string {
 	if msg == "" {
@@ -1753,7 +1777,7 @@ func (s *GeminiMessagesCompatService) ForwardAIStudioGET(ctx context.Context, ac
 		return nil, fmt.Errorf("unsupported account type: %s", account.Type)
 	}

-	resp, err := s.httpUpstream.Do(req, proxyURL)
+	resp, err := s.httpUpstream.Do(req, proxyURL, account.ID, account.Concurrency)
 	if err != nil {
 		return nil, err
 	}
@@ -1883,13 +1907,44 @@ func (s *GeminiMessagesCompatService) handleGeminiUpstreamError(ctx context.Cont
 	if statusCode != 429 {
 		return
 	}
+
+	oauthType := account.GeminiOAuthType()
+	tierID := account.GeminiTierID()
+	projectID := strings.TrimSpace(account.GetCredential("project_id"))
+	isCodeAssist := account.IsGeminiCodeAssist()
+
 	resetAt := ParseGeminiRateLimitResetTime(body)
 	if resetAt == nil {
-		ra := time.Now().Add(5 * time.Minute)
+		// 根据账号类型使用不同的默认重置时间
+		var ra time.Time
+		if isCodeAssist {
+			// Code Assist: fallback cooldown by tier
+			cooldown := geminiCooldownForTier(tierID)
+			if s.rateLimitService != nil {
+				cooldown = s.rateLimitService.GeminiCooldown(ctx, account)
+			}
+			ra = time.Now().Add(cooldown)
+			log.Printf("[Gemini 429] Account %d (Code Assist, tier=%s, project=%s) rate limited, cooldown=%v", account.ID, tierID, projectID, time.Until(ra).Truncate(time.Second))
+		} else {
+			// API Key / AI Studio OAuth: PST 午夜
+			if ts := nextGeminiDailyResetUnix(); ts != nil {
+				ra = time.Unix(*ts, 0)
+				log.Printf("[Gemini 429] Account %d (API Key/AI Studio, type=%s) rate limited, reset at PST midnight (%v)", account.ID, account.Type, ra)
+			} else {
+				// 兜底：5 分钟
+				ra = time.Now().Add(5 * time.Minute)
+				log.Printf("[Gemini 429] Account %d rate limited, fallback to 5min", account.ID)
+			}
+		}
 		_ = s.accountRepo.SetRateLimited(ctx, account.ID, ra)
 		return
 	}
-	_ = s.accountRepo.SetRateLimited(ctx, account.ID, time.Unix(*resetAt, 0))
+
+	// 使用解析到的重置时间
+	resetTime := time.Unix(*resetAt, 0)
+	_ = s.accountRepo.SetRateLimited(ctx, account.ID, resetTime)
+	log.Printf("[Gemini 429] Account %d rate limited until %v (oauth_type=%s, tier=%s)",
+		account.ID, resetTime, oauthType, tierID)
 }

 // ParseGeminiRateLimitResetTime 解析 Gemini 格式的 429 响应，返回重置时间的 Unix 时间戳
@@ -1925,7 +1980,6 @@ func ParseGeminiRateLimitResetTime(body []byte) *int64 {
 	}

 	// Match "Please retry in Xs"
-	retryInRegex := regexp.MustCompile(`Please retry in ([0-9.]+)s`)
 	matches := retryInRegex.FindStringSubmatch(string(body))
 	if len(matches) == 2 {
 		if dur, err := time.ParseDuration(matches[1] + "s"); err == nil {
@@ -1946,16 +2000,7 @@ func looksLikeGeminiDailyQuota(message string) bool {
 }

 func nextGeminiDailyResetUnix() *int64 {
-	loc, err := time.LoadLocation("America/Los_Angeles")
-	if err != nil {
-		// Fallback: PST without DST.
-		loc = time.FixedZone("PST", -8*3600)
-	}
-	now := time.Now().In(loc)
-	reset := time.Date(now.Year(), now.Month(), now.Day(), 0, 5, 0, 0, loc)
-	if !reset.After(now) {
-		reset = reset.Add(24 * time.Hour)
-	}
+	reset := geminiDailyResetTime(time.Now())
 	ts := reset.Unix()
 	return &ts
 }
@@ -2243,16 +2288,46 @@ func convertClaudeToolsToGeminiTools(tools any) []any {
 		if !ok {
 			continue
 		}
-		name, _ := tm["name"].(string)
-		desc, _ := tm["description"].(string)
-		params := tm["input_schema"]
+
+		var name, desc string
+		var params any
+
+		// 检查是否为 custom 类型工具 (MCP)
+		toolType, _ := tm["type"].(string)
+		if toolType == "custom" {
+			// Custom 格式: 从 custom 字段获取 description 和 input_schema
+			custom, ok := tm["custom"].(map[string]any)
+			if !ok {
+				continue
+			}
+			name, _ = tm["name"].(string)
+			desc, _ = custom["description"].(string)
+			params = custom["input_schema"]
+		} else {
+			// 标准格式: 从顶层字段获取
+			name, _ = tm["name"].(string)
+			desc, _ = tm["description"].(string)
+			params = tm["input_schema"]
+		}
+
 		if name == "" {
 			continue
 		}
+
+		// 为 nil params 提供默认值
+		if params == nil {
+			params = map[string]any{
+				"type":       "object",
+				"properties": map[string]any{},
+			}
+		}
+		// 清理 JSON Schema
+		cleanedParams := cleanToolSchema(params)
+
 		funcDecls = append(funcDecls, map[string]any{
 			"name":        name,
 			"description": desc,
-			"parameters":  params,
+			"parameters":  cleanedParams,
 		})
 	}

@@ -2266,6 +2341,41 @@ func convertClaudeToolsToGeminiTools(tools any) []any {
 	}
 }

+// cleanToolSchema 清理工具的 JSON Schema，移除 Gemini 不支持的字段
+func cleanToolSchema(schema any) any {
+	if schema == nil {
+		return nil
+	}
+
+	switch v := schema.(type) {
+	case map[string]any:
+		cleaned := make(map[string]any)
+		for key, value := range v {
+			// 跳过不支持的字段
+			if key == "$schema" || key == "$id" || key == "$ref" ||
+				key == "additionalProperties" || key == "minLength" ||
+				key == "maxLength" || key == "minItems" || key == "maxItems" {
+				continue
+			}
+			// 递归清理嵌套对象
+			cleaned[key] = cleanToolSchema(value)
+		}
+		// 规范化 type 字段为大写
+		if typeVal, ok := cleaned["type"].(string); ok {
+			cleaned["type"] = strings.ToUpper(typeVal)
+		}
+		return cleaned
+	case []any:
+		cleaned := make([]any, len(v))
+		for i, item := range v {
+			cleaned[i] = cleanToolSchema(item)
+		}
+		return cleaned
+	default:
+		return v
+	}
+}
+
 func convertClaudeGenerationConfig(req map[string]any) map[string]any {
 	out := make(map[string]any)
 	if mt, ok := asInt(req["max_tokens"]); ok && mt > 0 {

--- a/backend/internal/service/gemini_messages_compat_service_test.go
+++ b/backend/internal/service/gemini_messages_compat_service_test.go
+package service
+
+import (
+	"testing"
+)
+
+// TestConvertClaudeToolsToGeminiTools_CustomType 测试custom类型工具转换
+func TestConvertClaudeToolsToGeminiTools_CustomType(t *testing.T) {
+	tests := []struct {
+		name        string
+		tools       any
+		expectedLen int
+		description string
+	}{
+		{
+			name: "Standard tools",
+			tools: []any{
+				map[string]any{
+					"name":         "get_weather",
+					"description":  "Get weather info",
+					"input_schema": map[string]any{"type": "object"},
+				},
+			},
+			expectedLen: 1,
+			description: "标准工具格式应该正常转换",
+		},
+		{
+			name: "Custom type tool (MCP format)",
+			tools: []any{
+				map[string]any{
+					"type": "custom",
+					"name": "mcp_tool",
+					"custom": map[string]any{
+						"description":  "MCP tool description",
+						"input_schema": map[string]any{"type": "object"},
+					},
+				},
+			},
+			expectedLen: 1,
+			description: "Custom类型工具应该从custom字段读取",
+		},
+		{
+			name: "Mixed standard and custom tools",
+			tools: []any{
+				map[string]any{
+					"name":         "standard_tool",
+					"description":  "Standard",
+					"input_schema": map[string]any{"type": "object"},
+				},
+				map[string]any{
+					"type": "custom",
+					"name": "custom_tool",
+					"custom": map[string]any{
+						"description":  "Custom",
+						"input_schema": map[string]any{"type": "object"},
+					},
+				},
+			},
+			expectedLen: 1,
+			description: "混合工具应该都能正确转换",
+		},
+		{
+			name: "Custom tool without custom field",
+			tools: []any{
+				map[string]any{
+					"type": "custom",
+					"name": "invalid_custom",
+					// 缺少 custom 字段
+				},
+			},
+			expectedLen: 0, // 应该被跳过
+			description: "缺少custom字段的custom工具应该被跳过",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := convertClaudeToolsToGeminiTools(tt.tools)
+
+			if tt.expectedLen == 0 {
+				if result != nil {
+					t.Errorf("%s: expected nil result, got %v", tt.description, result)
+				}
+				return
+			}
+
+			if result == nil {
+				t.Fatalf("%s: expected non-nil result", tt.description)
+			}
+
+			if len(result) != 1 {
+				t.Errorf("%s: expected 1 tool declaration, got %d", tt.description, len(result))
+				return
+			}
+
+			toolDecl, ok := result[0].(map[string]any)
+			if !ok {
+				t.Fatalf("%s: result[0] is not map[string]any", tt.description)
+			}
+
+			funcDecls, ok := toolDecl["functionDeclarations"].([]any)
+			if !ok {
+				t.Fatalf("%s: functionDeclarations is not []any", tt.description)
+			}
+
+			toolsArr, _ := tt.tools.([]any)
+			expectedFuncCount := 0
+			for _, tool := range toolsArr {
+				toolMap, _ := tool.(map[string]any)
+				if toolMap["name"] != "" {
+					// 检查是否为有效的custom工具
+					if toolMap["type"] == "custom" {
+						if toolMap["custom"] != nil {
+							expectedFuncCount++
+						}
+					} else {
+						expectedFuncCount++
+					}
+				}
+			}
+
+			if len(funcDecls) != expectedFuncCount {
+				t.Errorf("%s: expected %d function declarations, got %d",
+					tt.description, expectedFuncCount, len(funcDecls))
+			}
+		})
+	}
+}
--- a/backend/internal/service/gemini_oauth_service.go
+++ b/backend/internal/service/gemini_oauth_service.go
@@ -7,13 +7,14 @@ import (
 	"fmt"
 	"io"
 	"net/http"
-	"net/url"
+	"regexp"
 	"strconv"
 	"strings"
 	"time"

 	"github.com/Wei-Shaw/sub2api/internal/config"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/geminicli"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/httpclient"
 )

 type GeminiOAuthService struct {
@@ -163,6 +164,45 @@ type GeminiTokenInfo struct {
 	Scope        string `json:"scope,omitempty"`
 	ProjectID    string `json:"project_id,omitempty"`
 	OAuthType    string `json:"oauth_type,omitempty"` // "code_assist" 或 "ai_studio"
+	TierID       string `json:"tier_id,omitempty"`    // Gemini Code Assist tier: LEGACY/PRO/ULTRA
+}
+
+// validateTierID validates tier_id format and length
+func validateTierID(tierID string) error {
+	if tierID == "" {
+		return nil // Empty is allowed
+	}
+	if len(tierID) > 64 {
+		return fmt.Errorf("tier_id exceeds maximum length of 64 characters")
+	}
+	// Allow alphanumeric, underscore, hyphen, and slash (for tier paths)
+	if !regexp.MustCompile(`^[a-zA-Z0-9_/-]+$`).MatchString(tierID) {
+		return fmt.Errorf("tier_id contains invalid characters")
+	}
+	return nil
+}
+
+// extractTierIDFromAllowedTiers extracts tierID from LoadCodeAssist response
+// Prioritizes IsDefault tier, falls back to first non-empty tier
+func extractTierIDFromAllowedTiers(allowedTiers []geminicli.AllowedTier) string {
+	tierID := "LEGACY"
+	// First pass: look for default tier
+	for _, tier := range allowedTiers {
+		if tier.IsDefault && strings.TrimSpace(tier.ID) != "" {
+			tierID = strings.TrimSpace(tier.ID)
+			break
+		}
+	}
+	// Second pass: if still LEGACY, take first non-empty tier
+	if tierID == "LEGACY" {
+		for _, tier := range allowedTiers {
+			if strings.TrimSpace(tier.ID) != "" {
+				tierID = strings.TrimSpace(tier.ID)
+				break
+			}
+		}
+	}
+	return tierID
 }

 func (s *GeminiOAuthService) ExchangeCode(ctx context.Context, input *GeminiExchangeCodeInput) (*GeminiTokenInfo, error) {
@@ -219,25 +259,45 @@ func (s *GeminiOAuthService) ExchangeCode(ctx context.Context, input *GeminiExch
 	sessionProjectID := strings.TrimSpace(session.ProjectID)
 	s.sessionStore.Delete(input.SessionID)

-	// 计算过期时间时减去 5 分钟安全时间窗口,考虑网络延迟和时钟偏差
-	expiresAt := time.Now().Unix() + tokenResp.ExpiresIn - 300
+	// 计算过期时间：减去 5 分钟安全时间窗口（考虑网络延迟和时钟偏差）
+	// 同时设置下界保护，防止 expires_in 过小导致过去时间（引发刷新风暴）
+	const safetyWindow = 300 // 5 minutes
+	const minTTL = 30        // minimum 30 seconds
+	expiresAt := time.Now().Unix() + tokenResp.ExpiresIn - safetyWindow
+	minExpiresAt := time.Now().Unix() + minTTL
+	if expiresAt < minExpiresAt {
+		expiresAt = minExpiresAt
+	}

 	projectID := sessionProjectID
+	var tierID string

 	// 对于 code_assist 模式，project_id 是必需的
 	// 对于 ai_studio 模式，project_id 是可选的（不影响使用 AI Studio API）
 	if oauthType == "code_assist" {
 		if projectID == "" {
 			var err error
-			projectID, err = s.fetchProjectID(ctx, tokenResp.AccessToken, proxyURL)
+			projectID, tierID, err = s.fetchProjectID(ctx, tokenResp.AccessToken, proxyURL)
 			if err != nil {
 				// 记录警告但不阻断流程，允许后续补充 project_id
 				fmt.Printf("[GeminiOAuth] Warning: Failed to fetch project_id during token exchange: %v\n", err)
 			}
+		} else {
+			// 用户手动填了 project_id，仍需调用 LoadCodeAssist 获取 tierID
+			_, fetchedTierID, err := s.fetchProjectID(ctx, tokenResp.AccessToken, proxyURL)
+			if err != nil {
+				fmt.Printf("[GeminiOAuth] Warning: Failed to fetch tierID: %v\n", err)
+			} else {
+				tierID = fetchedTierID
+			}
 		}
 		if strings.TrimSpace(projectID) == "" {
 			return nil, fmt.Errorf("missing project_id for Code Assist OAuth: please fill Project ID (optional field) and regenerate the auth URL, or ensure your Google account has an ACTIVE GCP project")
 		}
+		// tierID 缺失时使用默认值
+		if tierID == "" {
+			tierID = "LEGACY"
+		}
 	}

 	return &GeminiTokenInfo{
@@ -248,6 +308,7 @@ func (s *GeminiOAuthService) ExchangeCode(ctx context.Context, input *GeminiExch
 		ExpiresAt:    expiresAt,
 		Scope:        tokenResp.Scope,
 		ProjectID:    projectID,
+		TierID:       tierID,
 		OAuthType:    oauthType,
 	}, nil
 }
@@ -266,8 +327,15 @@ func (s *GeminiOAuthService) RefreshToken(ctx context.Context, oauthType, refres

 		tokenResp, err := s.oauthClient.RefreshToken(ctx, oauthType, refreshToken, proxyURL)
 		if err == nil {
-			// 计算过期时间时减去 5 分钟安全时间窗口,考虑网络延迟和时钟偏差
-			expiresAt := time.Now().Unix() + tokenResp.ExpiresIn - 300
+			// 计算过期时间：减去 5 分钟安全时间窗口（考虑网络延迟和时钟偏差）
+			// 同时设置下界保护，防止 expires_in 过小导致过去时间（引发刷新风暴）
+			const safetyWindow = 300 // 5 minutes
+			const minTTL = 30        // minimum 30 seconds
+			expiresAt := time.Now().Unix() + tokenResp.ExpiresIn - safetyWindow
+			minExpiresAt := time.Now().Unix() + minTTL
+			if expiresAt < minExpiresAt {
+				expiresAt = minExpiresAt
+			}
 			return &GeminiTokenInfo{
 				AccessToken:  tokenResp.AccessToken,
 				RefreshToken: tokenResp.RefreshToken,
@@ -354,18 +422,39 @@ func (s *GeminiOAuthService) RefreshAccountToken(ctx context.Context, account *A
 		tokenInfo.ProjectID = existingProjectID
 	}

+	// 尝试从账号凭证获取 tierID（向后兼容）
+	existingTierID := strings.TrimSpace(account.GetCredential("tier_id"))
+
 	// For Code Assist, project_id is required. Auto-detect if missing.
 	// For AI Studio OAuth, project_id is optional and should not block refresh.
-	if oauthType == "code_assist" && strings.TrimSpace(tokenInfo.ProjectID) == "" {
-		projectID, err := s.fetchProjectID(ctx, tokenInfo.AccessToken, proxyURL)
+	if oauthType == "code_assist" {
+		// 先设置默认值或保留旧值，确保 tier_id 始终有值
+		if existingTierID != "" {
+			tokenInfo.TierID = existingTierID
+		} else {
+			tokenInfo.TierID = "LEGACY" // 默认值
+		}
+
+		// 尝试自动探测 project_id 和 tier_id
+		needDetect := strings.TrimSpace(tokenInfo.ProjectID) == "" || existingTierID == ""
+		if needDetect {
+			projectID, tierID, err := s.fetchProjectID(ctx, tokenInfo.AccessToken, proxyURL)
 			if err != nil {
-			return nil, fmt.Errorf("failed to auto-detect project_id: %w", err)
+				fmt.Printf("[GeminiOAuth] Warning: failed to auto-detect project/tier: %v\n", err)
+			} else {
+				if strings.TrimSpace(tokenInfo.ProjectID) == "" && projectID != "" {
+					tokenInfo.ProjectID = projectID
 				}
-		projectID = strings.TrimSpace(projectID)
-		if projectID == "" {
+				// 只有当原来没有 tier_id 且探测成功时才更新
+				if existingTierID == "" && tierID != "" {
+					tokenInfo.TierID = tierID
+				}
+			}
+		}
+
+		if strings.TrimSpace(tokenInfo.ProjectID) == "" {
 			return nil, fmt.Errorf("failed to auto-detect project_id: empty result")
 		}
-		tokenInfo.ProjectID = projectID
 	}

 	return tokenInfo, nil
@@ -388,6 +477,13 @@ func (s *GeminiOAuthService) BuildAccountCredentials(tokenInfo *GeminiTokenInfo)
 	if tokenInfo.ProjectID != "" {
 		creds["project_id"] = tokenInfo.ProjectID
 	}
+	if tokenInfo.TierID != "" {
+		// Validate tier_id before storing
+		if err := validateTierID(tokenInfo.TierID); err == nil {
+			creds["tier_id"] = tokenInfo.TierID
+		}
+		// Silently skip invalid tier_id (don't block account creation)
+	}
 	if tokenInfo.OAuthType != "" {
 		creds["oauth_type"] = tokenInfo.OAuthType
 	}
@@ -398,33 +494,22 @@ func (s *GeminiOAuthService) Stop() {
 	s.sessionStore.Stop()
 }

-func (s *GeminiOAuthService) fetchProjectID(ctx context.Context, accessToken, proxyURL string) (string, error) {
+func (s *GeminiOAuthService) fetchProjectID(ctx context.Context, accessToken, proxyURL string) (string, string, error) {
 	if s.codeAssist == nil {
-		return "", errors.New("code assist client not configured")
+		return "", "", errors.New("code assist client not configured")
 	}

 	loadResp, loadErr := s.codeAssist.LoadCodeAssist(ctx, accessToken, proxyURL, nil)
-	if loadErr == nil && loadResp != nil && strings.TrimSpace(loadResp.CloudAICompanionProject) != "" {
-		return strings.TrimSpace(loadResp.CloudAICompanionProject), nil
-	}

-	// Pick tier from allowedTiers; if no default tier is marked, pick the first non-empty tier ID.
+	// Extract tierID from response (works whether CloudAICompanionProject is set or not)
 	tierID := "LEGACY"
 	if loadResp != nil {
-		for _, tier := range loadResp.AllowedTiers {
-			if tier.IsDefault && strings.TrimSpace(tier.ID) != "" {
-				tierID = strings.TrimSpace(tier.ID)
-				break
-			}
-		}
-		if strings.TrimSpace(tierID) == "" || tierID == "LEGACY" {
-			for _, tier := range loadResp.AllowedTiers {
-				if strings.TrimSpace(tier.ID) != "" {
-					tierID = strings.TrimSpace(tier.ID)
-					break
-				}
-			}
+		tierID = extractTierIDFromAllowedTiers(loadResp.AllowedTiers)
 	}
+
+	// If LoadCodeAssist returned a project, use it
+	if loadErr == nil && loadResp != nil && strings.TrimSpace(loadResp.CloudAICompanionProject) != "" {
+		return strings.TrimSpace(loadResp.CloudAICompanionProject), tierID, nil
 	}

 	req := &geminicli.OnboardUserRequest{
@@ -443,39 +528,39 @@ func (s *GeminiOAuthService) fetchProjectID(ctx context.Context, accessToken, pr
 			// If Code Assist onboarding fails (e.g. INVALID_ARGUMENT), fallback to Cloud Resource Manager projects.
 			fallback, fbErr := fetchProjectIDFromResourceManager(ctx, accessToken, proxyURL)
 			if fbErr == nil && strings.TrimSpace(fallback) != "" {
-				return strings.TrimSpace(fallback), nil
+				return strings.TrimSpace(fallback), tierID, nil
 			}
-			return "", err
+			return "", tierID, err
 		}
 		if resp.Done {
 			if resp.Response != nil && resp.Response.CloudAICompanionProject != nil {
 				switch v := resp.Response.CloudAICompanionProject.(type) {
 				case string:
-					return strings.TrimSpace(v), nil
+					return strings.TrimSpace(v), tierID, nil
 				case map[string]any:
 					if id, ok := v["id"].(string); ok {
-						return strings.TrimSpace(id), nil
+						return strings.TrimSpace(id), tierID, nil
 					}
 				}
 			}

 			fallback, fbErr := fetchProjectIDFromResourceManager(ctx, accessToken, proxyURL)
 			if fbErr == nil && strings.TrimSpace(fallback) != "" {
-				return strings.TrimSpace(fallback), nil
+				return strings.TrimSpace(fallback), tierID, nil
 			}
-			return "", errors.New("onboardUser completed but no project_id returned")
+			return "", tierID, errors.New("onboardUser completed but no project_id returned")
 		}
 		time.Sleep(2 * time.Second)
 	}

 	fallback, fbErr := fetchProjectIDFromResourceManager(ctx, accessToken, proxyURL)
 	if fbErr == nil && strings.TrimSpace(fallback) != "" {
-		return strings.TrimSpace(fallback), nil
+		return strings.TrimSpace(fallback), tierID, nil
 	}
 	if loadErr != nil {
-		return "", fmt.Errorf("loadCodeAssist failed (%v) and onboardUser timeout after %d attempts", loadErr, maxAttempts)
+		return "", tierID, fmt.Errorf("loadCodeAssist failed (%v) and onboardUser timeout after %d attempts", loadErr, maxAttempts)
 	}
-	return "", fmt.Errorf("onboardUser timeout after %d attempts", maxAttempts)
+	return "", tierID, fmt.Errorf("onboardUser timeout after %d attempts", maxAttempts)
 }

 type googleCloudProject struct {
@@ -497,11 +582,12 @@ func fetchProjectIDFromResourceManager(ctx context.Context, accessToken, proxyUR
 	req.Header.Set("Authorization", "Bearer "+accessToken)
 	req.Header.Set("User-Agent", geminicli.GeminiCLIUserAgent)

-	client := &http.Client{Timeout: 30 * time.Second}
-	if strings.TrimSpace(proxyURL) != "" {
-		if proxyURLParsed, err := url.Parse(strings.TrimSpace(proxyURL)); err == nil {
-			client.Transport = &http.Transport{Proxy: http.ProxyURL(proxyURLParsed)}
-		}
+	client, err := httpclient.GetClient(httpclient.Options{
+		ProxyURL: strings.TrimSpace(proxyURL),
+		Timeout:  30 * time.Second,
+	})
+	if err != nil {
+		client = &http.Client{Timeout: 30 * time.Second}
 	}

 	resp, err := client.Do(req)

--- a/backend/internal/service/gemini_quota.go
+++ b/backend/internal/service/gemini_quota.go
+package service
+
+import (
+	"context"
+	"encoding/json"
+	"errors"
+	"log"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/usagestats"
+)
+
+type geminiModelClass string
+
+const (
+	geminiModelPro   geminiModelClass = "pro"
+	geminiModelFlash geminiModelClass = "flash"
+)
+
+type GeminiDailyQuota struct {
+	ProRPD   int64
+	FlashRPD int64
+}
+
+type GeminiTierPolicy struct {
+	Quota    GeminiDailyQuota
+	Cooldown time.Duration
+}
+
+type GeminiQuotaPolicy struct {
+	tiers map[string]GeminiTierPolicy
+}
+
+type GeminiUsageTotals struct {
+	ProRequests   int64
+	FlashRequests int64
+	ProTokens     int64
+	FlashTokens   int64
+	ProCost       float64
+	FlashCost     float64
+}
+
+const geminiQuotaCacheTTL = time.Minute
+
+type geminiQuotaOverrides struct {
+	Tiers map[string]config.GeminiTierQuotaConfig `json:"tiers"`
+}
+
+type GeminiQuotaService struct {
+	cfg         *config.Config
+	settingRepo SettingRepository
+	mu          sync.Mutex
+	cachedAt    time.Time
+	policy      *GeminiQuotaPolicy
+}
+
+func NewGeminiQuotaService(cfg *config.Config, settingRepo SettingRepository) *GeminiQuotaService {
+	return &GeminiQuotaService{
+		cfg:         cfg,
+		settingRepo: settingRepo,
+	}
+}
+
+func (s *GeminiQuotaService) Policy(ctx context.Context) *GeminiQuotaPolicy {
+	if s == nil {
+		return newGeminiQuotaPolicy()
+	}
+
+	now := time.Now()
+	s.mu.Lock()
+	if s.policy != nil && now.Sub(s.cachedAt) < geminiQuotaCacheTTL {
+		policy := s.policy
+		s.mu.Unlock()
+		return policy
+	}
+	s.mu.Unlock()
+
+	policy := newGeminiQuotaPolicy()
+	if s.cfg != nil {
+		policy.ApplyOverrides(s.cfg.Gemini.Quota.Tiers)
+		if strings.TrimSpace(s.cfg.Gemini.Quota.Policy) != "" {
+			var overrides geminiQuotaOverrides
+			if err := json.Unmarshal([]byte(s.cfg.Gemini.Quota.Policy), &overrides); err != nil {
+				log.Printf("gemini quota: parse config policy failed: %v", err)
+			} else {
+				policy.ApplyOverrides(overrides.Tiers)
+			}
+		}
+	}
+
+	if s.settingRepo != nil {
+		value, err := s.settingRepo.GetValue(ctx, SettingKeyGeminiQuotaPolicy)
+		if err != nil && !errors.Is(err, ErrSettingNotFound) {
+			log.Printf("gemini quota: load setting failed: %v", err)
+		} else if strings.TrimSpace(value) != "" {
+			var overrides geminiQuotaOverrides
+			if err := json.Unmarshal([]byte(value), &overrides); err != nil {
+				log.Printf("gemini quota: parse setting failed: %v", err)
+			} else {
+				policy.ApplyOverrides(overrides.Tiers)
+			}
+		}
+	}
+
+	s.mu.Lock()
+	s.policy = policy
+	s.cachedAt = now
+	s.mu.Unlock()
+
+	return policy
+}
+
+func (s *GeminiQuotaService) QuotaForAccount(ctx context.Context, account *Account) (GeminiDailyQuota, bool) {
+	if account == nil || !account.IsGeminiCodeAssist() {
+		return GeminiDailyQuota{}, false
+	}
+	policy := s.Policy(ctx)
+	return policy.QuotaForTier(account.GeminiTierID())
+}
+
+func (s *GeminiQuotaService) CooldownForTier(ctx context.Context, tierID string) time.Duration {
+	policy := s.Policy(ctx)
+	return policy.CooldownForTier(tierID)
+}
+
+func newGeminiQuotaPolicy() *GeminiQuotaPolicy {
+	return &GeminiQuotaPolicy{
+		tiers: map[string]GeminiTierPolicy{
+			"LEGACY": {Quota: GeminiDailyQuota{ProRPD: 50, FlashRPD: 1500}, Cooldown: 30 * time.Minute},
+			"PRO":    {Quota: GeminiDailyQuota{ProRPD: 1500, FlashRPD: 4000}, Cooldown: 5 * time.Minute},
+			"ULTRA":  {Quota: GeminiDailyQuota{ProRPD: 2000, FlashRPD: 0}, Cooldown: 5 * time.Minute},
+		},
+	}
+}
+
+func (p *GeminiQuotaPolicy) ApplyOverrides(tiers map[string]config.GeminiTierQuotaConfig) {
+	if p == nil || len(tiers) == 0 {
+		return
+	}
+	for rawID, override := range tiers {
+		tierID := normalizeGeminiTierID(rawID)
+		if tierID == "" {
+			continue
+		}
+		policy, ok := p.tiers[tierID]
+		if !ok {
+			policy = GeminiTierPolicy{Cooldown: 5 * time.Minute}
+		}
+		if override.ProRPD != nil {
+			policy.Quota.ProRPD = clampGeminiQuotaInt64(*override.ProRPD)
+		}
+		if override.FlashRPD != nil {
+			policy.Quota.FlashRPD = clampGeminiQuotaInt64(*override.FlashRPD)
+		}
+		if override.CooldownMinutes != nil {
+			minutes := clampGeminiQuotaInt(*override.CooldownMinutes)
+			policy.Cooldown = time.Duration(minutes) * time.Minute
+		}
+		p.tiers[tierID] = policy
+	}
+}
+
+func (p *GeminiQuotaPolicy) QuotaForTier(tierID string) (GeminiDailyQuota, bool) {
+	policy, ok := p.policyForTier(tierID)
+	if !ok {
+		return GeminiDailyQuota{}, false
+	}
+	return policy.Quota, true
+}
+
+func (p *GeminiQuotaPolicy) CooldownForTier(tierID string) time.Duration {
+	policy, ok := p.policyForTier(tierID)
+	if ok && policy.Cooldown > 0 {
+		return policy.Cooldown
+	}
+	return 5 * time.Minute
+}
+
+func (p *GeminiQuotaPolicy) policyForTier(tierID string) (GeminiTierPolicy, bool) {
+	if p == nil {
+		return GeminiTierPolicy{}, false
+	}
+	normalized := normalizeGeminiTierID(tierID)
+	if normalized == "" {
+		normalized = "LEGACY"
+	}
+	if policy, ok := p.tiers[normalized]; ok {
+		return policy, true
+	}
+	policy, ok := p.tiers["LEGACY"]
+	return policy, ok
+}
+
+func normalizeGeminiTierID(tierID string) string {
+	return strings.ToUpper(strings.TrimSpace(tierID))
+}
+
+func clampGeminiQuotaInt64(value int64) int64 {
+	if value < 0 {
+		return 0
+	}
+	return value
+}
+
+func clampGeminiQuotaInt(value int) int {
+	if value < 0 {
+		return 0
+	}
+	return value
+}
+
+func geminiCooldownForTier(tierID string) time.Duration {
+	policy := newGeminiQuotaPolicy()
+	return policy.CooldownForTier(tierID)
+}
+
+func geminiModelClassFromName(model string) geminiModelClass {
+	name := strings.ToLower(strings.TrimSpace(model))
+	if strings.Contains(name, "flash") || strings.Contains(name, "lite") {
+		return geminiModelFlash
+	}
+	return geminiModelPro
+}
+
+func geminiAggregateUsage(stats []usagestats.ModelStat) GeminiUsageTotals {
+	var totals GeminiUsageTotals
+	for _, stat := range stats {
+		switch geminiModelClassFromName(stat.Model) {
+		case geminiModelFlash:
+			totals.FlashRequests += stat.Requests
+			totals.FlashTokens += stat.TotalTokens
+			totals.FlashCost += stat.ActualCost
+		default:
+			totals.ProRequests += stat.Requests
+			totals.ProTokens += stat.TotalTokens
+			totals.ProCost += stat.ActualCost
+		}
+	}
+	return totals
+}
+
+func geminiQuotaLocation() *time.Location {
+	loc, err := time.LoadLocation("America/Los_Angeles")
+	if err != nil {
+		return time.FixedZone("PST", -8*3600)
+	}
+	return loc
+}
+
+func geminiDailyWindowStart(now time.Time) time.Time {
+	loc := geminiQuotaLocation()
+	localNow := now.In(loc)
+	return time.Date(localNow.Year(), localNow.Month(), localNow.Day(), 0, 0, 0, 0, loc)
+}
+
+func geminiDailyResetTime(now time.Time) time.Time {
+	loc := geminiQuotaLocation()
+	localNow := now.In(loc)
+	start := time.Date(localNow.Year(), localNow.Month(), localNow.Day(), 0, 0, 0, 0, loc)
+	reset := start.Add(24 * time.Hour)
+	if !reset.After(localNow) {
+		reset = reset.Add(24 * time.Hour)
+	}
+	return reset
+}
--- a/backend/internal/service/gemini_token_provider.go
+++ b/backend/internal/service/gemini_token_provider.go
@@ -50,7 +50,7 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 	}

 	// 2) Refresh if needed (pre-expiry skew).
-	expiresAt := parseExpiresAt(account)
+	expiresAt := account.GetCredentialAsTime("expires_at")
 	needsRefresh := expiresAt == nil || time.Until(*expiresAt) <= geminiTokenRefreshSkew
 	if needsRefresh && p.tokenCache != nil {
 		locked, err := p.tokenCache.AcquireRefreshLock(ctx, cacheKey, 30*time.Second)
@@ -66,7 +66,7 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 			if err == nil && fresh != nil {
 				account = fresh
 			}
-			expiresAt = parseExpiresAt(account)
+			expiresAt = account.GetCredentialAsTime("expires_at")
 			if expiresAt == nil || time.Until(*expiresAt) <= geminiTokenRefreshSkew {
 				if p.geminiOAuthService == nil {
 					return "", errors.New("gemini oauth service not configured")
@@ -83,7 +83,7 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 				}
 				account.Credentials = newCredentials
 				_ = p.accountRepo.Update(ctx, account)
-				expiresAt = parseExpiresAt(account)
+				expiresAt = account.GetCredentialAsTime("expires_at")
 			}
 		}
 	}
@@ -112,17 +112,21 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
 			}
 		}

-		detected, err := p.geminiOAuthService.fetchProjectID(ctx, accessToken, proxyURL)
+		detected, tierID, err := p.geminiOAuthService.fetchProjectID(ctx, accessToken, proxyURL)
 		if err != nil {
 			log.Printf("[GeminiTokenProvider] Auto-detect project_id failed: %v, fallback to AI Studio API mode", err)
 			return accessToken, nil
 		}
 		detected = strings.TrimSpace(detected)
+		tierID = strings.TrimSpace(tierID)
 		if detected != "" {
 			if account.Credentials == nil {
 				account.Credentials = make(map[string]any)
 			}
 			account.Credentials["project_id"] = detected
+			if tierID != "" {
+				account.Credentials["tier_id"] = tierID
+			}
 			_ = p.accountRepo.Update(ctx, account)
 		}
 	}
@@ -154,18 +158,3 @@ func geminiTokenCacheKey(account *Account) string {
 	}
 	return "account:" + strconv.FormatInt(account.ID, 10)
 }
-
-func parseExpiresAt(account *Account) *time.Time {
-	raw := strings.TrimSpace(account.GetCredential("expires_at"))
-	if raw == "" {
-		return nil
-	}
-	if unixSec, err := strconv.ParseInt(raw, 10, 64); err == nil && unixSec > 0 {
-		t := time.Unix(unixSec, 0)
-		return &t
-	}
-	if t, err := time.Parse(time.RFC3339, raw); err == nil {
-		return &t
-	}
-	return nil
-}
--- a/backend/internal/service/gemini_token_refresher.go
+++ b/backend/internal/service/gemini_token_refresher.go
@@ -2,7 +2,6 @@ package service

 import (
 	"context"
-	"strconv"
 	"time"
 )

@@ -22,16 +21,11 @@ func (r *GeminiTokenRefresher) NeedsRefresh(account *Account, refreshWindow time
 	if !r.CanRefresh(account) {
 		return false
 	}
-	expiresAtStr := account.GetCredential("expires_at")
-	if expiresAtStr == "" {
+	expiresAt := account.GetCredentialAsTime("expires_at")
+	if expiresAt == nil {
 		return false
 	}
-	expiresAt, err := strconv.ParseInt(expiresAtStr, 10, 64)
-	if err != nil {
-		return false
-	}
-	expiryTime := time.Unix(expiresAt, 0)
-	return time.Until(expiryTime) < refreshWindow
+	return time.Until(*expiresAt) < refreshWindow
 }

 func (r *GeminiTokenRefresher) Refresh(ctx context.Context, account *Account) (map[string]any, error) {

--- a/backend/internal/service/group.go
+++ b/backend/internal/service/group.go
@@ -15,6 +15,7 @@ type Group struct {
 	DailyLimitUSD       *float64
 	WeeklyLimitUSD      *float64
 	MonthlyLimitUSD     *float64
+	DefaultValidityDays int

 	CreatedAt time.Time
 	UpdatedAt time.Time

--- a/backend/internal/service/group_service.go
+++ b/backend/internal/service/group_service.go
@@ -4,7 +4,7 @@ import (
 	"context"
 	"fmt"

-	infraerrors "github.com/Wei-Shaw/sub2api/internal/infrastructure/errors"
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
 )


--- a/backend/internal/service/http_upstream_port.go
+++ b/backend/internal/service/http_upstream_port.go
@@ -2,8 +2,29 @@ package service

 import "net/http"

-// HTTPUpstream interface for making HTTP requests to upstream APIs (Claude, OpenAI, etc.)
-// This is a generic interface that can be used for any HTTP-based upstream service.
+// HTTPUpstream 上游 HTTP 请求接口
+// 用于向上游 API（Claude、OpenAI、Gemini 等）发送请求
+// 这是一个通用接口，可用于任何基于 HTTP 的上游服务
+//
+// 设计说明：
+// - 支持可选代理配置
+// - 支持账户级连接池隔离
+// - 实现类负责连接池管理和复用
 type HTTPUpstream interface {
-	Do(req *http.Request, proxyURL string) (*http.Response, error)
+	// Do 执行 HTTP 请求
+	//
+	// 参数:
+	//   - req: HTTP 请求对象，由调用方构建
+	//   - proxyURL: 代理服务器地址，空字符串表示直连
+	//   - accountID: 账户 ID，用于连接池隔离（隔离策略为 account 或 account_proxy 时生效）
+	//   - accountConcurrency: 账户并发限制，用于动态调整连接池大小
+	//
+	// 返回:
+	//   - *http.Response: HTTP 响应，调用方必须关闭 Body
+	//   - error: 请求错误（网络错误、超时等）
+	//
+	// 注意:
+	//   - 调用方必须关闭 resp.Body，否则会导致连接泄漏
+	//   - 响应体可能已被包装以跟踪请求生命周期
+	Do(req *http.Request, proxyURL string, accountID int64, accountConcurrency int) (*http.Response, error)
 }
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -13,6 +13,7 @@ import (
 	"log"
 	"net/http"
 	"regexp"
+	"sort"
 	"strconv"
 	"strings"
 	"time"
@@ -80,6 +81,7 @@ type OpenAIGatewayService struct {
 	userSubRepo         UserSubscriptionRepository
 	cache               GatewayCache
 	cfg                 *config.Config
+	concurrencyService  *ConcurrencyService
 	billingService      *BillingService
 	rateLimitService    *RateLimitService
 	billingCacheService *BillingCacheService
@@ -95,6 +97,7 @@ func NewOpenAIGatewayService(
 	userSubRepo UserSubscriptionRepository,
 	cache GatewayCache,
 	cfg *config.Config,
+	concurrencyService *ConcurrencyService,
 	billingService *BillingService,
 	rateLimitService *RateLimitService,
 	billingCacheService *BillingCacheService,
@@ -108,6 +111,7 @@ func NewOpenAIGatewayService(
 		userSubRepo:         userSubRepo,
 		cache:               cache,
 		cfg:                 cfg,
+		concurrencyService:  concurrencyService,
 		billingService:      billingService,
 		rateLimitService:    rateLimitService,
 		billingCacheService: billingCacheService,
@@ -126,6 +130,14 @@ func (s *OpenAIGatewayService) GenerateSessionHash(c *gin.Context) string {
 	return hex.EncodeToString(hash[:])
 }

+// BindStickySession sets session -> account binding with standard TTL.
+func (s *OpenAIGatewayService) BindStickySession(ctx context.Context, sessionHash string, accountID int64) error {
+	if sessionHash == "" || accountID <= 0 {
+		return nil
+	}
+	return s.cache.SetSessionAccountID(ctx, "openai:"+sessionHash, accountID, openaiStickySessionTTL)
+}
+
 // SelectAccount selects an OpenAI account with sticky session support
 func (s *OpenAIGatewayService) SelectAccount(ctx context.Context, groupID *int64, sessionHash string) (*Account, error) {
 	return s.SelectAccountForModel(ctx, groupID, sessionHash, "")
@@ -218,6 +230,254 @@ func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.C
 	return selected, nil
 }

+// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
+func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
+	cfg := s.schedulingConfig()
+	var stickyAccountID int64
+	if sessionHash != "" && s.cache != nil {
+		if accountID, err := s.cache.GetSessionAccountID(ctx, "openai:"+sessionHash); err == nil {
+			stickyAccountID = accountID
+		}
+	}
+	if s.concurrencyService == nil || !cfg.LoadBatchEnabled {
+		account, err := s.SelectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs)
+		if err != nil {
+			return nil, err
+		}
+		result, err := s.tryAcquireAccountSlot(ctx, account.ID, account.Concurrency)
+		if err == nil && result.Acquired {
+			return &AccountSelectionResult{
+				Account:     account,
+				Acquired:    true,
+				ReleaseFunc: result.ReleaseFunc,
+			}, nil
+		}
+		if stickyAccountID > 0 && stickyAccountID == account.ID && s.concurrencyService != nil {
+			waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, account.ID)
+			if waitingCount < cfg.StickySessionMaxWaiting {
+				return &AccountSelectionResult{
+					Account: account,
+					WaitPlan: &AccountWaitPlan{
+						AccountID:      account.ID,
+						MaxConcurrency: account.Concurrency,
+						Timeout:        cfg.StickySessionWaitTimeout,
+						MaxWaiting:     cfg.StickySessionMaxWaiting,
+					},
+				}, nil
+			}
+		}
+		return &AccountSelectionResult{
+			Account: account,
+			WaitPlan: &AccountWaitPlan{
+				AccountID:      account.ID,
+				MaxConcurrency: account.Concurrency,
+				Timeout:        cfg.FallbackWaitTimeout,
+				MaxWaiting:     cfg.FallbackMaxWaiting,
+			},
+		}, nil
+	}
+
+	accounts, err := s.listSchedulableAccounts(ctx, groupID)
+	if err != nil {
+		return nil, err
+	}
+	if len(accounts) == 0 {
+		return nil, errors.New("no available accounts")
+	}
+
+	isExcluded := func(accountID int64) bool {
+		if excludedIDs == nil {
+			return false
+		}
+		_, excluded := excludedIDs[accountID]
+		return excluded
+	}
+
+	// ============ Layer 1: Sticky session ============
+	if sessionHash != "" {
+		accountID, err := s.cache.GetSessionAccountID(ctx, "openai:"+sessionHash)
+		if err == nil && accountID > 0 && !isExcluded(accountID) {
+			account, err := s.accountRepo.GetByID(ctx, accountID)
+			if err == nil && account.IsSchedulable() && account.IsOpenAI() &&
+				(requestedModel == "" || account.IsModelSupported(requestedModel)) {
+				result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
+				if err == nil && result.Acquired {
+					_ = s.cache.RefreshSessionTTL(ctx, "openai:"+sessionHash, openaiStickySessionTTL)
+					return &AccountSelectionResult{
+						Account:     account,
+						Acquired:    true,
+						ReleaseFunc: result.ReleaseFunc,
+					}, nil
+				}
+
+				waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, accountID)
+				if waitingCount < cfg.StickySessionMaxWaiting {
+					return &AccountSelectionResult{
+						Account: account,
+						WaitPlan: &AccountWaitPlan{
+							AccountID:      accountID,
+							MaxConcurrency: account.Concurrency,
+							Timeout:        cfg.StickySessionWaitTimeout,
+							MaxWaiting:     cfg.StickySessionMaxWaiting,
+						},
+					}, nil
+				}
+			}
+		}
+	}
+
+	// ============ Layer 2: Load-aware selection ============
+	candidates := make([]*Account, 0, len(accounts))
+	for i := range accounts {
+		acc := &accounts[i]
+		if isExcluded(acc.ID) {
+			continue
+		}
+		if requestedModel != "" && !acc.IsModelSupported(requestedModel) {
+			continue
+		}
+		candidates = append(candidates, acc)
+	}
+
+	if len(candidates) == 0 {
+		return nil, errors.New("no available accounts")
+	}
+
+	accountLoads := make([]AccountWithConcurrency, 0, len(candidates))
+	for _, acc := range candidates {
+		accountLoads = append(accountLoads, AccountWithConcurrency{
+			ID:             acc.ID,
+			MaxConcurrency: acc.Concurrency,
+		})
+	}
+
+	loadMap, err := s.concurrencyService.GetAccountsLoadBatch(ctx, accountLoads)
+	if err != nil {
+		ordered := append([]*Account(nil), candidates...)
+		sortAccountsByPriorityAndLastUsed(ordered, false)
+		for _, acc := range ordered {
+			result, err := s.tryAcquireAccountSlot(ctx, acc.ID, acc.Concurrency)
+			if err == nil && result.Acquired {
+				if sessionHash != "" {
+					_ = s.cache.SetSessionAccountID(ctx, "openai:"+sessionHash, acc.ID, openaiStickySessionTTL)
+				}
+				return &AccountSelectionResult{
+					Account:     acc,
+					Acquired:    true,
+					ReleaseFunc: result.ReleaseFunc,
+				}, nil
+			}
+		}
+	} else {
+		type accountWithLoad struct {
+			account  *Account
+			loadInfo *AccountLoadInfo
+		}
+		var available []accountWithLoad
+		for _, acc := range candidates {
+			loadInfo := loadMap[acc.ID]
+			if loadInfo == nil {
+				loadInfo = &AccountLoadInfo{AccountID: acc.ID}
+			}
+			if loadInfo.LoadRate < 100 {
+				available = append(available, accountWithLoad{
+					account:  acc,
+					loadInfo: loadInfo,
+				})
+			}
+		}
+
+		if len(available) > 0 {
+			sort.SliceStable(available, func(i, j int) bool {
+				a, b := available[i], available[j]
+				if a.account.Priority != b.account.Priority {
+					return a.account.Priority < b.account.Priority
+				}
+				if a.loadInfo.LoadRate != b.loadInfo.LoadRate {
+					return a.loadInfo.LoadRate < b.loadInfo.LoadRate
+				}
+				switch {
+				case a.account.LastUsedAt == nil && b.account.LastUsedAt != nil:
+					return true
+				case a.account.LastUsedAt != nil && b.account.LastUsedAt == nil:
+					return false
+				case a.account.LastUsedAt == nil && b.account.LastUsedAt == nil:
+					return false
+				default:
+					return a.account.LastUsedAt.Before(*b.account.LastUsedAt)
+				}
+			})
+
+			for _, item := range available {
+				result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency)
+				if err == nil && result.Acquired {
+					if sessionHash != "" {
+						_ = s.cache.SetSessionAccountID(ctx, "openai:"+sessionHash, item.account.ID, openaiStickySessionTTL)
+					}
+					return &AccountSelectionResult{
+						Account:     item.account,
+						Acquired:    true,
+						ReleaseFunc: result.ReleaseFunc,
+					}, nil
+				}
+			}
+		}
+	}
+
+	// ============ Layer 3: Fallback wait ============
+	sortAccountsByPriorityAndLastUsed(candidates, false)
+	for _, acc := range candidates {
+		return &AccountSelectionResult{
+			Account: acc,
+			WaitPlan: &AccountWaitPlan{
+				AccountID:      acc.ID,
+				MaxConcurrency: acc.Concurrency,
+				Timeout:        cfg.FallbackWaitTimeout,
+				MaxWaiting:     cfg.FallbackMaxWaiting,
+			},
+		}, nil
+	}
+
+	return nil, errors.New("no available accounts")
+}
+
+func (s *OpenAIGatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64) ([]Account, error) {
+	var accounts []Account
+	var err error
+	if s.cfg != nil && s.cfg.RunMode == config.RunModeSimple {
+		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
+	} else if groupID != nil {
+		accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformOpenAI)
+	} else {
+		accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
+	}
+	if err != nil {
+		return nil, fmt.Errorf("query accounts failed: %w", err)
+	}
+	return accounts, nil
+}
+
+func (s *OpenAIGatewayService) tryAcquireAccountSlot(ctx context.Context, accountID int64, maxConcurrency int) (*AcquireResult, error) {
+	if s.concurrencyService == nil {
+		return &AcquireResult{Acquired: true, ReleaseFunc: func() {}}, nil
+	}
+	return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
+}
+
+func (s *OpenAIGatewayService) schedulingConfig() config.GatewaySchedulingConfig {
+	if s.cfg != nil {
+		return s.cfg.Gateway.Scheduling
+	}
+	return config.GatewaySchedulingConfig{
+		StickySessionMaxWaiting:  3,
+		StickySessionWaitTimeout: 45 * time.Second,
+		FallbackWaitTimeout:      30 * time.Second,
+		FallbackMaxWaiting:       100,
+		LoadBatchEnabled:         true,
+		SlotCleanupInterval:      30 * time.Second,
+	}
+}
+
 // GetAccessToken gets the access token for an OpenAI account
 func (s *OpenAIGatewayService) GetAccessToken(ctx context.Context, account *Account) (string, string, error) {
 	switch account.Type {
@@ -311,7 +571,7 @@ func (s *OpenAIGatewayService) Forward(ctx context.Context, c *gin.Context, acco
 	}

 	// Send request
-	resp, err := s.httpUpstream.Do(upstreamReq, proxyURL)
+	resp, err := s.httpUpstream.Do(upstreamReq, proxyURL, account.ID, account.Concurrency)
 	if err != nil {
 		return nil, fmt.Errorf("upstream request failed: %w", err)
 	}
@@ -772,20 +1032,12 @@ func (s *OpenAIGatewayService) RecordUsage(ctx context.Context, input *OpenAIRec
 	if isSubscriptionBilling {
 		if cost.TotalCost > 0 {
 			_ = s.userSubRepo.IncrementUsage(ctx, subscription.ID, cost.TotalCost)
-			go func() {
-				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				_ = s.billingCacheService.UpdateSubscriptionUsage(cacheCtx, user.ID, *apiKey.GroupID, cost.TotalCost)
-			}()
+			s.billingCacheService.QueueUpdateSubscriptionUsage(user.ID, *apiKey.GroupID, cost.TotalCost)
 		}
 	} else {
 		if cost.ActualCost > 0 {
 			_ = s.userRepo.DeductBalance(ctx, user.ID, cost.ActualCost)
-			go func() {
-				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				_ = s.billingCacheService.DeductBalanceCache(cacheCtx, user.ID, cost.ActualCost)
-			}()
+			s.billingCacheService.QueueDeductBalance(user.ID, cost.ActualCost)
 		}
 	}


--- a/backend/internal/service/pricing_service.go
+++ b/backend/internal/service/pricing_service.go
@@ -18,6 +18,11 @@ import (
 	"github.com/Wei-Shaw/sub2api/internal/pkg/openai"
 )

+var (
+	openAIModelDatePattern = regexp.MustCompile(`-\d{8}$`)
+	openAIModelBasePattern = regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
+)
+
 // LiteLLMModelPricing LiteLLM价格数据结构
 // 只保留我们需要的字段，使用指针来处理可能缺失的值
 type LiteLLMModelPricing struct {
@@ -595,11 +600,8 @@ func (s *PricingService) matchByModelFamily(model string) *LiteLLMModelPricing {
 // 2. gpt-5.2-20251222 -> gpt-5.2（去掉日期版本号）
 // 3. 最终回退到 DefaultTestModel (gpt-5.1-codex)
 func (s *PricingService) matchOpenAIModel(model string) *LiteLLMModelPricing {
-	// 正则匹配日期后缀 (如 -20251222)
-	datePattern := regexp.MustCompile(`-\d{8}$`)
-
 	// 尝试的回退变体
-	variants := s.generateOpenAIModelVariants(model, datePattern)
+	variants := s.generateOpenAIModelVariants(model, openAIModelDatePattern)

 	for _, variant := range variants {
 		if pricing, ok := s.pricingData[variant]; ok {
@@ -638,14 +640,13 @@ func (s *PricingService) generateOpenAIModelVariants(model string, datePattern *

 	// 2. 提取基础版本号: gpt-5.2-codex -> gpt-5.2
 	// 只匹配纯数字版本号格式 gpt-X 或 gpt-X.Y，不匹配 gpt-4o 这种带字母后缀的
-	basePattern := regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
-	if matches := basePattern.FindStringSubmatch(model); len(matches) > 1 {
+	if matches := openAIModelBasePattern.FindStringSubmatch(model); len(matches) > 1 {
 		addVariant(matches[1])
 	}

 	// 3. 同时去掉日期后再提取基础版本号
 	if withoutDate != model {
-		if matches := basePattern.FindStringSubmatch(withoutDate); len(matches) > 1 {
+		if matches := openAIModelBasePattern.FindStringSubmatch(withoutDate); len(matches) > 1 {
 			addVariant(matches[1])
 		}
 	}

--- a/backend/internal/service/proxy_service.go
+++ b/backend/internal/service/proxy_service.go
@@ -4,7 +4,7 @@ import (
 	"context"
 	"fmt"

-	infraerrors "github.com/Wei-Shaw/sub2api/internal/infrastructure/errors"
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
 )


--- a/backend/internal/service/ratelimit_service.go
+++ b/backend/internal/service/ratelimit_service.go
@@ -5,6 +5,8 @@ import (
 	"log"
 	"net/http"
 	"strconv"
+	"strings"
+	"sync"
 	"time"

 	"github.com/Wei-Shaw/sub2api/internal/config"
@@ -13,14 +15,29 @@ import (
 // RateLimitService 处理限流和过载状态管理
 type RateLimitService struct {
 	accountRepo        AccountRepository
+	usageRepo          UsageLogRepository
 	cfg                *config.Config
+	geminiQuotaService *GeminiQuotaService
+	usageCacheMu       sync.Mutex
+	usageCache         map[int64]*geminiUsageCacheEntry
 }

+type geminiUsageCacheEntry struct {
+	windowStart time.Time
+	cachedAt    time.Time
+	totals      GeminiUsageTotals
+}
+
+const geminiPrecheckCacheTTL = time.Minute
+
 // NewRateLimitService 创建RateLimitService实例
-func NewRateLimitService(accountRepo AccountRepository, cfg *config.Config) *RateLimitService {
+func NewRateLimitService(accountRepo AccountRepository, usageRepo UsageLogRepository, cfg *config.Config, geminiQuotaService *GeminiQuotaService) *RateLimitService {
 	return &RateLimitService{
 		accountRepo:        accountRepo,
+		usageRepo:          usageRepo,
 		cfg:                cfg,
+		geminiQuotaService: geminiQuotaService,
+		usageCache:         make(map[int64]*geminiUsageCacheEntry),
 	}
 }

@@ -62,6 +79,106 @@ func (s *RateLimitService) HandleUpstreamError(ctx context.Context, account *Acc
 	}
 }

+// PreCheckUsage proactively checks local quota before dispatching a request.
+// Returns false when the account should be skipped.
+func (s *RateLimitService) PreCheckUsage(ctx context.Context, account *Account, requestedModel string) (bool, error) {
+	if account == nil || !account.IsGeminiCodeAssist() || strings.TrimSpace(requestedModel) == "" {
+		return true, nil
+	}
+	if s.usageRepo == nil || s.geminiQuotaService == nil {
+		return true, nil
+	}
+
+	quota, ok := s.geminiQuotaService.QuotaForAccount(ctx, account)
+	if !ok {
+		return true, nil
+	}
+
+	var limit int64
+	switch geminiModelClassFromName(requestedModel) {
+	case geminiModelFlash:
+		limit = quota.FlashRPD
+	default:
+		limit = quota.ProRPD
+	}
+	if limit <= 0 {
+		return true, nil
+	}
+
+	now := time.Now()
+	start := geminiDailyWindowStart(now)
+	totals, ok := s.getGeminiUsageTotals(account.ID, start, now)
+	if !ok {
+		stats, err := s.usageRepo.GetModelStatsWithFilters(ctx, start, now, 0, 0, account.ID)
+		if err != nil {
+			return true, err
+		}
+		totals = geminiAggregateUsage(stats)
+		s.setGeminiUsageTotals(account.ID, start, now, totals)
+	}
+
+	var used int64
+	switch geminiModelClassFromName(requestedModel) {
+	case geminiModelFlash:
+		used = totals.FlashRequests
+	default:
+		used = totals.ProRequests
+	}
+
+	if used >= limit {
+		resetAt := geminiDailyResetTime(now)
+		if err := s.accountRepo.SetRateLimited(ctx, account.ID, resetAt); err != nil {
+			log.Printf("SetRateLimited failed for account %d: %v", account.ID, err)
+		}
+		log.Printf("[Gemini PreCheck] Account %d reached daily quota (%d/%d), rate limited until %v", account.ID, used, limit, resetAt)
+		return false, nil
+	}
+
+	return true, nil
+}
+
+func (s *RateLimitService) getGeminiUsageTotals(accountID int64, windowStart, now time.Time) (GeminiUsageTotals, bool) {
+	s.usageCacheMu.Lock()
+	defer s.usageCacheMu.Unlock()
+
+	if s.usageCache == nil {
+		return GeminiUsageTotals{}, false
+	}
+
+	entry, ok := s.usageCache[accountID]
+	if !ok || entry == nil {
+		return GeminiUsageTotals{}, false
+	}
+	if !entry.windowStart.Equal(windowStart) {
+		return GeminiUsageTotals{}, false
+	}
+	if now.Sub(entry.cachedAt) >= geminiPrecheckCacheTTL {
+		return GeminiUsageTotals{}, false
+	}
+	return entry.totals, true
+}
+
+func (s *RateLimitService) setGeminiUsageTotals(accountID int64, windowStart, now time.Time, totals GeminiUsageTotals) {
+	s.usageCacheMu.Lock()
+	defer s.usageCacheMu.Unlock()
+	if s.usageCache == nil {
+		s.usageCache = make(map[int64]*geminiUsageCacheEntry)
+	}
+	s.usageCache[accountID] = &geminiUsageCacheEntry{
+		windowStart: windowStart,
+		cachedAt:    now,
+		totals:      totals,
+	}
+}
+
+// GeminiCooldown returns the fallback cooldown duration for Gemini 429s based on tier.
+func (s *RateLimitService) GeminiCooldown(ctx context.Context, account *Account) time.Duration {
+	if account == nil {
+		return 5 * time.Minute
+	}
+	return s.geminiQuotaService.CooldownForTier(ctx, account.GeminiTierID())
+}
+
 // handleAuthError 处理认证类错误(401/403)，停止账号调度
 func (s *RateLimitService) handleAuthError(ctx context.Context, account *Account, errorMsg string) {
 	if err := s.accountRepo.SetError(ctx, account.ID, errorMsg); err != nil {

--- a/backend/internal/service/redeem_service.go
+++ b/backend/internal/service/redeem_service.go
@@ -9,7 +9,8 @@ import (
 	"strings"
 	"time"

-	infraerrors "github.com/Wei-Shaw/sub2api/internal/infrastructure/errors"
+	dbent "github.com/Wei-Shaw/sub2api/ent"
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
 )

@@ -72,6 +73,7 @@ type RedeemService struct {
 	subscriptionService *SubscriptionService
 	cache               RedeemCache
 	billingCacheService *BillingCacheService
+	entClient           *dbent.Client
 }

 // NewRedeemService 创建兑换码服务实例
@@ -81,6 +83,7 @@ func NewRedeemService(
 	subscriptionService *SubscriptionService,
 	cache RedeemCache,
 	billingCacheService *BillingCacheService,
+	entClient *dbent.Client,
 ) *RedeemService {
 	return &RedeemService{
 		redeemRepo:          redeemRepo,
@@ -88,6 +91,7 @@ func NewRedeemService(
 		subscriptionService: subscriptionService,
 		cache:               cache,
 		billingCacheService: billingCacheService,
+		entClient:           entClient,
 	}
 }

@@ -248,9 +252,19 @@ func (s *RedeemService) Redeem(ctx context.Context, userID int64, code string) (
 	}
 	_ = user // 使用变量避免未使用错误

+	// 使用数据库事务保证兑换码标记与权益发放的原子性
+	tx, err := s.entClient.Tx(ctx)
+	if err != nil {
+		return nil, fmt.Errorf("begin transaction: %w", err)
+	}
+	defer func() { _ = tx.Rollback() }()
+
+	// 将事务放入 context，使 repository 方法能够使用同一事务
+	txCtx := dbent.NewTxContext(ctx, tx)
+
 	// 【关键】先标记兑换码为已使用，确保并发安全
 	// 利用数据库乐观锁（WHERE status = 'unused'）保证原子性
-	if err := s.redeemRepo.Use(ctx, redeemCode.ID, userID); err != nil {
+	if err := s.redeemRepo.Use(txCtx, redeemCode.ID, userID); err != nil {
 		if errors.Is(err, ErrRedeemCodeNotFound) || errors.Is(err, ErrRedeemCodeUsed) {
 			return nil, ErrRedeemCodeUsed
 		}
@@ -261,21 +275,13 @@ func (s *RedeemService) Redeem(ctx context.Context, userID int64, code string) (
 	switch redeemCode.Type {
 	case RedeemTypeBalance:
 		// 增加用户余额
-		if err := s.userRepo.UpdateBalance(ctx, userID, redeemCode.Value); err != nil {
+		if err := s.userRepo.UpdateBalance(txCtx, userID, redeemCode.Value); err != nil {
 			return nil, fmt.Errorf("update user balance: %w", err)
 		}
-		// 失效余额缓存
-		if s.billingCacheService != nil {
-			go func() {
-				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				_ = s.billingCacheService.InvalidateUserBalance(cacheCtx, userID)
-			}()
-		}

 	case RedeemTypeConcurrency:
 		// 增加用户并发数
-		if err := s.userRepo.UpdateConcurrency(ctx, userID, int(redeemCode.Value)); err != nil {
+		if err := s.userRepo.UpdateConcurrency(txCtx, userID, int(redeemCode.Value)); err != nil {
 			return nil, fmt.Errorf("update user concurrency: %w", err)
 		}

@@ -284,7 +290,7 @@ func (s *RedeemService) Redeem(ctx context.Context, userID int64, code string) (
 		if validityDays <= 0 {
 			validityDays = 30
 		}
-		_, _, err := s.subscriptionService.AssignOrExtendSubscription(ctx, &AssignSubscriptionInput{
+		_, _, err := s.subscriptionService.AssignOrExtendSubscription(txCtx, &AssignSubscriptionInput{
 			UserID:       userID,
 			GroupID:      *redeemCode.GroupID,
 			ValidityDays: validityDays,
@@ -294,20 +300,19 @@ func (s *RedeemService) Redeem(ctx context.Context, userID int64, code string) (
 		if err != nil {
 			return nil, fmt.Errorf("assign or extend subscription: %w", err)
 		}
-		// 失效订阅缓存
-		if s.billingCacheService != nil {
-			groupID := *redeemCode.GroupID
-			go func() {
-				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-				defer cancel()
-				_ = s.billingCacheService.InvalidateSubscription(cacheCtx, userID, groupID)
-			}()
-		}

 	default:
 		return nil, fmt.Errorf("unsupported redeem type: %s", redeemCode.Type)
 	}

+	// 提交事务
+	if err := tx.Commit(); err != nil {
+		return nil, fmt.Errorf("commit transaction: %w", err)
+	}
+
+	// 事务提交成功后失效缓存
+	s.invalidateRedeemCaches(ctx, userID, redeemCode)
+
 	// 重新获取更新后的兑换码
 	redeemCode, err = s.redeemRepo.GetByID(ctx, redeemCode.ID)
 	if err != nil {
@@ -317,6 +322,31 @@ func (s *RedeemService) Redeem(ctx context.Context, userID int64, code string) (
 	return redeemCode, nil
 }

+// invalidateRedeemCaches 失效兑换相关的缓存
+func (s *RedeemService) invalidateRedeemCaches(ctx context.Context, userID int64, redeemCode *RedeemCode) {
+	if s.billingCacheService == nil {
+		return
+	}
+
+	switch redeemCode.Type {
+	case RedeemTypeBalance:
+		go func() {
+			cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+			defer cancel()
+			_ = s.billingCacheService.InvalidateUserBalance(cacheCtx, userID)
+		}()
+	case RedeemTypeSubscription:
+		if redeemCode.GroupID != nil {
+			groupID := *redeemCode.GroupID
+			go func() {
+				cacheCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+				defer cancel()
+				_ = s.billingCacheService.InvalidateSubscription(cacheCtx, userID, groupID)
+			}()
+		}
+	}
+}
+
 // GetByID 根据ID获取兑换码
 func (s *RedeemService) GetByID(ctx context.Context, id int64) (*RedeemCode, error) {
 	code, err := s.redeemRepo.GetByID(ctx, id)

--- a/backend/internal/service/setting_service.go
+++ b/backend/internal/service/setting_service.go
@@ -9,7 +9,7 @@ import (
 	"strconv"

 	"github.com/Wei-Shaw/sub2api/internal/config"
-	infraerrors "github.com/Wei-Shaw/sub2api/internal/infrastructure/errors"
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 )

 var (

--- a/backend/internal/service/subscription_service.go
+++ b/backend/internal/service/subscription_service.go
@@ -6,7 +6,7 @@ import (
 	"log"
 	"time"

-	infraerrors "github.com/Wei-Shaw/sub2api/internal/infrastructure/errors"
+	infraerrors "github.com/Wei-Shaw/sub2api/internal/pkg/errors"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/pagination"
 )

@@ -26,6 +26,7 @@ var (
 	ErrDailyLimitExceeded        = infraerrors.TooManyRequests("DAILY_LIMIT_EXCEEDED", "daily usage limit exceeded")
 	ErrWeeklyLimitExceeded       = infraerrors.TooManyRequests("WEEKLY_LIMIT_EXCEEDED", "weekly usage limit exceeded")
 	ErrMonthlyLimitExceeded      = infraerrors.TooManyRequests("MONTHLY_LIMIT_EXCEEDED", "monthly usage limit exceeded")
+	ErrSubscriptionNilInput      = infraerrors.BadRequest("SUBSCRIPTION_NIL_INPUT", "subscription input cannot be nil")
 )

 // SubscriptionService 订阅服务
@@ -489,6 +490,7 @@ func (s *SubscriptionService) CheckAndResetWindows(ctx context.Context, sub *Use
 }

 // CheckUsageLimits 检查使用限额（返回错误如果超限）
+// 用于中间件的快速预检查，additionalCost 通常为 0
 func (s *SubscriptionService) CheckUsageLimits(ctx context.Context, sub *UserSubscription, group *Group, additionalCost float64) error {
 	if !sub.CheckDailyLimit(group, additionalCost) {
 		return ErrDailyLimitExceeded

--- a/backend/internal/service/token_refresher.go
+++ b/backend/internal/service/token_refresher.go
@@ -43,17 +43,11 @@ func (r *ClaudeTokenRefresher) CanRefresh(account *Account) bool {
 // NeedsRefresh 检查token是否需要刷新
 // 基于 expires_at 字段判断是否在刷新窗口内
 func (r *ClaudeTokenRefresher) NeedsRefresh(account *Account, refreshWindow time.Duration) bool {
-	s := account.GetCredential("expires_at")
-	if s == "" {
-		return false
-	}
-
-	expiresAt, err := strconv.ParseInt(s, 10, 64)
-	if err != nil {
+	expiresAt := account.GetCredentialAsTime("expires_at")
+	if expiresAt == nil {
 		return false
 	}
-
-	return time.Until(time.Unix(expiresAt, 0)) < refreshWindow
+	return time.Until(*expiresAt) < refreshWindow
 }

 // Refresh 执行token刷新