Merge pull request #523 from touwaeriol/feat/antigravity-improvements

feat: Antigravity improvements and scope-to-model rate limiting refactor

Merge pull request #523 from touwaeriol/feat/antigravity-improvements
feat: Antigravity improvements and scope-to-model rate limiting refactor
149e4267 · Wesley Liddick · GitHub · 5fa93ebd · 9a479d1b · 149e4267
Unverified commit 149e4267, authored Feb 09, 2026 by Wesley Liddick; committed by GitHub on Feb 09, 2026.
--- a/backend/internal/service/digest_session_store.go
+++ b/backend/internal/service/digest_session_store.go
+package service
+
+import (
+	"strconv"
+	"strings"
+	"time"
+
+	gocache "github.com/patrickmn/go-cache"
+)
+
+// digestSessionTTL is the default TTL for digest session entries.
+const digestSessionTTL = 5 * time.Minute
+
+// sessionEntry is the value stored under each key of the flat cache.
+type sessionEntry struct {
+	uuid      string // session UUID returned by Find
+	accountID int64  // account ID returned by Find
+}
+
+// DigestSessionStore is an in-memory digest session store (flat cache implementation).
+// key: "{groupID}:{prefixHash}|{digestChain}" → *sessionEntry
+type DigestSessionStore struct {
+	cache *gocache.Cache // single cache holding every namespace; keys are built with buildNS
+}
+
+// NewDigestSessionStore creates an in-memory digest session store whose entries default to digestSessionTTL.
+func NewDigestSessionStore() *DigestSessionStore {
+	return &DigestSessionStore{
+		cache: gocache.New(digestSessionTTL, time.Minute), // second argument is go-cache's cleanup interval
+	}
+}
+
+// Save stores a digest session under digestChain (no-op for an empty chain). oldDigestChain is the matchedChain returned by Find and is used to delete the old key.
+func (s *DigestSessionStore) Save(groupID int64, prefixHash, digestChain, uuid string, accountID int64, oldDigestChain string) {
+	if digestChain == "" {
+		return
+	}
+	ns := buildNS(groupID, prefixHash)
+	s.cache.Set(ns+digestChain, &sessionEntry{uuid: uuid, accountID: accountID}, gocache.DefaultExpiration)
+	if oldDigestChain != "" && oldDigestChain != digestChain { // skip when equal so the key just written is not deleted
+		s.cache.Delete(ns + oldDigestChain)
+	}
+}
+
+// Find looks up a digest session: starting from the full chain it truncates one "-"-separated segment per miss and returns the longest match together with its matchedChain.
+func (s *DigestSessionStore) Find(groupID int64, prefixHash, digestChain string) (uuid string, accountID int64, matchedChain string, found bool) {
+	if digestChain == "" {
+		return "", 0, "", false
+	}
+	ns := buildNS(groupID, prefixHash)
+	chain := digestChain
+	for {
+		if val, ok := s.cache.Get(ns + chain); ok {
+			if e, ok := val.(*sessionEntry); ok { // a value of any other type is treated as a miss
+				return e.uuid, e.accountID, chain, true
+			}
+		}
+		i := strings.LastIndex(chain, "-")
+		if i < 0 { // no separator left: every prefix has been tried
+			return "", 0, "", false
+		}
+		chain = chain[:i] // drop the last segment and retry with the shorter prefix
+	}
+}
+
+// buildNS builds the namespace prefix "{groupID}:{prefixHash}|" that is prepended to a digest chain to form a cache key.
+func buildNS(groupID int64, prefixHash string) string {
+	return strconv.FormatInt(groupID, 10) + ":" + prefixHash + "|"
+}
--- a/backend/internal/service/digest_session_store_test.go
+++ b/backend/internal/service/digest_session_store_test.go
+//go:build unit
+
+package service
+
+import (
+	"fmt"
+	"sync"
+	"testing"
+	"time"
+
+	gocache "github.com/patrickmn/go-cache"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestDigestSessionStore_SaveAndFind(t *testing.T) { // exact-chain round trip: Find returns what Save stored
+	store := NewDigestSessionStore()
+
+	store.Save(1, "prefix", "s:a1-u:b2-m:c3", "uuid-1", 100, "")
+
+	uuid, accountID, _, found := store.Find(1, "prefix", "s:a1-u:b2-m:c3")
+	require.True(t, found)
+	assert.Equal(t, "uuid-1", uuid)
+	assert.Equal(t, int64(100), accountID)
+}
+
+func TestDigestSessionStore_PrefixMatch(t *testing.T) { // a longer lookup chain matches a stored shorter prefix
+	store := NewDigestSessionStore()
+
+	// Save the short chain
+	store.Save(1, "prefix", "u:a-m:b", "uuid-short", 10, "")
+
+	// Look up with a longer chain; it should prefix-match the short chain
+	uuid, accountID, matchedChain, found := store.Find(1, "prefix", "u:a-m:b-u:c-m:d")
+	require.True(t, found)
+	assert.Equal(t, "uuid-short", uuid)
+	assert.Equal(t, int64(10), accountID)
+	assert.Equal(t, "u:a-m:b", matchedChain)
+}
+
+func TestDigestSessionStore_LongestPrefixMatch(t *testing.T) { // with several stored prefixes, the longest one wins
+	store := NewDigestSessionStore()
+
+	store.Save(1, "prefix", "u:a", "uuid-1", 1, "")
+	store.Save(1, "prefix", "u:a-m:b", "uuid-2", 2, "")
+	store.Save(1, "prefix", "u:a-m:b-u:c", "uuid-3", 3, "")
+
+	// Should match the deepest "u:a-m:b-u:c" (the full chain is truncated segment by segment, so the longest entry is hit first)
+	uuid, accountID, _, found := store.Find(1, "prefix", "u:a-m:b-u:c-m:d-u:e")
+	require.True(t, found)
+	assert.Equal(t, "uuid-3", uuid)
+	assert.Equal(t, int64(3), accountID)
+
+	// Look up a medium-length chain; it should match "u:a-m:b"
+	uuid, accountID, _, found = store.Find(1, "prefix", "u:a-m:b-u:x")
+	require.True(t, found)
+	assert.Equal(t, "uuid-2", uuid)
+	assert.Equal(t, int64(2), accountID)
+}
+
+func TestDigestSessionStore_SaveDeletesOldChain(t *testing.T) { // Save with oldDigestChain removes the previous key
+	store := NewDigestSessionStore()
+
+	// Round 1: save "u:a-m:b"
+	store.Save(1, "prefix", "u:a-m:b", "uuid-1", 100, "")
+
+	// Round 2: save a longer chain for the same uuid, passing the old chain
+	store.Save(1, "prefix", "u:a-m:b-u:c-m:d", "uuid-1", 100, "u:a-m:b")
+
+	// The old chain "u:a-m:b" should have been deleted
+	_, _, _, found := store.Find(1, "prefix", "u:a-m:b")
+	assert.False(t, found, "old chain should be deleted")
+
+	// The new chain should be found
+	uuid, accountID, _, found := store.Find(1, "prefix", "u:a-m:b-u:c-m:d")
+	require.True(t, found)
+	assert.Equal(t, "uuid-1", uuid)
+	assert.Equal(t, int64(100), accountID)
+}
+
+func TestDigestSessionStore_DifferentSessionsNoInterference(t *testing.T) { // chains sharing a first segment stay separate
+	store := NewDigestSessionStore()
+
+	// Same system prompt, different user prompts
+	store.Save(1, "prefix", "s:sys-u:user1", "uuid-1", 100, "")
+	store.Save(1, "prefix", "s:sys-u:user2", "uuid-2", 200, "")
+
+	uuid, accountID, _, found := store.Find(1, "prefix", "s:sys-u:user1-m:reply1")
+	require.True(t, found)
+	assert.Equal(t, "uuid-1", uuid)
+	assert.Equal(t, int64(100), accountID)
+
+	uuid, accountID, _, found = store.Find(1, "prefix", "s:sys-u:user2-m:reply2")
+	require.True(t, found)
+	assert.Equal(t, "uuid-2", uuid)
+	assert.Equal(t, int64(200), accountID)
+}
+
+func TestDigestSessionStore_NoMatch(t *testing.T) { // an unrelated chain yields found == false
+	store := NewDigestSessionStore()
+
+	store.Save(1, "prefix", "u:a-m:b", "uuid-1", 100, "")
+
+	// A completely different chain
+	_, _, _, found := store.Find(1, "prefix", "u:x-m:y")
+	assert.False(t, found)
+}
+
+func TestDigestSessionStore_DifferentPrefixHash(t *testing.T) { // the same chain under another prefixHash is not found
+	store := NewDigestSessionStore()
+
+	store.Save(1, "prefix1", "u:a-m:b", "uuid-1", 100, "")
+
+	// Different prefixHash values should be isolated
+	_, _, _, found := store.Find(1, "prefix2", "u:a-m:b")
+	assert.False(t, found)
+}
+
+func TestDigestSessionStore_DifferentGroupID(t *testing.T) { // the same chain under another groupID is not found
+	store := NewDigestSessionStore()
+
+	store.Save(1, "prefix", "u:a-m:b", "uuid-1", 100, "")
+
+	// Different groupID values should be isolated
+	_, _, _, found := store.Find(2, "prefix", "u:a-m:b")
+	assert.False(t, found)
+}
+
+func TestDigestSessionStore_EmptyDigestChain(t *testing.T) { // empty chains are ignored by both Save and Find
+	store := NewDigestSessionStore()
+
+	// An empty chain should not be saved
+	store.Save(1, "prefix", "", "uuid-1", 100, "")
+	_, _, _, found := store.Find(1, "prefix", "")
+	assert.False(t, found)
+}
+
+func TestDigestSessionStore_TTLExpiration(t *testing.T) { // entries disappear once the cache TTL has elapsed
+	store := &DigestSessionStore{
+		cache: gocache.New(100*time.Millisecond, 50*time.Millisecond), // 100ms TTL, 50ms cleanup interval
+	}
+
+	store.Save(1, "prefix", "u:a-m:b", "uuid-1", 100, "")
+
+	// Should be found immediately
+	_, _, _, found := store.Find(1, "prefix", "u:a-m:b")
+	require.True(t, found)
+
+	// Wait for expiration + cleanup interval
+	time.Sleep(300 * time.Millisecond)
+
+	// Should not be found after expiration
+	_, _, _, found = store.Find(1, "prefix", "u:a-m:b")
+	assert.False(t, found)
+}
+
+func TestDigestSessionStore_ConcurrentSafety(t *testing.T) { // no assertions of its own; presumably meant to run under -race
+	store := NewDigestSessionStore()
+
+	var wg sync.WaitGroup
+	const goroutines = 50
+	const operations = 100
+
+	wg.Add(goroutines)
+	for g := 0; g < goroutines; g++ {
+		go func(id int) {
+			defer wg.Done()
+			prefix := fmt.Sprintf("prefix-%d", id%5) // 5 distinct prefixes shared across the goroutines
+			for i := 0; i < operations; i++ {
+				chain := fmt.Sprintf("u:%d-m:%d", id, i)
+				uuid := fmt.Sprintf("uuid-%d-%d", id, i)
+				store.Save(1, prefix, chain, uuid, int64(id), "")
+				store.Find(1, prefix, chain)
+			}
+		}(g)
+	}
+	wg.Wait()
+}
+
+func TestDigestSessionStore_MultipleSessions(t *testing.T) { // several sessions of different chain lengths coexist
+	store := NewDigestSessionStore()
+
+	sessions := []struct {
+		chain     string
+		uuid      string
+		accountID int64
+	}{
+		{"u:session1", "uuid-1", 1},
+		{"u:session2-m:reply2", "uuid-2", 2},
+		{"u:session3-m:reply3-u:msg3", "uuid-3", 3},
+	}
+
+	for _, sess := range sessions {
+		store.Save(1, "prefix", sess.chain, sess.uuid, sess.accountID, "")
+	}
+
+	// Verify every session can be looked up correctly
+	for _, sess := range sessions {
+		uuid, accountID, _, found := store.Find(1, "prefix", sess.chain)
+		require.True(t, found, "should find session: %s", sess.chain)
+		assert.Equal(t, sess.uuid, uuid)
+		assert.Equal(t, sess.accountID, accountID)
+	}
+
+	// Verify the continued-conversation scenario
+	uuid, accountID, _, found := store.Find(1, "prefix", "u:session2-m:reply2-u:newmsg")
+	require.True(t, found)
+	assert.Equal(t, "uuid-2", uuid)
+	assert.Equal(t, int64(2), accountID)
+}
+
+func TestDigestSessionStore_Performance1000Sessions(t *testing.T) { // logs lookup timing; asserts only that each lookup hits
+	store := NewDigestSessionStore()
+
+	// Insert 1000 sessions
+	for i := 0; i < 1000; i++ {
+		chain := fmt.Sprintf("s:sys-u:user%d-m:reply%d", i, i)
+		store.Save(1, "prefix", chain, fmt.Sprintf("uuid-%d", i), int64(i), "")
+	}
+
+	// Lookup performance test
+	start := time.Now()
+	const lookups = 10000
+	for i := 0; i < lookups; i++ {
+		idx := i % 1000
+		chain := fmt.Sprintf("s:sys-u:user%d-m:reply%d-u:newmsg", idx, idx) // one extra segment, so each lookup truncates once
+		_, _, _, found := store.Find(1, "prefix", chain)
+		assert.True(t, found)
+	}
+	elapsed := time.Since(start)
+	t.Logf("%d lookups in %v (%.0f ns/op)", lookups, elapsed, float64(elapsed.Nanoseconds())/lookups)
+}
+
+func TestDigestSessionStore_FindReturnsMatchedChain(t *testing.T) { // matchedChain is the stored key's chain, not the query
+	store := NewDigestSessionStore()
+
+	store.Save(1, "prefix", "u:a-m:b-u:c", "uuid-1", 100, "")
+
+	// Exact match
+	_, _, matchedChain, found := store.Find(1, "prefix", "u:a-m:b-u:c")
+	require.True(t, found)
+	assert.Equal(t, "u:a-m:b-u:c", matchedChain)
+
+	// Prefix match (hit after truncation)
+	_, _, matchedChain, found = store.Find(1, "prefix", "u:a-m:b-u:c-m:d-u:e")
+	require.True(t, found)
+	assert.Equal(t, "u:a-m:b-u:c", matchedChain)
+}
+
+func TestDigestSessionStore_CacheItemCountStable(t *testing.T) { // Find-then-Save keeps one key per conversation
+	store := NewDigestSessionStore()
+
+	// Simulate 100 independent conversations, each with 10 rounds
+	// When oldDigestChain is passed correctly, each conversation keeps only 1 key at all times
+	for conv := 0; conv < 100; conv++ {
+		var prevMatchedChain string
+		for round := 0; round < 10; round++ {
+			chain := fmt.Sprintf("s:sys-u:user%d", conv)
+			for r := 0; r < round; r++ {
+				chain += fmt.Sprintf("-m:a%d-u:q%d", r, r+1)
+			}
+			uuid := fmt.Sprintf("uuid-conv%d", conv)
+
+			_, _, matched, _ := store.Find(1, "prefix", chain)
+			store.Save(1, "prefix", chain, uuid, int64(conv), matched)
+			prevMatchedChain = matched
+			_ = prevMatchedChain // NOTE(review): prevMatchedChain is written but never read; it looks removable
+		}
+	}
+
+	// 100 conversations × 1 key per conversation = should be ≤ 100 keys
+	// A few leftovers from concurrency are tolerated, but it must never approach 100×10=1000
+	itemCount := store.cache.ItemCount()
+	assert.LessOrEqual(t, itemCount, 100, "cache should have at most 100 items (1 per conversation), got %d", itemCount)
+	t.Logf("Cache item count after 100 conversations × 10 rounds: %d", itemCount)
+}
+
+func TestDigestSessionStore_TTLPreventsUnboundedGrowth(t *testing.T) {
+	// Use a very short TTL to verify the cache is cleaned up after many writes
+	store := &DigestSessionStore{
+		cache: gocache.New(100*time.Millisecond, 50*time.Millisecond),
+	}
+
+	// Insert 500 distinct keys (no oldDigestChain, simulating the worst case: every write is the first round of a new conversation)
+	for i := 0; i < 500; i++ {
+		chain := fmt.Sprintf("u:user%d", i)
+		store.Save(1, "prefix", chain, fmt.Sprintf("uuid-%d", i), int64(i), "")
+	}
+
+	assert.Equal(t, 500, store.cache.ItemCount())
+
+	// Wait for TTL + cleanup interval
+	time.Sleep(300 * time.Millisecond)
+
+	assert.Equal(t, 0, store.cache.ItemCount(), "all items should be expired and cleaned up")
+}
+
+func TestDigestSessionStore_SaveSameChainNoDelete(t *testing.T) { // Save with oldDigestChain == digestChain keeps the key
+	store := NewDigestSessionStore()
+
+	// Save the chain
+	store.Save(1, "prefix", "u:a-m:b", "uuid-1", 100, "")
+
+	// User resends the same message: oldDigestChain == digestChain, so the key that was just set must not be deleted
+	store.Save(1, "prefix", "u:a-m:b", "uuid-1", 100, "u:a-m:b")
+
+	// It can still be found
+	uuid, accountID, _, found := store.Find(1, "prefix", "u:a-m:b")
+	require.True(t, found)
+	assert.Equal(t, "uuid-1", uuid)
+	assert.Equal(t, int64(100), accountID)
+}
--- a/backend/internal/service/error_policy_integration_test.go
+++ b/backend/internal/service/error_policy_integration_test.go
+//go:build unit
+
+package service
+
+import (
+	"context"
+	"io"
+	"net/http"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// Mocks (scoped to this file by naming convention)
+// ---------------------------------------------------------------------------
+
+// epFixedUpstream is a test upstream that returns the same status code and body for every request and counts the calls.
+type epFixedUpstream struct {
+	statusCode int    // status code set on every response
+	body       string // body text served by every response
+	calls      int    // number of Do invocations; incremented without synchronization
+}
+
+func (u *epFixedUpstream) Do(req *http.Request, proxyURL string, accountID int64, accountConcurrency int) (*http.Response, error) { // ignores all arguments; never returns an error
+	u.calls++
+	return &http.Response{
+		StatusCode: u.statusCode,
+		Header:     http.Header{},
+		Body:       io.NopCloser(strings.NewReader(u.body)), // fresh reader per call so each response body is readable
+	}, nil
+}
+
+func (u *epFixedUpstream) DoWithTLS(req *http.Request, proxyURL string, accountID int64, accountConcurrency int, enableTLSFingerprint bool) (*http.Response, error) { // delegates to Do; enableTLSFingerprint is ignored
+	return u.Do(req, proxyURL, accountID, accountConcurrency)
+}
+
+// epAccountRepo embeds mockAccountRepoForGemini and counts SetTempUnschedulable / SetError calls.
+type epAccountRepo struct {
+	mockAccountRepoForGemini
+	tempCalls   int // number of SetTempUnschedulable calls
+	setErrCalls int // number of SetError calls
+}
+
+func (r *epAccountRepo) SetTempUnschedulable(_ context.Context, _ int64, _ time.Time, _ string) error { // counts the call; arguments are discarded
+	r.tempCalls++
+	return nil
+}
+
+func (r *epAccountRepo) SetError(_ context.Context, _ int64, _ string) error { // counts the call; arguments are discarded
+	r.setErrCalls++
+	return nil
+}
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+func saveAndSetBaseURLs(t *testing.T) { // swaps the antigravity package globals for test values; t.Cleanup restores them
+	t.Helper()
+	oldBaseURLs := append([]string(nil), antigravity.BaseURLs...) // copy so the saved slice does not alias the global
+	oldAvail := antigravity.DefaultURLAvailability
+	antigravity.BaseURLs = []string{"https://ep-test.example"}
+	antigravity.DefaultURLAvailability = antigravity.NewURLAvailability(time.Minute)
+	t.Cleanup(func() {
+		antigravity.BaseURLs = oldBaseURLs
+		antigravity.DefaultURLAvailability = oldAvail
+	})
+}
+
+func newRetryParams(account *Account, upstream HTTPUpstream, handleError func(context.Context, string, *Account, int, http.Header, []byte, string, int64, string, bool) *handleModelRateLimitResult) antigravityRetryLoopParams { // fixed test values around the given account, upstream and handleError
+	return antigravityRetryLoopParams{
+		ctx:            context.Background(), // tests that need a deadline overwrite p.ctx afterwards
+		prefix:         "[ep-test]",
+		account:        account,
+		accessToken:    "token",
+		action:         "generateContent",
+		body:           []byte(`{"input":"test"}`),
+		httpUpstream:   upstream,
+		requestedModel: "claude-sonnet-4-5",
+		handleError:    handleError,
+	}
+}
+
+// ---------------------------------------------------------------------------
+// TestRetryLoop_ErrorPolicy_CustomErrorCodes: with custom error codes enabled, handleError runs only when the upstream status is listed; upstream is called exactly once either way.
+// ---------------------------------------------------------------------------
+
+func TestRetryLoop_ErrorPolicy_CustomErrorCodes(t *testing.T) {
+	tests := []struct {
+		name              string
+		upstreamStatus    int
+		upstreamBody      string
+		customCodes       []any
+		expectHandleError int
+		expectUpstream    int
+		expectStatusCode  int
+	}{
+		{
+			name:              "429_in_custom_codes_matched",
+			upstreamStatus:    429,
+			upstreamBody:      `{"error":"rate limited"}`,
+			customCodes:       []any{float64(429)},
+			expectHandleError: 1,
+			expectUpstream:    1,
+			expectStatusCode:  429,
+		},
+		{
+			name:              "429_not_in_custom_codes_skipped",
+			upstreamStatus:    429,
+			upstreamBody:      `{"error":"rate limited"}`,
+			customCodes:       []any{float64(500)},
+			expectHandleError: 0,
+			expectUpstream:    1,
+			expectStatusCode:  429,
+		},
+		{
+			name:              "500_in_custom_codes_matched",
+			upstreamStatus:    500,
+			upstreamBody:      `{"error":"internal"}`,
+			customCodes:       []any{float64(500)},
+			expectHandleError: 1,
+			expectUpstream:    1,
+			expectStatusCode:  500,
+		},
+		{
+			name:              "500_not_in_custom_codes_skipped",
+			upstreamStatus:    500,
+			upstreamBody:      `{"error":"internal"}`,
+			customCodes:       []any{float64(429)},
+			expectHandleError: 0,
+			expectUpstream:    1,
+			expectStatusCode:  500,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			saveAndSetBaseURLs(t)
+
+			upstream := &epFixedUpstream{statusCode: tt.upstreamStatus, body: tt.upstreamBody}
+			repo := &epAccountRepo{}
+			rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+
+			account := &Account{
+				ID:          100,
+				Type:        AccountTypeAPIKey,
+				Platform:    PlatformAntigravity,
+				Schedulable: true,
+				Status:      StatusActive,
+				Concurrency: 1,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         tt.customCodes,
+				},
+			}
+
+			svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+			var handleErrorCount int
+			p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+				handleErrorCount++
+				return nil
+			})
+
+			result, err := svc.antigravityRetryLoop(p)
+
+			require.NoError(t, err)
+			require.NotNil(t, result)
+			require.NotNil(t, result.resp)
+			defer func() { _ = result.resp.Body.Close() }()
+
+			require.Equal(t, tt.expectStatusCode, result.resp.StatusCode)
+			require.Equal(t, tt.expectHandleError, handleErrorCount, "handleError call count")
+			require.Equal(t, tt.expectUpstream, upstream.calls, "upstream call count")
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// TestRetryLoop_ErrorPolicy_TempUnschedulable: a matching temp-unschedulable rule yields *AntigravityAccountSwitchError after one upstream call and no handleError; a non-matching body takes the default retry path.
+// ---------------------------------------------------------------------------
+
+func TestRetryLoop_ErrorPolicy_TempUnschedulable(t *testing.T) {
+	tempRulesAccount := func(rules []any) *Account {
+		return &Account{
+			ID:          200,
+			Type:        AccountTypeOAuth,
+			Platform:    PlatformAntigravity,
+			Schedulable: true,
+			Status:      StatusActive,
+			Concurrency: 1,
+			Credentials: map[string]any{
+				"temp_unschedulable_enabled": true,
+				"temp_unschedulable_rules":   rules,
+			},
+		}
+	}
+
+	overloadedRule := map[string]any{
+		"error_code":       float64(503),
+		"keywords":         []any{"overloaded"},
+		"duration_minutes": float64(10),
+	}
+
+	rateLimitRule := map[string]any{
+		"error_code":       float64(429),
+		"keywords":         []any{"rate limited keyword"},
+		"duration_minutes": float64(5),
+	}
+
+	t.Run("503_overloaded_matches_rule", func(t *testing.T) {
+		saveAndSetBaseURLs(t)
+
+		upstream := &epFixedUpstream{statusCode: 503, body: `overloaded`}
+		repo := &epAccountRepo{}
+		rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+		svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+		account := tempRulesAccount([]any{overloadedRule})
+		p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+			t.Error("handleError should not be called for temp unschedulable")
+			return nil
+		})
+
+		result, err := svc.antigravityRetryLoop(p)
+
+		require.Nil(t, result)
+		var switchErr *AntigravityAccountSwitchError
+		require.ErrorAs(t, err, &switchErr)
+		require.Equal(t, account.ID, switchErr.OriginalAccountID)
+		require.Equal(t, 1, upstream.calls, "should not retry")
+	})
+
+	t.Run("429_rate_limited_keyword_matches_rule", func(t *testing.T) {
+		saveAndSetBaseURLs(t)
+
+		upstream := &epFixedUpstream{statusCode: 429, body: `rate limited keyword`}
+		repo := &epAccountRepo{}
+		rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+		svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+		account := tempRulesAccount([]any{rateLimitRule})
+		p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+			t.Error("handleError should not be called for temp unschedulable")
+			return nil
+		})
+
+		result, err := svc.antigravityRetryLoop(p)
+
+		require.Nil(t, result)
+		var switchErr *AntigravityAccountSwitchError
+		require.ErrorAs(t, err, &switchErr)
+		require.Equal(t, account.ID, switchErr.OriginalAccountID)
+		require.Equal(t, 1, upstream.calls, "should not retry")
+	})
+
+	t.Run("503_body_no_match_continues_default_retry", func(t *testing.T) {
+		saveAndSetBaseURLs(t)
+
+		upstream := &epFixedUpstream{statusCode: 503, body: `random`}
+		repo := &epAccountRepo{}
+		rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+		svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+		account := tempRulesAccount([]any{overloadedRule})
+
+		// Use a short-lived context: the backoff sleep (~1s) will be
+		// interrupted, proving the code entered the default retry path
+		// instead of breaking early via error policy.
+		ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+		defer cancel()
+
+		p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+			return nil
+		})
+		p.ctx = ctx
+
+		result, err := svc.antigravityRetryLoop(p)
+
+		// Context cancellation during backoff proves default retry was entered
+		require.Nil(t, result)
+		require.ErrorIs(t, err, context.DeadlineExceeded)
+		require.GreaterOrEqual(t, upstream.calls, 1, "should have called upstream at least once")
+	})
+}
+
+// ---------------------------------------------------------------------------
+// TestRetryLoop_ErrorPolicy_NilRateLimitService: with a nil rateLimitService the retry loop must not panic and ends with context.DeadlineExceeded.
+// ---------------------------------------------------------------------------
+
+func TestRetryLoop_ErrorPolicy_NilRateLimitService(t *testing.T) {
+	saveAndSetBaseURLs(t)
+
+	upstream := &epFixedUpstream{statusCode: 429, body: `{"error":"rate limited"}`}
+	// rateLimitService is nil — must not panic
+	svc := &AntigravityGatewayService{rateLimitService: nil}
+
+	account := &Account{
+		ID:          300,
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+	}
+
+	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
+	defer cancel()
+
+	p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+		return nil
+	})
+	p.ctx = ctx
+
+	// Should not panic; enters the default retry path (eventually times out)
+	result, err := svc.antigravityRetryLoop(p)
+
+	require.Nil(t, result)
+	require.ErrorIs(t, err, context.DeadlineExceeded)
+	require.GreaterOrEqual(t, upstream.calls, 1)
+}
+
+// ---------------------------------------------------------------------------
+// TestRetryLoop_ErrorPolicy_NoPolicy_OriginalBehavior: without any error policy the loop makes antigravityMaxRetries upstream calls, then calls handleError once and returns the 429 response.
+// ---------------------------------------------------------------------------
+
+func TestRetryLoop_ErrorPolicy_NoPolicy_OriginalBehavior(t *testing.T) {
+	saveAndSetBaseURLs(t)
+
+	upstream := &epFixedUpstream{statusCode: 429, body: `{"error":"rate limited"}`}
+	repo := &epAccountRepo{}
+	rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+	svc := &AntigravityGatewayService{rateLimitService: rlSvc}
+
+	// Plain OAuth account with no error policy configured
+	account := &Account{
+		ID:          400,
+		Type:        AccountTypeOAuth,
+		Platform:    PlatformAntigravity,
+		Schedulable: true,
+		Status:      StatusActive,
+		Concurrency: 1,
+	}
+
+	var handleErrorCount int
+	p := newRetryParams(account, upstream, func(_ context.Context, _ string, _ *Account, _ int, _ http.Header, _ []byte, _ string, _ int64, _ string, _ bool) *handleModelRateLimitResult {
+		handleErrorCount++
+		return nil
+	})
+
+	result, err := svc.antigravityRetryLoop(p)
+
+	require.NoError(t, err)
+	require.NotNil(t, result)
+	require.NotNil(t, result.resp)
+	defer func() { _ = result.resp.Body.Close() }()
+
+	require.Equal(t, http.StatusTooManyRequests, result.resp.StatusCode)
+	require.Equal(t, antigravityMaxRetries, upstream.calls, "should exhaust all retries")
+	require.Equal(t, 1, handleErrorCount, "handleError should be called once after retries exhausted")
+}
--- a/backend/internal/service/error_policy_test.go
+++ b/backend/internal/service/error_policy_test.go
+//go:build unit
+
+package service
+
+import (
+	"context"
+	"net/http"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// TestCheckErrorPolicy — 6 table-driven cases for CheckErrorPolicy: none / matched / skipped / temp-unscheduled results, plus custom codes taking precedence over temp rules
+// ---------------------------------------------------------------------------
+
+func TestCheckErrorPolicy(t *testing.T) {
+	tests := []struct {
+		name       string
+		account    *Account
+		statusCode int
+		body       []byte
+		expected   ErrorPolicyResult
+	}{
+		{
+			name: "no_policy_oauth_returns_none",
+			account: &Account{
+				ID:       1,
+				Type:     AccountTypeOAuth,
+				Platform: PlatformAntigravity,
+				// no custom error codes, no temp rules
+			},
+			statusCode: 500,
+			body:       []byte(`"error"`),
+			expected:   ErrorPolicyNone,
+		},
+		{
+			name: "custom_error_codes_hit_returns_matched",
+			account: &Account{
+				ID:       2,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429), float64(500)},
+				},
+			},
+			statusCode: 500,
+			body:       []byte(`"error"`),
+			expected:   ErrorPolicyMatched,
+		},
+		{
+			name: "custom_error_codes_miss_returns_skipped",
+			account: &Account{
+				ID:       3,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429), float64(500)},
+				},
+			},
+			statusCode: 503,
+			body:       []byte(`"error"`),
+			expected:   ErrorPolicySkipped,
+		},
+		{
+			name: "temp_unschedulable_hit_returns_temp_unscheduled",
+			account: &Account{
+				ID:       4,
+				Type:     AccountTypeOAuth,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+							"description":      "overloaded rule",
+						},
+					},
+				},
+			},
+			statusCode: 503,
+			body:       []byte(`overloaded service`),
+			expected:   ErrorPolicyTempUnscheduled,
+		},
+		{
+			name: "temp_unschedulable_body_miss_returns_none",
+			account: &Account{
+				ID:       5,
+				Type:     AccountTypeOAuth,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+							"description":      "overloaded rule",
+						},
+					},
+				},
+			},
+			statusCode: 503,
+			body:       []byte(`random msg`),
+			expected:   ErrorPolicyNone,
+		},
+		{
+			name: "custom_error_codes_override_temp_unschedulable",
+			account: &Account{
+				ID:       6,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(503)},
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+							"description":      "overloaded rule",
+						},
+					},
+				},
+			},
+			statusCode: 503,
+			body:       []byte(`overloaded`),
+			expected:   ErrorPolicyMatched, // custom codes take precedence
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			repo := &errorPolicyRepoStub{}
+			svc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+
+			result := svc.CheckErrorPolicy(context.Background(), tt.account, tt.statusCode, tt.body)
+			require.Equal(t, tt.expected, result, "unexpected ErrorPolicyResult")
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// TestApplyErrorPolicy — 4 table-driven cases for applyErrorPolicy: checks the handled flag, the handleError call count and whether an *AntigravityAccountSwitchError is returned
+// ---------------------------------------------------------------------------
+
+func TestApplyErrorPolicy(t *testing.T) {
+	tests := []struct {
+		name              string
+		account           *Account
+		statusCode        int
+		body              []byte
+		expectedHandled   bool
+		expectedSwitchErr bool // expect *AntigravityAccountSwitchError
+		handleErrorCalls  int
+	}{
+		{
+			name: "none_not_handled",
+			account: &Account{
+				ID:       10,
+				Type:     AccountTypeOAuth,
+				Platform: PlatformAntigravity,
+			},
+			statusCode:       500,
+			body:             []byte(`"error"`),
+			expectedHandled:  false,
+			handleErrorCalls: 0,
+		},
+		{
+			name: "skipped_handled_no_handleError",
+			account: &Account{
+				ID:       11,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429)},
+				},
+			},
+			statusCode:       500, // not in custom codes
+			body:             []byte(`"error"`),
+			expectedHandled:  true,
+			handleErrorCalls: 0,
+		},
+		{
+			name: "matched_handled_calls_handleError",
+			account: &Account{
+				ID:       12,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(500)},
+				},
+			},
+			statusCode:       500,
+			body:             []byte(`"error"`),
+			expectedHandled:  true,
+			handleErrorCalls: 1,
+		},
+		{
+			name: "temp_unscheduled_returns_switch_error",
+			account: &Account{
+				ID:       13,
+				Type:     AccountTypeOAuth,
+				Platform: PlatformAntigravity,
+				Credentials: map[string]any{
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+						},
+					},
+				},
+			},
+			statusCode:        503,
+			body:              []byte(`overloaded`),
+			expectedHandled:   true,
+			expectedSwitchErr: true,
+			handleErrorCalls:  0,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			repo := &errorPolicyRepoStub{}
+			rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+			svc := &AntigravityGatewayService{
+				rateLimitService: rlSvc,
+			}
+
+			var handleErrorCount int
+			p := antigravityRetryLoopParams{
+				ctx:     context.Background(),
+				prefix:  "[test]",
+				account: tt.account,
+				handleError: func(ctx context.Context, prefix string, account *Account, statusCode int, headers http.Header, body []byte, requestedModel string, groupID int64, sessionHash string, isStickySession bool) *handleModelRateLimitResult {
+					handleErrorCount++
+					return nil
+				},
+				isStickySession: true,
+			}
+
+			handled, retErr := svc.applyErrorPolicy(p, tt.statusCode, http.Header{}, tt.body)
+
+			require.Equal(t, tt.expectedHandled, handled, "handled mismatch")
+			require.Equal(t, tt.handleErrorCalls, handleErrorCount, "handleError call count mismatch")
+
+			if tt.expectedSwitchErr {
+				var switchErr *AntigravityAccountSwitchError
+				require.ErrorAs(t, retErr, &switchErr)
+				require.Equal(t, tt.account.ID, switchErr.OriginalAccountID)
+			} else {
+				require.NoError(t, retErr)
+			}
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// errorPolicyRepoStub — minimal AccountRepository stub for error policy tests; embeds mockAccountRepoForGemini and records SetTempUnschedulable / SetError calls
+// ---------------------------------------------------------------------------
+
+type errorPolicyRepoStub struct {
+	mockAccountRepoForGemini
+	tempCalls    int    // number of SetTempUnschedulable calls
+	setErrCalls  int    // number of SetError calls
+	lastErrorMsg string // errorMsg passed to the most recent SetError call
+}
+
+func (r *errorPolicyRepoStub) SetTempUnschedulable(ctx context.Context, id int64, until time.Time, reason string) error { // counts the call; arguments are not stored
+	r.tempCalls++
+	return nil
+}
+
+func (r *errorPolicyRepoStub) SetError(ctx context.Context, id int64, errorMsg string) error { // counts the call and keeps the last message
+	r.setErrCalls++
+	r.lastErrorMsg = errorMsg
+	return nil
+}
--- a/backend/internal/service/gateway_multiplatform_test.go
+++ b/backend/internal/service/gateway_multiplatform_test.go
@@ -142,9 +142,6 @@ func (m *mockAccountRepoForPlatform) ListSchedulableByGroupIDAndPlatforms(ctx co
 func (m *mockAccountRepoForPlatform) SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error {
 	return nil
 }
-func (m *mockAccountRepoForPlatform) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
-	return nil
-}
 func (m *mockAccountRepoForPlatform) SetModelRateLimit(ctx context.Context, id int64, scope string, resetAt time.Time) error {
 	return nil
 }
@@ -216,30 +213,6 @@ func (m *mockGatewayCacheForPlatform) DeleteSessionAccountID(ctx context.Context
 	return nil
 }

-func (m *mockGatewayCacheForPlatform) IncrModelCallCount(ctx context.Context, accountID int64, model string) (int64, error) {
-	return 0, nil
-}
-
-func (m *mockGatewayCacheForPlatform) GetModelLoadBatch(ctx context.Context, accountIDs []int64, model string) (map[int64]*ModelLoadInfo, error) {
-	return nil, nil
-}
-
-func (m *mockGatewayCacheForPlatform) FindGeminiSession(ctx context.Context, groupID int64, prefixHash, digestChain string) (uuid string, accountID int64, found bool) {
-	return "", 0, false
-}
-
-func (m *mockGatewayCacheForPlatform) SaveGeminiSession(ctx context.Context, groupID int64, prefixHash, digestChain, uuid string, accountID int64) error {
-	return nil
-}
-
-func (m *mockGatewayCacheForPlatform) FindAnthropicSession(ctx context.Context, groupID int64, prefixHash, digestChain string) (uuid string, accountID int64, found bool) {
-	return "", 0, false
-}
-
-func (m *mockGatewayCacheForPlatform) SaveAnthropicSession(ctx context.Context, groupID int64, prefixHash, digestChain, uuid string, accountID int64) error {
-	return nil
-}
-
 type mockGroupRepoForGateway struct {
 	groups           map[int64]*Group
 	getByIDCalls     int

--- a/backend/internal/service/gateway_request.go
+++ b/backend/internal/service/gateway_request.go
@@ -6,9 +6,19 @@ import (
 	"fmt"
 	"math"

+	"github.com/Wei-Shaw/sub2api/internal/domain"
 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 )

+// SessionContext is the sticky-session context used to tell requests from different sources apart.
+// It is mixed in only at GenerateSessionHash's third-level fallback (the message-content hash),
+// so that different users sending identical messages do not get the same hash and pile onto one account.
+type SessionContext struct {
+	ClientIP  string
+	UserAgent string
+	APIKeyID  int64
+}
+
 // ParsedRequest 保存网关请求的预解析结果
 //
 // 性能优化说明：
@@ -22,20 +32,22 @@ import (
 // 2. 将解析结果 ParsedRequest 传递给 Service 层
 // 3. 避免重复 json.Unmarshal，减少 CPU 和内存开销
 type ParsedRequest struct {
-	Body            []byte // 原始请求体（保留用于转发）
-	Model           string // 请求的模型名称
-	Stream          bool   // 是否为流式请求
-	MetadataUserID  string // metadata.user_id（用于会话亲和）
-	System          any    // system 字段内容
-	Messages        []any  // messages 数组
-	HasSystem       bool   // 是否包含 system 字段（包含 null 也视为显式传入）
-	ThinkingEnabled bool   // 是否开启 thinking（部分平台会影响最终模型名）
-	MaxTokens       int    // max_tokens 值（用于探测请求拦截）
+	Body            []byte          // raw request body (kept for forwarding)
+	Model           string          // requested model name
+	Stream          bool            // whether this is a streaming request
+	MetadataUserID  string          // metadata.user_id (used for session affinity)
+	System          any             // content of the system field
+	Messages        []any           // messages array
+	HasSystem       bool            // whether a system field is present (an explicit null also counts as provided)
+	ThinkingEnabled bool            // whether thinking is enabled (on some platforms this affects the final model name)
+	MaxTokens       int             // max_tokens value (used to intercept probe requests)
+	SessionContext  *SessionContext // optional: request-context discriminator (behavior is unchanged when nil)
 }

-// ParseGatewayRequest 解析网关请求体并返回结构化结果
-// 性能优化：一次解析提取所有需要的字段，避免重复 Unmarshal
-func ParseGatewayRequest(body []byte) (*ParsedRequest, error) {
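+// ParseGatewayRequest parses the gateway request body and returns a structured result.
+// protocol selects the request protocol format (domain.PlatformAnthropic / domain.PlatformGemini);
+// different protocols use different system/messages field names.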
+func ParseGatewayRequest(body []byte, protocol string) (*ParsedRequest, error) {
 	var req map[string]any
 	if err := json.Unmarshal(body, &req); err != nil {
 		return nil, err
@@ -64,14 +76,29 @@ func ParseGatewayRequest(body []byte) (*ParsedRequest, error) {
 			parsed.MetadataUserID = userID
 		}
 	}
-	// system 字段只要存在就视为显式提供（即使为 null），
-	// 以避免客户端传 null 时被默认 system 误注入。
-	if system, ok := req["system"]; ok {
-		parsed.HasSystem = true
-		parsed.System = system
-	}
-	if messages, ok := req["messages"].([]any); ok {
-		parsed.Messages = messages
+
+	switch protocol {
+	case domain.PlatformGemini:
+		// Gemini 原生格式: systemInstruction.parts / contents
+		if sysInst, ok := req["systemInstruction"].(map[string]any); ok {
+			if parts, ok := sysInst["parts"].([]any); ok {
+				parsed.System = parts
+			}
+		}
+		if contents, ok := req["contents"].([]any); ok {
+			parsed.Messages = contents
+		}
+	default:
+		// Anthropic / OpenAI 格式: system / messages
+		// system 字段只要存在就视为显式提供（即使为 null），
+		// 以避免客户端传 null 时被默认 system 误注入。
+		if system, ok := req["system"]; ok {
+			parsed.HasSystem = true
+			parsed.System = system
+		}
+		if messages, ok := req["messages"].([]any); ok {
+			parsed.Messages = messages
+		}
 	}

 	// thinking: {type: "enabled"}

--- a/backend/internal/service/gateway_request_test.go
+++ b/backend/internal/service/gateway_request_test.go
@@ -4,12 +4,13 @@ import (
 	"encoding/json"
 	"testing"

+	"github.com/Wei-Shaw/sub2api/internal/domain"
 	"github.com/stretchr/testify/require"
 )

 func TestParseGatewayRequest(t *testing.T) {
 	body := []byte(`{"model":"claude-3-7-sonnet","stream":true,"metadata":{"user_id":"session_123e4567-e89b-12d3-a456-426614174000"},"system":[{"type":"text","text":"hello","cache_control":{"type":"ephemeral"}}],"messages":[{"content":"hi"}]}`)
-	parsed, err := ParseGatewayRequest(body)
+	parsed, err := ParseGatewayRequest(body, "")
 	require.NoError(t, err)
 	require.Equal(t, "claude-3-7-sonnet", parsed.Model)
 	require.True(t, parsed.Stream)
@@ -22,7 +23,7 @@ func TestParseGatewayRequest(t *testing.T) {

 func TestParseGatewayRequest_ThinkingEnabled(t *testing.T) {
 	body := []byte(`{"model":"claude-sonnet-4-5","thinking":{"type":"enabled"},"messages":[{"content":"hi"}]}`)
-	parsed, err := ParseGatewayRequest(body)
+	parsed, err := ParseGatewayRequest(body, "")
 	require.NoError(t, err)
 	require.Equal(t, "claude-sonnet-4-5", parsed.Model)
 	require.True(t, parsed.ThinkingEnabled)
@@ -30,21 +31,21 @@ func TestParseGatewayRequest_ThinkingEnabled(t *testing.T) {

 func TestParseGatewayRequest_MaxTokens(t *testing.T) {
 	body := []byte(`{"model":"claude-haiku-4-5","max_tokens":1}`)
-	parsed, err := ParseGatewayRequest(body)
+	parsed, err := ParseGatewayRequest(body, "")
 	require.NoError(t, err)
 	require.Equal(t, 1, parsed.MaxTokens)
 }

 func TestParseGatewayRequest_MaxTokensNonIntegralIgnored(t *testing.T) {
 	body := []byte(`{"model":"claude-haiku-4-5","max_tokens":1.5}`)
-	parsed, err := ParseGatewayRequest(body)
+	parsed, err := ParseGatewayRequest(body, "")
 	require.NoError(t, err)
 	require.Equal(t, 0, parsed.MaxTokens)
 }

 func TestParseGatewayRequest_SystemNull(t *testing.T) {
 	body := []byte(`{"model":"claude-3","system":null}`)
-	parsed, err := ParseGatewayRequest(body)
+	parsed, err := ParseGatewayRequest(body, "")
 	require.NoError(t, err)
 	// 显式传入 system:null 也应视为“字段已存在”，避免默认 system 被注入。
 	require.True(t, parsed.HasSystem)
@@ -53,16 +54,112 @@ func TestParseGatewayRequest_SystemNull(t *testing.T) {

 func TestParseGatewayRequest_InvalidModelType(t *testing.T) {
 	body := []byte(`{"model":123}`)
-	_, err := ParseGatewayRequest(body)
+	_, err := ParseGatewayRequest(body, "")
 	require.Error(t, err)
 }

 func TestParseGatewayRequest_InvalidStreamType(t *testing.T) {
 	body := []byte(`{"stream":"true"}`)
-	_, err := ParseGatewayRequest(body)
+	_, err := ParseGatewayRequest(body, "")
 	require.Error(t, err)
 }

+// ============ Gemini native format parsing tests ============
+
+// TestParseGatewayRequest_GeminiContents checks that, with the Gemini
+// protocol, the "contents" array is exposed as Messages, and that HasSystem
+// stays false and System stays nil when no systemInstruction is sent.
+func TestParseGatewayRequest_GeminiContents(t *testing.T) {
+	body := []byte(`{
+		"contents": [
+			{"role": "user", "parts": [{"text": "Hello"}]},
+			{"role": "model", "parts": [{"text": "Hi there"}]},
+			{"role": "user", "parts": [{"text": "How are you?"}]}
+		]
+	}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformGemini)
+	require.NoError(t, err)
+	require.Len(t, parsed.Messages, 3, "should parse contents as Messages")
+	require.False(t, parsed.HasSystem, "Gemini format should not set HasSystem")
+	require.Nil(t, parsed.System, "no systemInstruction means nil System")
+}
+
+// TestParseGatewayRequest_GeminiSystemInstruction checks that, with the
+// Gemini protocol, systemInstruction.parts is exposed as System (a []any of
+// part objects) alongside the parsed contents.
+func TestParseGatewayRequest_GeminiSystemInstruction(t *testing.T) {
+	body := []byte(`{
+		"systemInstruction": {
+			"parts": [{"text": "You are a helpful assistant."}]
+		},
+		"contents": [
+			{"role": "user", "parts": [{"text": "Hello"}]}
+		]
+	}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformGemini)
+	require.NoError(t, err)
+	require.NotNil(t, parsed.System, "should parse systemInstruction.parts as System")
+	// System must hold the raw parts slice, not the enclosing object.
+	parts, ok := parsed.System.([]any)
+	require.True(t, ok)
+	require.Len(t, parts, 1)
+	partMap, ok := parts[0].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "You are a helpful assistant.", partMap["text"])
+	require.Len(t, parsed.Messages, 1)
+}
+
+// TestParseGatewayRequest_GeminiWithModel checks that the top-level "model"
+// field is still extracted when the Gemini protocol is selected.
+func TestParseGatewayRequest_GeminiWithModel(t *testing.T) {
+	body := []byte(`{
+		"model": "gemini-2.5-pro",
+		"contents": [{"role": "user", "parts": [{"text": "test"}]}]
+	}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformGemini)
+	require.NoError(t, err)
+	require.Equal(t, "gemini-2.5-pro", parsed.Model)
+	require.Len(t, parsed.Messages, 1)
+}
+
+// TestParseGatewayRequest_GeminiIgnoresAnthropicFields checks that a body
+// carrying both Anthropic-style and Gemini-style fields is read using only
+// the Gemini ones when the Gemini protocol is selected.
+func TestParseGatewayRequest_GeminiIgnoresAnthropicFields(t *testing.T) {
+	// Under the Gemini format the system/messages fields should be ignored.
+	body := []byte(`{
+		"system": "should be ignored",
+		"messages": [{"role": "user", "content": "ignored"}],
+		"contents": [{"role": "user", "parts": [{"text": "real content"}]}]
+	}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformGemini)
+	require.NoError(t, err)
+	require.False(t, parsed.HasSystem, "Gemini protocol should not parse Anthropic system field")
+	require.Nil(t, parsed.System, "no systemInstruction = nil System")
+	require.Len(t, parsed.Messages, 1, "should use contents, not messages")
+}
+
+// TestParseGatewayRequest_GeminiEmptyContents checks that an empty "contents"
+// array parses without error and yields empty Messages.
+func TestParseGatewayRequest_GeminiEmptyContents(t *testing.T) {
+	body := []byte(`{"contents": []}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformGemini)
+	require.NoError(t, err)
+	require.Empty(t, parsed.Messages)
+}
+
+// TestParseGatewayRequest_GeminiNoContents checks that a Gemini body without
+// a "contents" field parses without error, leaves Messages nil and still
+// extracts the model name.
+func TestParseGatewayRequest_GeminiNoContents(t *testing.T) {
+	body := []byte(`{"model": "gemini-2.5-flash"}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformGemini)
+	require.NoError(t, err)
+	require.Nil(t, parsed.Messages)
+	require.Equal(t, "gemini-2.5-flash", parsed.Model)
+}
+
+// TestParseGatewayRequest_AnthropicIgnoresGeminiFields checks that a body
+// carrying both Anthropic-style and Gemini-style fields is read using only
+// system/messages when the Anthropic protocol is selected.
+func TestParseGatewayRequest_AnthropicIgnoresGeminiFields(t *testing.T) {
+	// Under the Anthropic format the contents/systemInstruction fields should be ignored.
+	body := []byte(`{
+		"system": "real system",
+		"messages": [{"role": "user", "content": "real content"}],
+		"contents": [{"role": "user", "parts": [{"text": "ignored"}]}],
+		"systemInstruction": {"parts": [{"text": "ignored"}]}
+	}`)
+	parsed, err := ParseGatewayRequest(body, domain.PlatformAnthropic)
+	require.NoError(t, err)
+	require.True(t, parsed.HasSystem)
+	require.Equal(t, "real system", parsed.System)
+	require.Len(t, parsed.Messages, 1)
+	// The single message must come from "messages", not from "contents".
+	msg, ok := parsed.Messages[0].(map[string]any)
+	require.True(t, ok)
+	require.Equal(t, "real content", msg["content"])
+}
+
 func TestFilterThinkingBlocks(t *testing.T) {
 	containsThinkingBlock := func(body []byte) bool {
 		var req map[string]any

--- a/backend/internal/service/gateway_service.go
+++ b/backend/internal/service/gateway_service.go
--- a/backend/internal/service/gateway_service_benchmark_test.go
+++ b/backend/internal/service/gateway_service_benchmark_test.go
@@ -14,7 +14,7 @@ func BenchmarkGenerateSessionHash_Metadata(b *testing.B) {

 	b.ReportAllocs()
 	for i := 0; i < b.N; i++ {
-		parsed, err := ParseGatewayRequest(body)
+		parsed, err := ParseGatewayRequest(body, "")
 		if err != nil {
 			b.Fatalf("解析请求失败: %v", err)
 		}

--- a/backend/internal/service/gemini_error_policy_test.go
+++ b/backend/internal/service/gemini_error_policy_test.go
+//go:build unit
+
+package service
+
+import (
+	"context"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+	"time"
+
+	"github.com/Wei-Shaw/sub2api/internal/config"
+	"github.com/gin-gonic/gin"
+	"github.com/stretchr/testify/require"
+)
+
+// ---------------------------------------------------------------------------
+// TestShouldFailoverGeminiUpstreamError — verifies the failover decision
+// for the ErrorPolicyNone path (original logic preserved).
+// ---------------------------------------------------------------------------
+
+func TestShouldFailoverGeminiUpstreamError(t *testing.T) {
+	// A zero-value service is used; no dependencies are set for this check.
+	svc := &GeminiMessagesCompatService{}
+
+	tests := []struct {
+		name       string
+		statusCode int
+		expected   bool
+	}{
+		// Status codes expected to trigger failover.
+		{"401_failover", 401, true},
+		{"403_failover", 403, true},
+		{"429_failover", 429, true},
+		{"529_failover", 529, true},
+		{"500_failover", 500, true},
+		{"502_failover", 502, true},
+		{"503_failover", 503, true},
+		// Status codes expected NOT to trigger failover.
+		{"400_no_failover", 400, false},
+		{"404_no_failover", 404, false},
+		{"422_no_failover", 422, false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := svc.shouldFailoverGeminiUpstreamError(tt.statusCode)
+			require.Equal(t, tt.expected, got)
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// TestCheckErrorPolicy_GeminiAccounts — verifies CheckErrorPolicy works
+// correctly for Gemini platform accounts (API Key type).
+// ---------------------------------------------------------------------------
+
+func TestCheckErrorPolicy_GeminiAccounts(t *testing.T) {
+	// Each case configures an account's Credentials map and states which
+	// ErrorPolicyResult CheckErrorPolicy is expected to return for the given
+	// upstream status code and body.
+	tests := []struct {
+		name       string
+		account    *Account
+		statusCode int
+		body       []byte
+		expected   ErrorPolicyResult
+	}{
+		{
+			name: "gemini_apikey_custom_codes_hit",
+			account: &Account{
+				ID:       100,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429), float64(500)},
+				},
+			},
+			statusCode: 429,
+			body:       []byte(`{"error":"rate limited"}`),
+			expected:   ErrorPolicyMatched,
+		},
+		{
+			name: "gemini_apikey_custom_codes_miss",
+			account: &Account{
+				ID:       101,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429)},
+				},
+			},
+			statusCode: 500,
+			body:       []byte(`{"error":"internal"}`),
+			expected:   ErrorPolicySkipped,
+		},
+		{
+			name: "gemini_apikey_no_custom_codes_returns_none",
+			account: &Account{
+				ID:       102,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+			},
+			statusCode: 500,
+			body:       []byte(`{"error":"internal"}`),
+			expected:   ErrorPolicyNone,
+		},
+		{
+			name: "gemini_apikey_temp_unschedulable_hit",
+			account: &Account{
+				ID:       103,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+						},
+					},
+				},
+			},
+			statusCode: 503,
+			body:       []byte(`overloaded service`),
+			expected:   ErrorPolicyTempUnscheduled,
+		},
+		{
+			name: "gemini_custom_codes_override_temp_unschedulable",
+			account: &Account{
+				ID:       104,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(503)},
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+						},
+					},
+				},
+			},
+			statusCode: 503,
+			body:       []byte(`overloaded`),
+			expected:   ErrorPolicyMatched, // custom codes take precedence
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// A fresh stub repo and service per subtest, so no state is
+			// shared between cases.
+			repo := &errorPolicyRepoStub{}
+			svc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+
+			result := svc.CheckErrorPolicy(context.Background(), tt.account, tt.statusCode, tt.body)
+			require.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// TestGeminiErrorPolicyIntegration — verifies the Gemini error handling
+// paths produce the correct behavior for each ErrorPolicyResult.
+//
+// These tests simulate the inline error policy switch in handleClaudeCompat
+// and forwardNativeGemini by calling the same methods in the same order.
+// ---------------------------------------------------------------------------
+
+func TestGeminiErrorPolicyIntegration(t *testing.T) {
+	gin.SetMode(gin.TestMode)
+
+	// NOTE(review): the policy switch below is re-implemented inline in the
+	// test body rather than reached through the production handlers, so a
+	// change to the handlers' ordering would not be caught here.
+	tests := []struct {
+		name                 string
+		account              *Account
+		statusCode           int
+		respBody             []byte
+		expectFailover       bool // expect UpstreamFailoverError
+		expectHandleError    bool // expect handleGeminiUpstreamError to be called
+		expectShouldFailover bool // for None path, whether shouldFailover triggers
+	}{
+		{
+			name: "custom_codes_matched_429_failover",
+			account: &Account{
+				ID:       200,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429)},
+				},
+			},
+			statusCode:        429,
+			respBody:          []byte(`{"error":"rate limited"}`),
+			expectFailover:    true,
+			expectHandleError: true,
+		},
+		{
+			name: "custom_codes_skipped_500_no_failover",
+			account: &Account{
+				ID:       201,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"custom_error_codes_enabled": true,
+					"custom_error_codes":         []any{float64(429)},
+				},
+			},
+			statusCode:        500,
+			respBody:          []byte(`{"error":"internal"}`),
+			expectFailover:    false,
+			expectHandleError: false,
+		},
+		{
+			name: "temp_unschedulable_matched_failover",
+			account: &Account{
+				ID:       202,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+				Credentials: map[string]any{
+					"temp_unschedulable_enabled": true,
+					"temp_unschedulable_rules": []any{
+						map[string]any{
+							"error_code":       float64(503),
+							"keywords":         []any{"overloaded"},
+							"duration_minutes": float64(10),
+						},
+					},
+				},
+			},
+			statusCode:        503,
+			respBody:          []byte(`overloaded`),
+			expectFailover:    true,
+			expectHandleError: true,
+		},
+		{
+			name: "no_policy_429_failover_via_shouldFailover",
+			account: &Account{
+				ID:       203,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+			},
+			statusCode:           429,
+			respBody:             []byte(`{"error":"rate limited"}`),
+			expectFailover:       true,
+			expectHandleError:    true,
+			expectShouldFailover: true,
+		},
+		{
+			name: "no_policy_400_no_failover",
+			account: &Account{
+				ID:       204,
+				Type:     AccountTypeAPIKey,
+				Platform: PlatformGemini,
+			},
+			statusCode:        400,
+			respBody:          []byte(`{"error":"bad request"}`),
+			expectFailover:    false,
+			expectHandleError: true,
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// NOTE(review): the counters recorded by geminiErrorPolicyRepo are
+			// never asserted in this test.
+			repo := &geminiErrorPolicyRepo{}
+			rlSvc := NewRateLimitService(repo, nil, &config.Config{}, nil, nil)
+			svc := &GeminiMessagesCompatService{
+				accountRepo:      repo,
+				rateLimitService: rlSvc,
+			}
+
+			// NOTE(review): the gin context is built here but is not passed to
+			// any call below; only its Request field is assigned.
+			writer := httptest.NewRecorder()
+			c, _ := gin.CreateTestContext(writer)
+			c.Request = httptest.NewRequest(http.MethodPost, "/v1/messages", nil)
+
+			// Simulate the Claude compat error handling path (same logic as native).
+			// This mirrors the inline switch in handleClaudeCompat.
+			// Both flags are declared before any goto so that jumping to
+			// "verify" never skips a variable declaration.
+			var handleErrorCalled bool
+			var gotFailover bool
+
+			ctx := context.Background()
+			statusCode := tt.statusCode
+			respBody := tt.respBody
+			account := tt.account
+			headers := http.Header{}
+
+			if svc.rateLimitService != nil {
+				switch svc.rateLimitService.CheckErrorPolicy(ctx, account, statusCode, respBody) {
+				case ErrorPolicySkipped:
+					// Skipped → return error directly (no handleGeminiUpstreamError, no failover)
+					gotFailover = false
+					handleErrorCalled = false
+					goto verify
+				case ErrorPolicyMatched, ErrorPolicyTempUnscheduled:
+					svc.handleGeminiUpstreamError(ctx, account, statusCode, headers, respBody)
+					handleErrorCalled = true
+					gotFailover = true
+					goto verify
+				}
+			}
+
+			// ErrorPolicyNone → original logic
+			svc.handleGeminiUpstreamError(ctx, account, statusCode, headers, respBody)
+			handleErrorCalled = true
+			if svc.shouldFailoverGeminiUpstreamError(statusCode) {
+				gotFailover = true
+			}
+
+		verify:
+			require.Equal(t, tt.expectFailover, gotFailover, "failover mismatch")
+			require.Equal(t, tt.expectHandleError, handleErrorCalled, "handleGeminiUpstreamError call mismatch")
+
+			// Extra check for cases that set expectShouldFailover: the status
+			// code itself must be one that shouldFailoverGeminiUpstreamError accepts.
+			if tt.expectShouldFailover {
+				require.True(t, svc.shouldFailoverGeminiUpstreamError(statusCode),
+					"shouldFailoverGeminiUpstreamError should return true for status %d", statusCode)
+			}
+		})
+	}
+}
+
+// ---------------------------------------------------------------------------
+// TestGeminiErrorPolicy_NilRateLimitService — verifies nil safety
+// ---------------------------------------------------------------------------
+
+func TestGeminiErrorPolicy_NilRateLimitService(t *testing.T) {
+	// Only rateLimitService is set (to nil); every other field of the service
+	// keeps its zero value.
+	svc := &GeminiMessagesCompatService{
+		rateLimitService: nil,
+	}
+
+	// When rateLimitService is nil, error policy is skipped → falls through to
+	// shouldFailoverGeminiUpstreamError (original logic).
+	// Verify this doesn't panic and follows expected behavior.
+	// NOTE(review): the handler-side nil check is not exercised here; the test
+	// only calls the two helper methods directly.
+
+	ctx := context.Background()
+	account := &Account{
+		ID:       300,
+		Type:     AccountTypeAPIKey,
+		Platform: PlatformGemini,
+		Credentials: map[string]any{
+			"custom_error_codes_enabled": true,
+			"custom_error_codes":         []any{float64(429)},
+		},
+	}
+
+	// The nil check should prevent CheckErrorPolicy from being called
+	if svc.rateLimitService != nil {
+		t.Fatal("rateLimitService should be nil for this test")
+	}
+
+	// shouldFailoverGeminiUpstreamError still works
+	require.True(t, svc.shouldFailoverGeminiUpstreamError(429))
+	require.False(t, svc.shouldFailoverGeminiUpstreamError(400))
+
+	// handleGeminiUpstreamError should not panic with nil rateLimitService
+	require.NotPanics(t, func() {
+		svc.handleGeminiUpstreamError(ctx, account, 500, http.Header{}, []byte(`error`))
+	})
+}
+
+// ---------------------------------------------------------------------------
+// geminiErrorPolicyRepo — minimal AccountRepository stub for Gemini error
+// policy tests. Embeds mockAccountRepoForGemini and adds tracking.
+// ---------------------------------------------------------------------------
+
+// geminiErrorPolicyRepo embeds mockAccountRepoForGemini and overrides
+// SetError, SetRateLimited and SetTempUnschedulable; setErrorCalls,
+// setRateLimitedCalls and setTempCalls count the calls to each of them.
+type geminiErrorPolicyRepo struct {
+	mockAccountRepoForGemini
+	setErrorCalls       int
+	setRateLimitedCalls int
+	setTempCalls        int
+}
+
+// SetError increments setErrorCalls and always returns nil; all arguments
+// are ignored.
+func (r *geminiErrorPolicyRepo) SetError(_ context.Context, _ int64, _ string) error {
+	r.setErrorCalls++
+	return nil
+}
+
+// SetRateLimited increments setRateLimitedCalls and always returns nil; all
+// arguments are ignored.
+func (r *geminiErrorPolicyRepo) SetRateLimited(_ context.Context, _ int64, _ time.Time) error {
+	r.setRateLimitedCalls++
+	return nil
+}
+
+// SetTempUnschedulable increments setTempCalls and always returns nil; all
+// arguments are ignored.
+func (r *geminiErrorPolicyRepo) SetTempUnschedulable(_ context.Context, _ int64, _ time.Time, _ string) error {
+	r.setTempCalls++
+	return nil
+}
--- a/backend/internal/service/gemini_messages_compat_service.go
+++ b/backend/internal/service/gemini_messages_compat_service.go
--- a/backend/internal/service/gemini_multiplatform_test.go
+++ b/backend/internal/service/gemini_multiplatform_test.go
@@ -133,9 +133,6 @@ func (m *mockAccountRepoForGemini) ListSchedulableByGroupIDAndPlatforms(ctx cont
 func (m *mockAccountRepoForGemini) SetRateLimited(ctx context.Context, id int64, resetAt time.Time) error {
 	return nil
 }
-func (m *mockAccountRepoForGemini) SetAntigravityQuotaScopeLimit(ctx context.Context, id int64, scope AntigravityQuotaScope, resetAt time.Time) error {
-	return nil
-}
 func (m *mockAccountRepoForGemini) SetModelRateLimit(ctx context.Context, id int64, scope string, resetAt time.Time) error {
 	return nil
 }
@@ -269,30 +266,6 @@ func (m *mockGatewayCacheForGemini) DeleteSessionAccountID(ctx context.Context,
 	return nil
 }

-func (m *mockGatewayCacheForGemini) IncrModelCallCount(ctx context.Context, accountID int64, model string) (int64, error) {
-	return 0, nil
-}
-
-func (m *mockGatewayCacheForGemini) GetModelLoadBatch(ctx context.Context, accountIDs []int64, model string) (map[int64]*ModelLoadInfo, error) {
-	return nil, nil
-}
-
-func (m *mockGatewayCacheForGemini) FindGeminiSession(ctx context.Context, groupID int64, prefixHash, digestChain string) (uuid string, accountID int64, found bool) {
-	return "", 0, false
-}
-
-func (m *mockGatewayCacheForGemini) SaveGeminiSession(ctx context.Context, groupID int64, prefixHash, digestChain, uuid string, accountID int64) error {
-	return nil
-}
-
-func (m *mockGatewayCacheForGemini) FindAnthropicSession(ctx context.Context, groupID int64, prefixHash, digestChain string) (uuid string, accountID int64, found bool) {
-	return "", 0, false
-}
-
-func (m *mockGatewayCacheForGemini) SaveAnthropicSession(ctx context.Context, groupID int64, prefixHash, digestChain, uuid string, accountID int64) error {
-	return nil
-}
-
 // TestGeminiMessagesCompatService_SelectAccountForModelWithExclusions_GeminiPlatform 测试 Gemini 单平台选择
 func TestGeminiMessagesCompatService_SelectAccountForModelWithExclusions_GeminiPlatform(t *testing.T) {
 	ctx := context.Background()

--- a/backend/internal/service/gemini_session.go
+++ b/backend/internal/service/gemini_session.go
@@ -6,26 +6,11 @@ import (
 	"encoding/json"
 	"strconv"
 	"strings"
-	"time"

 	"github.com/Wei-Shaw/sub2api/internal/pkg/antigravity"
 	"github.com/cespare/xxhash/v2"
 )

-// Gemini 会话 ID Fallback 相关常量
-const (
-	// geminiSessionTTLSeconds Gemini 会话缓存 TTL（5 分钟）
-	geminiSessionTTLSeconds = 300
-
-	// geminiSessionKeyPrefix Gemini 会话 Redis key 前缀
-	geminiSessionKeyPrefix = "gemini:sess:"
-)
-
-// GeminiSessionTTL 返回 Gemini 会话缓存 TTL
-func GeminiSessionTTL() time.Duration {
-	return geminiSessionTTLSeconds * time.Second
-}
-
 // shortHash 使用 XXHash64 + Base36 生成短 hash（16 字符）
 // XXHash64 比 SHA256 快约 10 倍，Base36 比 Hex 短约 20%
 func shortHash(data []byte) string {
@@ -79,35 +64,6 @@ func GenerateGeminiPrefixHash(userID, apiKeyID int64, ip, userAgent, platform, m
 	return base64.RawURLEncoding.EncodeToString(hash[:12])
 }

-// BuildGeminiSessionKey 构建 Gemini 会话 Redis key
-// 格式: gemini:sess:{groupID}:{prefixHash}:{digestChain}
-func BuildGeminiSessionKey(groupID int64, prefixHash, digestChain string) string {
-	return geminiSessionKeyPrefix + strconv.FormatInt(groupID, 10) + ":" + prefixHash + ":" + digestChain
-}
-
-// GenerateDigestChainPrefixes 生成摘要链的所有前缀（从长到短）
-// 用于 MGET 批量查询最长匹配
-func GenerateDigestChainPrefixes(chain string) []string {
-	if chain == "" {
-		return nil
-	}
-
-	var prefixes []string
-	c := chain
-
-	for c != "" {
-		prefixes = append(prefixes, c)
-		// 找到最后一个 "-" 的位置
-		if i := strings.LastIndex(c, "-"); i > 0 {
-			c = c[:i]
-		} else {
-			break
-		}
-	}
-
-	return prefixes
-}
-
 // ParseGeminiSessionValue 解析 Gemini 会话缓存值
 // 格式: {uuid}:{accountID}
 func ParseGeminiSessionValue(value string) (uuid string, accountID int64, ok bool) {
@@ -139,15 +95,6 @@ func FormatGeminiSessionValue(uuid string, accountID int64) string {
 // geminiDigestSessionKeyPrefix Gemini 摘要 fallback 会话 key 前缀
 const geminiDigestSessionKeyPrefix = "gemini:digest:"

-// geminiTrieKeyPrefix Gemini Trie 会话 key 前缀
-const geminiTrieKeyPrefix = "gemini:trie:"
-
-// BuildGeminiTrieKey 构建 Gemini Trie Redis key
-// 格式: gemini:trie:{groupID}:{prefixHash}
-func BuildGeminiTrieKey(groupID int64, prefixHash string) string {
-	return geminiTrieKeyPrefix + strconv.FormatInt(groupID, 10) + ":" + prefixHash
-}
-
 // GenerateGeminiDigestSessionKey 生成 Gemini 摘要 fallback 的 sessionKey
 // 组合 prefixHash 前 8 位 + uuid 前 8 位，确保不同会话产生不同的 sessionKey
 // 用于在 SelectAccountWithLoadAwareness 中保持粘性会话

--- a/backend/internal/service/gemini_session_integration_test.go
+++ b/backend/internal/service/gemini_session_integration_test.go
--- a/backend/internal/service/gemini_session_test.go
+++ b/backend/internal/service/gemini_session_test.go
--- a/backend/internal/service/generate_session_hash_test.go
+++ b/backend/internal/service/generate_session_hash_test.go
--- a/backend/internal/service/model_rate_limit_test.go
+++ b/backend/internal/service/model_rate_limit_test.go
--- a/backend/internal/service/openai_gateway_service.go
+++ b/backend/internal/service/openai_gateway_service.go
@@ -580,10 +580,6 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
 			}
 		}
 	} else {
-		type accountWithLoad struct {
-			account  *Account
-			loadInfo *AccountLoadInfo
-		}
 		var available []accountWithLoad
 		for _, acc := range candidates {
 			loadInfo := loadMap[acc.ID]
@@ -618,6 +614,7 @@ func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Contex
 					return a.account.LastUsedAt.Before(*b.account.LastUsedAt)
 				}
 			})
+			shuffleWithinSortGroups(available)

 			for _, item := range available {
 				result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency)

--- a/backend/internal/service/openai_gateway_service_test.go
+++ b/backend/internal/service/openai_gateway_service_test.go
--- a/backend/internal/service/ops_account_availability.go
+++ b/backend/internal/service/ops_account_availability.go