Merge remote-tracking branch 'upstream/main'

# Conflicts: # backend/internal/service/openai_gateway_messages.go

Merge remote-tracking branch 'upstream/main'
# Conflicts: # backend/internal/service/openai_gateway_messages.go
ca8692c7 · InCerry · b6d46fd5 · 318aa5e0 · ca8692c7 · ca8692c7
Commit ca8692c7 authored Mar 31, 2026 by InCerry
--- a/README.md
+++ b/README.md
@@ -49,9 +49,13 @@ Sub2API is an AI API gateway platform designed to distribute and manage API quot

 <table>
 <tr>
-<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="120"></a></td>
+<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="150"></a></td>
 <td valign="middle"><b><a href="https://shop.pincc.ai/">PinCC</a></b> is the official relay service built on Sub2API, offering stable access to Claude Code, Codex, Gemini and other popular models — ready to use, no deployment or maintenance required.</td>
 </tr>
+<tr>
+<td width="180"><a href="https://www.packyapi.com/register?aff=sub2api"><img src="assets/partners/logos/packycode.png" alt="PackyCode" width="150"></a></td>
+<td>Thanks to PackyCode for sponsoring this project! PackyCode is a reliable and efficient API relay service provider, offering relay services for Claude Code, Codex, Gemini, and more. PackyCode provides special discounts for our software users: register using <a href="https://www.packyapi.com/register?aff=sub2api">this link</a> and enter the "sub2api" promo code during first recharge to get 10% off.</td>
+</tr>
 </table>

 ## Ecosystem

--- a/README_CN.md
+++ b/README_CN.md
@@ -48,9 +48,13 @@ Sub2API 是一个 AI API 网关平台，用于分发和管理 AI 产品订阅的

 <table>
 <tr>
-<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="120"></a></td>
+<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="150"></a></td>
 <td valign="middle"><b><a href="https://shop.pincc.ai/">PinCC</a></b> 是基于 Sub2API 搭建的官方中转服务，提供 Claude Code、Codex、Gemini 等主流模型的稳定中转，开箱即用，免去自建部署与运维烦恼。</td>
 </tr>
+<tr>
+<td width="180"><a href="https://www.packyapi.com/register?aff=sub2api"><img src="assets/partners/logos/packycode.png" alt="PackyCode" width="150"></a></td>
+<td>感谢 PackyCode 赞助了本项目！PackyCode 是一家稳定、高效的API中转服务商，提供 Claude Code、Codex、Gemini 等多种中转服务。PackyCode 为本软件的用户提供了特别优惠，使用<a href="https://www.packyapi.com/register?aff=sub2api">此链接</a>注册并在充值时填写"sub2api"优惠码，首次充值可以享受9折优惠！</td>
+</tr>
 </table>

 ## 生态项目

--- a/README_JA.md
+++ b/README_JA.md
@@ -49,9 +49,13 @@ Sub2API は、AI 製品のサブスクリプションから API クォータを

 <table>
 <tr>
-<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="120"></a></td>
+<td width="180" align="center" valign="middle"><a href="https://shop.pincc.ai/"><img src="assets/partners/logos/pincc-logo.png" alt="pincc" width="150"></a></td>
 <td valign="middle"><b><a href="https://shop.pincc.ai/">PinCC</a></b> は Sub2API 上に構築された公式リレーサービスで、Claude Code、Codex、Gemini などの人気モデルへの安定したアクセスを提供します。デプロイやメンテナンスは不要で、すぐにご利用いただけます。</td>
 </tr>
+<tr>
+<td width="180"><a href="https://www.packyapi.com/register?aff=sub2api"><img src="assets/partners/logos/packycode.png" alt="PackyCode" width="150"></a></td>
+<td>PackyCode のご支援に感謝します！PackyCode は Claude Code、Codex、Gemini などのリレーサービスを提供する信頼性の高い API 中継プラットフォームです。本ソフト利用者向けに特別割引があります：<a href="https://www.packyapi.com/register?aff=sub2api">このリンク</a>で登録し、チャージ時に「sub2api」クーポンを入力すると 10% オフになります。</td>
+</tr>
 </table>

 ## エコシステム

--- a/assets/partners/logos/packycode.png
+++ b/assets/partners/logos/packycode.png
--- a/backend/cmd/server/VERSION
+++ b/backend/cmd/server/VERSION
-0.1.105
+0.1.106
--- a/backend/cmd/server/wire_gen.go
+++ b/backend/cmd/server/wire_gen.go
@@ -137,7 +137,8 @@ func initializeApplication(buildInfo handler.BuildInfo) (*Application, error) {
 	schedulerOutboxRepository := repository.NewSchedulerOutboxRepository(db)
 	schedulerSnapshotService := service.ProvideSchedulerSnapshotService(schedulerCache, schedulerOutboxRepository, accountRepository, groupRepository, configConfig)
 	antigravityTokenProvider := service.ProvideAntigravityTokenProvider(accountRepository, geminiTokenCache, antigravityOAuthService, oauthRefreshAPI, tempUnschedCache)
-	antigravityGatewayService := service.NewAntigravityGatewayService(accountRepository, gatewayCache, schedulerSnapshotService, antigravityTokenProvider, rateLimitService, httpUpstream, settingService)
+	internal500CounterCache := repository.NewInternal500CounterCache(redisClient)
+	antigravityGatewayService := service.NewAntigravityGatewayService(accountRepository, gatewayCache, schedulerSnapshotService, antigravityTokenProvider, rateLimitService, httpUpstream, settingService, internal500CounterCache)
 	tlsFingerprintProfileRepository := repository.NewTLSFingerprintProfileRepository(client)
 	tlsFingerprintProfileCache := repository.NewTLSFingerprintProfileCache(redisClient)
 	tlsFingerprintProfileService := service.NewTLSFingerprintProfileService(tlsFingerprintProfileRepository, tlsFingerprintProfileCache)

--- a/backend/internal/config/config.go
+++ b/backend/internal/config/config.go
@@ -1281,8 +1281,8 @@ func setDefaults() {
 	viper.SetDefault("rate_limit.oauth_401_cooldown_minutes", 10)

 	// Pricing - 从 model-price-repo 同步模型定价和上下文窗口数据（固定到 commit，避免分支漂移）
-	viper.SetDefault("pricing.remote_url", "https://raw.githubusercontent.com/Wei-Shaw/model-price-repo/c7947e9871687e664180bc971d4837f1fc2784a9/model_prices_and_context_window.json")
-	viper.SetDefault("pricing.hash_url", "https://raw.githubusercontent.com/Wei-Shaw/model-price-repo/c7947e9871687e664180bc971d4837f1fc2784a9/model_prices_and_context_window.sha256")
+	viper.SetDefault("pricing.remote_url", "https://raw.githubusercontent.com/Wei-Shaw/model-price-repo/main/model_prices_and_context_window.json")
+	viper.SetDefault("pricing.hash_url", "https://raw.githubusercontent.com/Wei-Shaw/model-price-repo/main/model_prices_and_context_window.sha256")
 	viper.SetDefault("pricing.data_dir", "./data")
 	viper.SetDefault("pricing.fallback_file", "./resources/model-pricing/model_prices_and_context_window.json")
 	viper.SetDefault("pricing.update_interval_hours", 24)

--- a/backend/internal/handler/dto/mappers.go
+++ b/backend/internal/handler/dto/mappers.go
@@ -268,6 +268,14 @@ func AccountFromServiceShallow(a *service.Account) *Account {
 			target := a.GetCacheTTLOverrideTarget()
 			out.CacheTTLOverrideTarget = &target
 		}
+		// 自定义 Base URL 中继转发
+		if a.IsCustomBaseURLEnabled() {
+			enabled := true
+			out.CustomBaseURLEnabled = &enabled
+			if customURL := a.GetCustomBaseURL(); customURL != "" {
+				out.CustomBaseURL = &customURL
+			}
+		}
 	}

 	// 提取账号配额限制（apikey / bedrock 类型有效）

--- a/backend/internal/handler/dto/types.go
+++ b/backend/internal/handler/dto/types.go
@@ -198,6 +198,10 @@ type Account struct {
 	CacheTTLOverrideEnabled *bool   `json:"cache_ttl_override_enabled,omitempty"`
 	CacheTTLOverrideTarget  *string `json:"cache_ttl_override_target,omitempty"`

+	// 自定义 Base URL 中继转发（仅 Anthropic OAuth/SetupToken 账号有效）
+	CustomBaseURLEnabled *bool   `json:"custom_base_url_enabled,omitempty"`
+	CustomBaseURL        *string `json:"custom_base_url,omitempty"`
+
 	// API Key 账号配额限制
 	QuotaLimit       *float64 `json:"quota_limit,omitempty"`
 	QuotaUsed        *float64 `json:"quota_used,omitempty"`

--- a/backend/internal/handler/openai_gateway_handler.go
+++ b/backend/internal/handler/openai_gateway_handler.go
@@ -541,6 +541,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 		return
 	}
 	reqModel := modelResult.String()
+	routingModel := service.NormalizeOpenAICompatRequestedModel(reqModel)
 	reqStream := gjson.GetBytes(body, "stream").Bool()

 	reqLog = reqLog.With(zap.String("model", reqModel), zap.Bool("stream", reqStream))
@@ -606,7 +607,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 			apiKey.GroupID,
 			"", // no previous_response_id
 			sessionHash,
-			reqModel,
+			routingModel,
 			failedAccountIDs,
 			service.OpenAIUpstreamTransportAny,
 		)
@@ -621,7 +622,7 @@ func (h *OpenAIGatewayHandler) Messages(c *gin.Context) {
 				if apiKey.Group != nil {
 					defaultModel = apiKey.Group.DefaultMappedModel
 				}
-				if defaultModel != "" && defaultModel != reqModel {
+				if defaultModel != "" && defaultModel != routingModel {
 					reqLog.Info("openai_messages.fallback_to_default_model",
 						zap.String("default_mapped_model", defaultModel),
 					)

--- a/backend/internal/pkg/oauth/oauth.go
+++ b/backend/internal/pkg/oauth/oauth.go
@@ -24,20 +24,18 @@ const (
 	RedirectURI  = "https://platform.claude.com/oauth/code/callback"

 	// Scopes - Browser URL (includes org:create_api_key for user authorization)
-	ScopeOAuth = "org:create_api_key user:profile user:inference user:sessions:claude_code user:mcp_servers"
+	ScopeOAuth = "org:create_api_key user:profile user:inference user:sessions:claude_code user:mcp_servers user:file_upload"
 	// Scopes - Internal API call (org:create_api_key not supported in API)
-	ScopeAPI = "user:profile user:inference user:sessions:claude_code user:mcp_servers"
+	ScopeAPI = "user:profile user:inference user:sessions:claude_code user:mcp_servers user:file_upload"
 	// Scopes - Setup token (inference only)
 	ScopeInference = "user:inference"

-	// Code Verifier character set (RFC 7636 compliant)
-	codeVerifierCharset = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
-
 	// Session TTL
 	SessionTTL = 30 * time.Minute
 )

 // OAuthSession stores OAuth flow state
+
 type OAuthSession struct {
 	State        string    `json:"state"`
 	CodeVerifier string    `json:"code_verifier"`
@@ -147,30 +145,14 @@ func GenerateSessionID() (string, error) {
 	return hex.EncodeToString(bytes), nil
 }

-// GenerateCodeVerifier generates a PKCE code verifier using character set method
+// GenerateCodeVerifier generates a PKCE code verifier (RFC 7636).
+// Uses 32 random bytes → base64url-no-pad, producing a 43-char verifier.
 func GenerateCodeVerifier() (string, error) {
-	const targetLen = 32
-	charsetLen := len(codeVerifierCharset)
-	limit := 256 - (256 % charsetLen)
-
-	result := make([]byte, 0, targetLen)
-	randBuf := make([]byte, targetLen*2)
-
-	for len(result) < targetLen {
-		if _, err := rand.Read(randBuf); err != nil {
-			return "", err
-		}
-		for _, b := range randBuf {
-			if int(b) < limit {
-				result = append(result, codeVerifierCharset[int(b)%charsetLen])
-				if len(result) >= targetLen {
-					break
-				}
-			}
-		}
+	bytes, err := GenerateRandomBytes(32)
+	if err != nil {
+		return "", err
 	}
-
-	return base64URLEncode(result), nil
+	return base64URLEncode(bytes), nil
 }

 // GenerateCodeChallenge generates a PKCE code challenge using S256 method

--- a/backend/internal/repository/api_key_repo.go
+++ b/backend/internal/repository/api_key_repo.go
@@ -3,6 +3,7 @@ package repository
 import (
 	"context"
 	"database/sql"
+	"fmt"
 	"time"

 	dbent "github.com/Wei-Shaw/sub2api/ent"
@@ -257,9 +258,12 @@ func (r *apiKeyRepository) Update(ctx context.Context, key *service.APIKey) erro
 }

 func (r *apiKeyRepository) Delete(ctx context.Context, id int64) error {
+	// 存在唯一键约束 生成tombstone key 用来释放原key，长度远小于 128，满足 schema 限制
+	tombstoneKey := fmt.Sprintf("__deleted__%d__%d", id, time.Now().UnixNano())
 	// 显式软删除：避免依赖 Hook 行为，确保 deleted_at 一定被设置。
 	affected, err := r.client.APIKey.Update().
 		Where(apikey.IDEQ(id), apikey.DeletedAtIsNil()).
+		SetKey(tombstoneKey).
 		SetDeletedAt(time.Now()).
 		Save(ctx)
 	if err != nil {

--- a/backend/internal/repository/api_key_repo_integration_test.go
+++ b/backend/internal/repository/api_key_repo_integration_test.go
@@ -151,6 +151,31 @@ func (s *APIKeyRepoSuite) TestDelete() {
 	s.Require().Error(err, "expected error after delete")
 }

+func (s *APIKeyRepoSuite) TestCreate_AfterSoftDelete_AllowsSameKey() {
+	user := s.mustCreateUser("recreate-after-soft-delete@test.com")
+	const reusedKey = "sk-reuse-after-soft-delete"
+
+	first := &service.APIKey{
+		UserID: user.ID,
+		Key:    reusedKey,
+		Name:   "First Key",
+		Status: service.StatusActive,
+	}
+	s.Require().NoError(s.repo.Create(s.ctx, first), "create first key")
+
+	s.Require().NoError(s.repo.Delete(s.ctx, first.ID), "soft delete first key")
+
+	second := &service.APIKey{
+		UserID: user.ID,
+		Key:    reusedKey,
+		Name:   "Second Key",
+		Status: service.StatusActive,
+	}
+	s.Require().NoError(s.repo.Create(s.ctx, second), "create second key with same key")
+	s.Require().NotZero(second.ID)
+	s.Require().NotEqual(first.ID, second.ID, "recreated key should be a new row")
+}
+
 // --- ListByUserID / CountByUserID ---

 func (s *APIKeyRepoSuite) TestListByUserID() {

--- a/backend/internal/repository/internal500_counter_cache.go
+++ b/backend/internal/repository/internal500_counter_cache.go
+package repository
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/Wei-Shaw/sub2api/internal/service"
+	"github.com/redis/go-redis/v9"
+)
+
+const (
+	internal500CounterPrefix     = "internal500_count:account:"
+	internal500CounterTTLSeconds = 86400 // 24 小时兜底
+)
+
+// internal500CounterIncrScript 使用 Lua 脚本原子性地增加计数并返回当前值
+// 如果 key 不存在，则创建并设置过期时间
+var internal500CounterIncrScript = redis.NewScript(`
+	local key = KEYS[1]
+	local ttl = tonumber(ARGV[1])
+
+	local count = redis.call('INCR', key)
+	if count == 1 then
+		redis.call('EXPIRE', key, ttl)
+	end
+
+	return count
+`)
+
+type internal500CounterCache struct {
+	rdb *redis.Client
+}
+
+// NewInternal500CounterCache 创建 INTERNAL 500 连续失败计数器缓存实例
+func NewInternal500CounterCache(rdb *redis.Client) service.Internal500CounterCache {
+	return &internal500CounterCache{rdb: rdb}
+}
+
+// IncrementInternal500Count 原子递增计数并返回当前值
+func (c *internal500CounterCache) IncrementInternal500Count(ctx context.Context, accountID int64) (int64, error) {
+	key := fmt.Sprintf("%s%d", internal500CounterPrefix, accountID)
+
+	result, err := internal500CounterIncrScript.Run(ctx, c.rdb, []string{key}, internal500CounterTTLSeconds).Int64()
+	if err != nil {
+		return 0, fmt.Errorf("increment internal500 count: %w", err)
+	}
+
+	return result, nil
+}
+
+// ResetInternal500Count 清零计数器（成功响应时调用）
+func (c *internal500CounterCache) ResetInternal500Count(ctx context.Context, accountID int64) error {
+	key := fmt.Sprintf("%s%d", internal500CounterPrefix, accountID)
+	return c.rdb.Del(ctx, key).Err()
+}
--- a/backend/internal/repository/wire.go
+++ b/backend/internal/repository/wire.go
@@ -81,6 +81,7 @@ var ProviderSet = wire.NewSet(
 	NewAPIKeyCache,
 	NewTempUnschedCache,
 	NewTimeoutCounterCache,
+	NewInternal500CounterCache,
 	ProvideConcurrencyCache,
 	ProvideSessionLimitCache,
 	NewRPMCache,

--- a/backend/internal/service/account.go
+++ b/backend/internal/service/account.go
@@ -1229,6 +1229,28 @@ func (a *Account) IsSessionIDMaskingEnabled() bool {
 	return false
 }

+// IsCustomBaseURLEnabled 检查是否启用自定义 base URL 中继转发
+// 仅适用于 Anthropic OAuth/SetupToken 类型账号
+func (a *Account) IsCustomBaseURLEnabled() bool {
+	if !a.IsAnthropicOAuthOrSetupToken() {
+		return false
+	}
+	if a.Extra == nil {
+		return false
+	}
+	if v, ok := a.Extra["custom_base_url_enabled"]; ok {
+		if enabled, ok := v.(bool); ok {
+			return enabled
+		}
+	}
+	return false
+}
+
+// GetCustomBaseURL 返回自定义中继服务的 base URL
+func (a *Account) GetCustomBaseURL() string {
+	return a.GetExtraString("custom_base_url")
+}
+
 // IsCacheTTLOverrideEnabled 检查是否启用缓存 TTL 强制替换
 // 仅适用于 Anthropic OAuth/SetupToken 类型账号
 // 启用后将所有 cache creation tokens 归入指定的 TTL 类型（5m 或 1h）

--- a/backend/internal/service/admin_service.go
+++ b/backend/internal/service/admin_service.go
@@ -1866,6 +1866,18 @@ func (s *adminServiceImpl) ClearAccountError(ctx context.Context, id int64) (*Ac
 	if err := s.accountRepo.ClearError(ctx, id); err != nil {
 		return nil, err
 	}
+	if err := s.accountRepo.ClearRateLimit(ctx, id); err != nil {
+		return nil, err
+	}
+	if err := s.accountRepo.ClearAntigravityQuotaScopes(ctx, id); err != nil {
+		return nil, err
+	}
+	if err := s.accountRepo.ClearModelRateLimits(ctx, id); err != nil {
+		return nil, err
+	}
+	if err := s.accountRepo.ClearTempUnschedulable(ctx, id); err != nil {
+		return nil, err
+	}
 	return s.accountRepo.GetByID(ctx, id)
 }


--- a/backend/internal/service/admin_service_clear_error_test.go
+++ b/backend/internal/service/admin_service_clear_error_test.go
+//go:build unit
+
+package service
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/require"
+)
+
+type accountRepoStubForClearAccountError struct {
+	mockAccountRepoForGemini
+	account                 *Account
+	clearErrorCalls         int
+	clearRateLimitCalls     int
+	clearAntigravityCalls   int
+	clearModelRateLimitCalls int
+	clearTempUnschedCalls   int
+}
+
+func (r *accountRepoStubForClearAccountError) GetByID(ctx context.Context, id int64) (*Account, error) {
+	return r.account, nil
+}
+
+func (r *accountRepoStubForClearAccountError) ClearError(ctx context.Context, id int64) error {
+	r.clearErrorCalls++
+	r.account.Status = StatusActive
+	r.account.ErrorMessage = ""
+	return nil
+}
+
+func (r *accountRepoStubForClearAccountError) ClearRateLimit(ctx context.Context, id int64) error {
+	r.clearRateLimitCalls++
+	r.account.RateLimitedAt = nil
+	r.account.RateLimitResetAt = nil
+	return nil
+}
+
+func (r *accountRepoStubForClearAccountError) ClearAntigravityQuotaScopes(ctx context.Context, id int64) error {
+	r.clearAntigravityCalls++
+	return nil
+}
+
+func (r *accountRepoStubForClearAccountError) ClearModelRateLimits(ctx context.Context, id int64) error {
+	r.clearModelRateLimitCalls++
+	return nil
+}
+
+func (r *accountRepoStubForClearAccountError) ClearTempUnschedulable(ctx context.Context, id int64) error {
+	r.clearTempUnschedCalls++
+	r.account.TempUnschedulableUntil = nil
+	r.account.TempUnschedulableReason = ""
+	return nil
+}
+
+func TestAdminService_ClearAccountError_AlsoClearsRecoverableRuntimeState(t *testing.T) {
+	until := time.Now().Add(10 * time.Minute)
+	resetAt := time.Now().Add(5 * time.Minute)
+	repo := &accountRepoStubForClearAccountError{
+		account: &Account{
+			ID:                     31,
+			Platform:               PlatformOpenAI,
+			Type:                   AccountTypeOAuth,
+			Status:                 StatusError,
+			ErrorMessage:           "refresh failed",
+			RateLimitResetAt:       &resetAt,
+			TempUnschedulableUntil: &until,
+			TempUnschedulableReason: "missing refresh token",
+		},
+	}
+	svc := &adminServiceImpl{accountRepo: repo}
+
+	updated, err := svc.ClearAccountError(context.Background(), 31)
+	require.NoError(t, err)
+	require.NotNil(t, updated)
+	require.Equal(t, 1, repo.clearErrorCalls)
+	require.Equal(t, 1, repo.clearRateLimitCalls)
+	require.Equal(t, 1, repo.clearAntigravityCalls)
+	require.Equal(t, 1, repo.clearModelRateLimitCalls)
+	require.Equal(t, 1, repo.clearTempUnschedCalls)
+	require.Nil(t, updated.RateLimitResetAt)
+	require.Nil(t, updated.TempUnschedulableUntil)
+	require.Empty(t, updated.TempUnschedulableReason)
+}
--- a/backend/internal/service/antigravity_gateway_service.go
+++ b/backend/internal/service/antigravity_gateway_service.go
@@ -614,6 +614,7 @@ func (s *AntigravityGatewayService) antigravityRetryLoop(p antigravityRetryLoopP
 urlFallbackLoop:
 	for urlIdx, baseURL := range availableURLs {
 		usedBaseURL = baseURL
+		allAttemptsInternal500 := true // 追踪本轮所有 attempt 是否全部命中 INTERNAL 500
 		for attempt := 1; attempt <= antigravityMaxRetries; attempt++ {
 			select {
 			case <-p.ctx.Done():
@@ -766,10 +767,19 @@ urlFallbackLoop:
 							logger.LegacyPrintf("service.antigravity_gateway", "%s status=context_canceled_during_backoff", p.prefix)
 							return nil, p.ctx.Err()
 						}
+						// 追踪 INTERNAL 500：非匹配的 attempt 清除标记
+						if !isAntigravityInternalServerError(resp.StatusCode, respBody) {
+							allAttemptsInternal500 = false
+						}
 						continue
 					}
 				}

+				// INTERNAL 500 渐进惩罚：3 次重试全部命中特定 500 时递增计数器并惩罚
+				if allAttemptsInternal500 && isAntigravityInternalServerError(resp.StatusCode, respBody) {
+					s.handleInternal500RetryExhausted(p.ctx, p.prefix, p.account)
+				}
+
 				// 其他 4xx 错误或重试用尽，直接返回
 				resp = &http.Response{
 					StatusCode: resp.StatusCode,
@@ -788,6 +798,11 @@ urlFallbackLoop:
 		antigravity.DefaultURLAvailability.MarkSuccess(usedBaseURL)
 	}

+	// 成功响应时清零 INTERNAL 500 连续失败计数器（覆盖所有成功路径，含 smart retry）
+	if resp != nil && resp.StatusCode < 400 {
+		s.resetInternal500Counter(p.ctx, p.prefix, p.account.ID)
+	}
+
 	return &antigravityRetryLoopResult{resp: resp}, nil
 }

@@ -862,6 +877,7 @@ type AntigravityGatewayService struct {
 	settingService    *SettingService
 	cache             GatewayCache // 用于模型级限流时清除粘性会话绑定
 	schedulerSnapshot *SchedulerSnapshotService
+	internal500Cache  Internal500CounterCache // INTERNAL 500 渐进惩罚计数器
 }

 func NewAntigravityGatewayService(
@@ -872,6 +888,7 @@ func NewAntigravityGatewayService(
 	rateLimitService *RateLimitService,
 	httpUpstream HTTPUpstream,
 	settingService *SettingService,
+	internal500Cache Internal500CounterCache,
 ) *AntigravityGatewayService {
 	return &AntigravityGatewayService{
 		accountRepo:       accountRepo,
@@ -881,6 +898,7 @@ func NewAntigravityGatewayService(
 		settingService:    settingService,
 		cache:             cache,
 		schedulerSnapshot: schedulerSnapshot,
+		internal500Cache:  internal500Cache,
 	}
 }


--- a/backend/internal/service/antigravity_internal500_penalty.go
+++ b/backend/internal/service/antigravity_internal500_penalty.go
+package service
+
+import (
+	"context"
+	"fmt"
+	"log/slog"
+	"net/http"
+	"time"
+
+	"github.com/tidwall/gjson"
+)
+
+// INTERNAL 500 渐进惩罚：连续多轮全部返回特定 500 错误时的惩罚时长
+const (
+	internal500PenaltyTier1Duration  = 30 * time.Minute // 第 1 轮：临时不可调度 30 分钟
+	internal500PenaltyTier2Duration  = 2 * time.Hour    // 第 2 轮：临时不可调度 2 小时
+	internal500PenaltyTier3Threshold = 3                // 第 3+ 轮：永久禁用
+)
+
+// isAntigravityInternalServerError 检测特定的 INTERNAL 500 错误
+// 必须同时匹配 error.code==500, error.message=="Internal error encountered.", error.status=="INTERNAL"
+func isAntigravityInternalServerError(statusCode int, body []byte) bool {
+	if statusCode != http.StatusInternalServerError {
+		return false
+	}
+	return gjson.GetBytes(body, "error.code").Int() == 500 &&
+		gjson.GetBytes(body, "error.message").String() == "Internal error encountered." &&
+		gjson.GetBytes(body, "error.status").String() == "INTERNAL"
+}
+
+// applyInternal500Penalty 根据连续 INTERNAL 500 轮次数应用渐进惩罚
+// count=1: temp_unschedulable 10 分钟
+// count=2: temp_unschedulable 10 小时
+// count>=3: SetError 永久禁用
+func (s *AntigravityGatewayService) applyInternal500Penalty(
+	ctx context.Context, prefix string, account *Account, count int64,
+) {
+	switch {
+	case count >= int64(internal500PenaltyTier3Threshold):
+		reason := fmt.Sprintf("INTERNAL 500 consecutive failures: %d rounds", count)
+		if err := s.accountRepo.SetError(ctx, account.ID, reason); err != nil {
+			slog.Error("internal500_set_error_failed", "account_id", account.ID, "error", err)
+			return
+		}
+		slog.Warn("internal500_account_disabled",
+			"account_id", account.ID, "account_name", account.Name, "consecutive_count", count)
+	case count == 2:
+		until := time.Now().Add(internal500PenaltyTier2Duration)
+		reason := fmt.Sprintf("INTERNAL 500 x%d (temp unsched %v)", count, internal500PenaltyTier2Duration)
+		if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, reason); err != nil {
+			slog.Error("internal500_temp_unsched_failed", "account_id", account.ID, "error", err)
+			return
+		}
+		slog.Warn("internal500_temp_unschedulable",
+			"account_id", account.ID, "account_name", account.Name,
+			"duration", internal500PenaltyTier2Duration, "consecutive_count", count)
+	case count == 1:
+		until := time.Now().Add(internal500PenaltyTier1Duration)
+		reason := fmt.Sprintf("INTERNAL 500 x%d (temp unsched %v)", count, internal500PenaltyTier1Duration)
+		if err := s.accountRepo.SetTempUnschedulable(ctx, account.ID, until, reason); err != nil {
+			slog.Error("internal500_temp_unsched_failed", "account_id", account.ID, "error", err)
+			return
+		}
+		slog.Info("internal500_temp_unschedulable",
+			"account_id", account.ID, "account_name", account.Name,
+			"duration", internal500PenaltyTier1Duration, "consecutive_count", count)
+	}
+}
+
+// handleInternal500RetryExhausted 处理 INTERNAL 500 重试耗尽：递增计数器并应用惩罚
+func (s *AntigravityGatewayService) handleInternal500RetryExhausted(
+	ctx context.Context, prefix string, account *Account,
+) {
+	if s.internal500Cache == nil {
+		return
+	}
+	count, err := s.internal500Cache.IncrementInternal500Count(ctx, account.ID)
+	if err != nil {
+		slog.Error("internal500_counter_increment_failed",
+			"prefix", prefix, "account_id", account.ID, "error", err)
+		return
+	}
+	s.applyInternal500Penalty(ctx, prefix, account, count)
+}
+
+// resetInternal500Counter 成功响应时清零 INTERNAL 500 计数器
+func (s *AntigravityGatewayService) resetInternal500Counter(
+	ctx context.Context, prefix string, accountID int64,
+) {
+	if s.internal500Cache == nil {
+		return
+	}
+	if err := s.internal500Cache.ResetInternal500Count(ctx, accountID); err != nil {
+		slog.Error("internal500_counter_reset_failed",
+			"prefix", prefix, "account_id", accountID, "error", err)
+	}
+}