Unverified Commit c5c12d4c authored by Wesley Liddick's avatar Wesley Liddick Committed by GitHub
Browse files

Revert "feat(gateway): 实现负载感知的账号调度优化 (#114)" (#117)

This reverts commit 8d252303.
parent 8d252303
package service
import (
"testing"
)
// TestConvertClaudeToolsToGeminiTools_CustomType 测试custom类型工具转换
func TestConvertClaudeToolsToGeminiTools_CustomType(t *testing.T) {
tests := []struct {
name string
tools any
expectedLen int
description string
}{
{
name: "Standard tools",
tools: []any{
map[string]any{
"name": "get_weather",
"description": "Get weather info",
"input_schema": map[string]any{"type": "object"},
},
},
expectedLen: 1,
description: "标准工具格式应该正常转换",
},
{
name: "Custom type tool (MCP format)",
tools: []any{
map[string]any{
"type": "custom",
"name": "mcp_tool",
"custom": map[string]any{
"description": "MCP tool description",
"input_schema": map[string]any{"type": "object"},
},
},
},
expectedLen: 1,
description: "Custom类型工具应该从custom字段读取",
},
{
name: "Mixed standard and custom tools",
tools: []any{
map[string]any{
"name": "standard_tool",
"description": "Standard",
"input_schema": map[string]any{"type": "object"},
},
map[string]any{
"type": "custom",
"name": "custom_tool",
"custom": map[string]any{
"description": "Custom",
"input_schema": map[string]any{"type": "object"},
},
},
},
expectedLen: 1,
description: "混合工具应该都能正确转换",
},
{
name: "Custom tool without custom field",
tools: []any{
map[string]any{
"type": "custom",
"name": "invalid_custom",
// 缺少 custom 字段
},
},
expectedLen: 0, // 应该被跳过
description: "缺少custom字段的custom工具应该被跳过",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := convertClaudeToolsToGeminiTools(tt.tools)
if tt.expectedLen == 0 {
if result != nil {
t.Errorf("%s: expected nil result, got %v", tt.description, result)
}
return
}
if result == nil {
t.Fatalf("%s: expected non-nil result", tt.description)
}
if len(result) != 1 {
t.Errorf("%s: expected 1 tool declaration, got %d", tt.description, len(result))
return
}
toolDecl, ok := result[0].(map[string]any)
if !ok {
t.Fatalf("%s: result[0] is not map[string]any", tt.description)
}
funcDecls, ok := toolDecl["functionDeclarations"].([]any)
if !ok {
t.Fatalf("%s: functionDeclarations is not []any", tt.description)
}
toolsArr, _ := tt.tools.([]any)
expectedFuncCount := 0
for _, tool := range toolsArr {
toolMap, _ := tool.(map[string]any)
if toolMap["name"] != "" {
// 检查是否为有效的custom工具
if toolMap["type"] == "custom" {
if toolMap["custom"] != nil {
expectedFuncCount++
}
} else {
expectedFuncCount++
}
}
}
if len(funcDecls) != expectedFuncCount {
t.Errorf("%s: expected %d function declarations, got %d",
tt.description, expectedFuncCount, len(funcDecls))
}
})
}
}
......@@ -7,7 +7,6 @@ import (
"fmt"
"io"
"net/http"
"regexp"
"strconv"
"strings"
"time"
......@@ -164,45 +163,6 @@ type GeminiTokenInfo struct {
Scope string `json:"scope,omitempty"`
ProjectID string `json:"project_id,omitempty"`
OAuthType string `json:"oauth_type,omitempty"` // "code_assist" 或 "ai_studio"
TierID string `json:"tier_id,omitempty"` // Gemini Code Assist tier: LEGACY/PRO/ULTRA
}
// validateTierID validates tier_id format and length
func validateTierID(tierID string) error {
if tierID == "" {
return nil // Empty is allowed
}
if len(tierID) > 64 {
return fmt.Errorf("tier_id exceeds maximum length of 64 characters")
}
// Allow alphanumeric, underscore, hyphen, and slash (for tier paths)
if !regexp.MustCompile(`^[a-zA-Z0-9_/-]+$`).MatchString(tierID) {
return fmt.Errorf("tier_id contains invalid characters")
}
return nil
}
// extractTierIDFromAllowedTiers extracts tierID from LoadCodeAssist response
// Prioritizes IsDefault tier, falls back to first non-empty tier
func extractTierIDFromAllowedTiers(allowedTiers []geminicli.AllowedTier) string {
tierID := "LEGACY"
// First pass: look for default tier
for _, tier := range allowedTiers {
if tier.IsDefault && strings.TrimSpace(tier.ID) != "" {
tierID = strings.TrimSpace(tier.ID)
break
}
}
// Second pass: if still LEGACY, take first non-empty tier
if tierID == "LEGACY" {
for _, tier := range allowedTiers {
if strings.TrimSpace(tier.ID) != "" {
tierID = strings.TrimSpace(tier.ID)
break
}
}
}
return tierID
}
func (s *GeminiOAuthService) ExchangeCode(ctx context.Context, input *GeminiExchangeCodeInput) (*GeminiTokenInfo, error) {
......@@ -263,14 +223,13 @@ func (s *GeminiOAuthService) ExchangeCode(ctx context.Context, input *GeminiExch
expiresAt := time.Now().Unix() + tokenResp.ExpiresIn - 300
projectID := sessionProjectID
var tierID string
// 对于 code_assist 模式,project_id 是必需的
// 对于 ai_studio 模式,project_id 是可选的(不影响使用 AI Studio API)
if oauthType == "code_assist" {
if projectID == "" {
var err error
projectID, tierID, err = s.fetchProjectID(ctx, tokenResp.AccessToken, proxyURL)
projectID, err = s.fetchProjectID(ctx, tokenResp.AccessToken, proxyURL)
if err != nil {
// 记录警告但不阻断流程,允许后续补充 project_id
fmt.Printf("[GeminiOAuth] Warning: Failed to fetch project_id during token exchange: %v\n", err)
......@@ -289,7 +248,6 @@ func (s *GeminiOAuthService) ExchangeCode(ctx context.Context, input *GeminiExch
ExpiresAt: expiresAt,
Scope: tokenResp.Scope,
ProjectID: projectID,
TierID: tierID,
OAuthType: oauthType,
}, nil
}
......@@ -399,7 +357,7 @@ func (s *GeminiOAuthService) RefreshAccountToken(ctx context.Context, account *A
// For Code Assist, project_id is required. Auto-detect if missing.
// For AI Studio OAuth, project_id is optional and should not block refresh.
if oauthType == "code_assist" && strings.TrimSpace(tokenInfo.ProjectID) == "" {
projectID, tierID, err := s.fetchProjectID(ctx, tokenInfo.AccessToken, proxyURL)
projectID, err := s.fetchProjectID(ctx, tokenInfo.AccessToken, proxyURL)
if err != nil {
return nil, fmt.Errorf("failed to auto-detect project_id: %w", err)
}
......@@ -408,7 +366,6 @@ func (s *GeminiOAuthService) RefreshAccountToken(ctx context.Context, account *A
return nil, fmt.Errorf("failed to auto-detect project_id: empty result")
}
tokenInfo.ProjectID = projectID
tokenInfo.TierID = tierID
}
return tokenInfo, nil
......@@ -431,13 +388,6 @@ func (s *GeminiOAuthService) BuildAccountCredentials(tokenInfo *GeminiTokenInfo)
if tokenInfo.ProjectID != "" {
creds["project_id"] = tokenInfo.ProjectID
}
if tokenInfo.TierID != "" {
// Validate tier_id before storing
if err := validateTierID(tokenInfo.TierID); err == nil {
creds["tier_id"] = tokenInfo.TierID
}
// Silently skip invalid tier_id (don't block account creation)
}
if tokenInfo.OAuthType != "" {
creds["oauth_type"] = tokenInfo.OAuthType
}
......@@ -448,26 +398,34 @@ func (s *GeminiOAuthService) Stop() {
s.sessionStore.Stop()
}
func (s *GeminiOAuthService) fetchProjectID(ctx context.Context, accessToken, proxyURL string) (string, string, error) {
func (s *GeminiOAuthService) fetchProjectID(ctx context.Context, accessToken, proxyURL string) (string, error) {
if s.codeAssist == nil {
return "", "", errors.New("code assist client not configured")
return "", errors.New("code assist client not configured")
}
loadResp, loadErr := s.codeAssist.LoadCodeAssist(ctx, accessToken, proxyURL, nil)
// Extract tierID from response (works whether CloudAICompanionProject is set or not)
tierID := "LEGACY"
if loadResp != nil {
tierID = extractTierIDFromAllowedTiers(loadResp.AllowedTiers)
}
// If LoadCodeAssist returned a project, use it
if loadErr == nil && loadResp != nil && strings.TrimSpace(loadResp.CloudAICompanionProject) != "" {
return strings.TrimSpace(loadResp.CloudAICompanionProject), tierID, nil
return strings.TrimSpace(loadResp.CloudAICompanionProject), nil
}
// Pick tier from allowedTiers; if no default tier is marked, pick the first non-empty tier ID.
// (tierID already extracted above, reuse it)
tierID := "LEGACY"
if loadResp != nil {
for _, tier := range loadResp.AllowedTiers {
if tier.IsDefault && strings.TrimSpace(tier.ID) != "" {
tierID = strings.TrimSpace(tier.ID)
break
}
}
if strings.TrimSpace(tierID) == "" || tierID == "LEGACY" {
for _, tier := range loadResp.AllowedTiers {
if strings.TrimSpace(tier.ID) != "" {
tierID = strings.TrimSpace(tier.ID)
break
}
}
}
}
req := &geminicli.OnboardUserRequest{
TierID: tierID,
......@@ -485,39 +443,39 @@ func (s *GeminiOAuthService) fetchProjectID(ctx context.Context, accessToken, pr
// If Code Assist onboarding fails (e.g. INVALID_ARGUMENT), fallback to Cloud Resource Manager projects.
fallback, fbErr := fetchProjectIDFromResourceManager(ctx, accessToken, proxyURL)
if fbErr == nil && strings.TrimSpace(fallback) != "" {
return strings.TrimSpace(fallback), tierID, nil
return strings.TrimSpace(fallback), nil
}
return "", "", err
return "", err
}
if resp.Done {
if resp.Response != nil && resp.Response.CloudAICompanionProject != nil {
switch v := resp.Response.CloudAICompanionProject.(type) {
case string:
return strings.TrimSpace(v), tierID, nil
return strings.TrimSpace(v), nil
case map[string]any:
if id, ok := v["id"].(string); ok {
return strings.TrimSpace(id), tierID, nil
return strings.TrimSpace(id), nil
}
}
}
fallback, fbErr := fetchProjectIDFromResourceManager(ctx, accessToken, proxyURL)
if fbErr == nil && strings.TrimSpace(fallback) != "" {
return strings.TrimSpace(fallback), tierID, nil
return strings.TrimSpace(fallback), nil
}
return "", "", errors.New("onboardUser completed but no project_id returned")
return "", errors.New("onboardUser completed but no project_id returned")
}
time.Sleep(2 * time.Second)
}
fallback, fbErr := fetchProjectIDFromResourceManager(ctx, accessToken, proxyURL)
if fbErr == nil && strings.TrimSpace(fallback) != "" {
return strings.TrimSpace(fallback), tierID, nil
return strings.TrimSpace(fallback), nil
}
if loadErr != nil {
return "", "", fmt.Errorf("loadCodeAssist failed (%v) and onboardUser timeout after %d attempts", loadErr, maxAttempts)
return "", fmt.Errorf("loadCodeAssist failed (%v) and onboardUser timeout after %d attempts", loadErr, maxAttempts)
}
return "", "", fmt.Errorf("onboardUser timeout after %d attempts", maxAttempts)
return "", fmt.Errorf("onboardUser timeout after %d attempts", maxAttempts)
}
type googleCloudProject struct {
......
......@@ -112,7 +112,7 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
}
}
detected, tierID, err := p.geminiOAuthService.fetchProjectID(ctx, accessToken, proxyURL)
detected, err := p.geminiOAuthService.fetchProjectID(ctx, accessToken, proxyURL)
if err != nil {
log.Printf("[GeminiTokenProvider] Auto-detect project_id failed: %v, fallback to AI Studio API mode", err)
return accessToken, nil
......@@ -123,9 +123,6 @@ func (p *GeminiTokenProvider) GetAccessToken(ctx context.Context, account *Accou
account.Credentials = make(map[string]any)
}
account.Credentials["project_id"] = detected
if tierID != "" {
account.Credentials["tier_id"] = tierID
}
_ = p.accountRepo.Update(ctx, account)
}
}
......
......@@ -13,7 +13,6 @@ import (
"log"
"net/http"
"regexp"
"sort"
"strconv"
"strings"
"time"
......@@ -81,7 +80,6 @@ type OpenAIGatewayService struct {
userSubRepo UserSubscriptionRepository
cache GatewayCache
cfg *config.Config
concurrencyService *ConcurrencyService
billingService *BillingService
rateLimitService *RateLimitService
billingCacheService *BillingCacheService
......@@ -97,7 +95,6 @@ func NewOpenAIGatewayService(
userSubRepo UserSubscriptionRepository,
cache GatewayCache,
cfg *config.Config,
concurrencyService *ConcurrencyService,
billingService *BillingService,
rateLimitService *RateLimitService,
billingCacheService *BillingCacheService,
......@@ -111,7 +108,6 @@ func NewOpenAIGatewayService(
userSubRepo: userSubRepo,
cache: cache,
cfg: cfg,
concurrencyService: concurrencyService,
billingService: billingService,
rateLimitService: rateLimitService,
billingCacheService: billingCacheService,
......@@ -130,14 +126,6 @@ func (s *OpenAIGatewayService) GenerateSessionHash(c *gin.Context) string {
return hex.EncodeToString(hash[:])
}
// BindStickySession sets session -> account binding with standard TTL.
func (s *OpenAIGatewayService) BindStickySession(ctx context.Context, sessionHash string, accountID int64) error {
if sessionHash == "" || accountID <= 0 {
return nil
}
return s.cache.SetSessionAccountID(ctx, "openai:"+sessionHash, accountID, openaiStickySessionTTL)
}
// SelectAccount selects an OpenAI account with sticky session support
func (s *OpenAIGatewayService) SelectAccount(ctx context.Context, groupID *int64, sessionHash string) (*Account, error) {
return s.SelectAccountForModel(ctx, groupID, sessionHash, "")
......@@ -230,254 +218,6 @@ func (s *OpenAIGatewayService) SelectAccountForModelWithExclusions(ctx context.C
return selected, nil
}
// SelectAccountWithLoadAwareness selects an account with load-awareness and wait plan.
func (s *OpenAIGatewayService) SelectAccountWithLoadAwareness(ctx context.Context, groupID *int64, sessionHash string, requestedModel string, excludedIDs map[int64]struct{}) (*AccountSelectionResult, error) {
cfg := s.schedulingConfig()
var stickyAccountID int64
if sessionHash != "" && s.cache != nil {
if accountID, err := s.cache.GetSessionAccountID(ctx, "openai:"+sessionHash); err == nil {
stickyAccountID = accountID
}
}
if s.concurrencyService == nil || !cfg.LoadBatchEnabled {
account, err := s.SelectAccountForModelWithExclusions(ctx, groupID, sessionHash, requestedModel, excludedIDs)
if err != nil {
return nil, err
}
result, err := s.tryAcquireAccountSlot(ctx, account.ID, account.Concurrency)
if err == nil && result.Acquired {
return &AccountSelectionResult{
Account: account,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
}
if stickyAccountID > 0 && stickyAccountID == account.ID && s.concurrencyService != nil {
waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, account.ID)
if waitingCount < cfg.StickySessionMaxWaiting {
return &AccountSelectionResult{
Account: account,
WaitPlan: &AccountWaitPlan{
AccountID: account.ID,
MaxConcurrency: account.Concurrency,
Timeout: cfg.StickySessionWaitTimeout,
MaxWaiting: cfg.StickySessionMaxWaiting,
},
}, nil
}
}
return &AccountSelectionResult{
Account: account,
WaitPlan: &AccountWaitPlan{
AccountID: account.ID,
MaxConcurrency: account.Concurrency,
Timeout: cfg.FallbackWaitTimeout,
MaxWaiting: cfg.FallbackMaxWaiting,
},
}, nil
}
accounts, err := s.listSchedulableAccounts(ctx, groupID)
if err != nil {
return nil, err
}
if len(accounts) == 0 {
return nil, errors.New("no available accounts")
}
isExcluded := func(accountID int64) bool {
if excludedIDs == nil {
return false
}
_, excluded := excludedIDs[accountID]
return excluded
}
// ============ Layer 1: Sticky session ============
if sessionHash != "" {
accountID, err := s.cache.GetSessionAccountID(ctx, "openai:"+sessionHash)
if err == nil && accountID > 0 && !isExcluded(accountID) {
account, err := s.accountRepo.GetByID(ctx, accountID)
if err == nil && account.IsSchedulable() && account.IsOpenAI() &&
(requestedModel == "" || account.IsModelSupported(requestedModel)) {
result, err := s.tryAcquireAccountSlot(ctx, accountID, account.Concurrency)
if err == nil && result.Acquired {
_ = s.cache.RefreshSessionTTL(ctx, "openai:"+sessionHash, openaiStickySessionTTL)
return &AccountSelectionResult{
Account: account,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
}
waitingCount, _ := s.concurrencyService.GetAccountWaitingCount(ctx, accountID)
if waitingCount < cfg.StickySessionMaxWaiting {
return &AccountSelectionResult{
Account: account,
WaitPlan: &AccountWaitPlan{
AccountID: accountID,
MaxConcurrency: account.Concurrency,
Timeout: cfg.StickySessionWaitTimeout,
MaxWaiting: cfg.StickySessionMaxWaiting,
},
}, nil
}
}
}
}
// ============ Layer 2: Load-aware selection ============
candidates := make([]*Account, 0, len(accounts))
for i := range accounts {
acc := &accounts[i]
if isExcluded(acc.ID) {
continue
}
if requestedModel != "" && !acc.IsModelSupported(requestedModel) {
continue
}
candidates = append(candidates, acc)
}
if len(candidates) == 0 {
return nil, errors.New("no available accounts")
}
accountLoads := make([]AccountWithConcurrency, 0, len(candidates))
for _, acc := range candidates {
accountLoads = append(accountLoads, AccountWithConcurrency{
ID: acc.ID,
MaxConcurrency: acc.Concurrency,
})
}
loadMap, err := s.concurrencyService.GetAccountsLoadBatch(ctx, accountLoads)
if err != nil {
ordered := append([]*Account(nil), candidates...)
sortAccountsByPriorityAndLastUsed(ordered, false)
for _, acc := range ordered {
result, err := s.tryAcquireAccountSlot(ctx, acc.ID, acc.Concurrency)
if err == nil && result.Acquired {
if sessionHash != "" {
_ = s.cache.SetSessionAccountID(ctx, "openai:"+sessionHash, acc.ID, openaiStickySessionTTL)
}
return &AccountSelectionResult{
Account: acc,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
}
}
} else {
type accountWithLoad struct {
account *Account
loadInfo *AccountLoadInfo
}
var available []accountWithLoad
for _, acc := range candidates {
loadInfo := loadMap[acc.ID]
if loadInfo == nil {
loadInfo = &AccountLoadInfo{AccountID: acc.ID}
}
if loadInfo.LoadRate < 100 {
available = append(available, accountWithLoad{
account: acc,
loadInfo: loadInfo,
})
}
}
if len(available) > 0 {
sort.SliceStable(available, func(i, j int) bool {
a, b := available[i], available[j]
if a.account.Priority != b.account.Priority {
return a.account.Priority < b.account.Priority
}
if a.loadInfo.LoadRate != b.loadInfo.LoadRate {
return a.loadInfo.LoadRate < b.loadInfo.LoadRate
}
switch {
case a.account.LastUsedAt == nil && b.account.LastUsedAt != nil:
return true
case a.account.LastUsedAt != nil && b.account.LastUsedAt == nil:
return false
case a.account.LastUsedAt == nil && b.account.LastUsedAt == nil:
return false
default:
return a.account.LastUsedAt.Before(*b.account.LastUsedAt)
}
})
for _, item := range available {
result, err := s.tryAcquireAccountSlot(ctx, item.account.ID, item.account.Concurrency)
if err == nil && result.Acquired {
if sessionHash != "" {
_ = s.cache.SetSessionAccountID(ctx, "openai:"+sessionHash, item.account.ID, openaiStickySessionTTL)
}
return &AccountSelectionResult{
Account: item.account,
Acquired: true,
ReleaseFunc: result.ReleaseFunc,
}, nil
}
}
}
}
// ============ Layer 3: Fallback wait ============
sortAccountsByPriorityAndLastUsed(candidates, false)
for _, acc := range candidates {
return &AccountSelectionResult{
Account: acc,
WaitPlan: &AccountWaitPlan{
AccountID: acc.ID,
MaxConcurrency: acc.Concurrency,
Timeout: cfg.FallbackWaitTimeout,
MaxWaiting: cfg.FallbackMaxWaiting,
},
}, nil
}
return nil, errors.New("no available accounts")
}
func (s *OpenAIGatewayService) listSchedulableAccounts(ctx context.Context, groupID *int64) ([]Account, error) {
var accounts []Account
var err error
if s.cfg != nil && s.cfg.RunMode == config.RunModeSimple {
accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
} else if groupID != nil {
accounts, err = s.accountRepo.ListSchedulableByGroupIDAndPlatform(ctx, *groupID, PlatformOpenAI)
} else {
accounts, err = s.accountRepo.ListSchedulableByPlatform(ctx, PlatformOpenAI)
}
if err != nil {
return nil, fmt.Errorf("query accounts failed: %w", err)
}
return accounts, nil
}
func (s *OpenAIGatewayService) tryAcquireAccountSlot(ctx context.Context, accountID int64, maxConcurrency int) (*AcquireResult, error) {
if s.concurrencyService == nil {
return &AcquireResult{Acquired: true, ReleaseFunc: func() {}}, nil
}
return s.concurrencyService.AcquireAccountSlot(ctx, accountID, maxConcurrency)
}
func (s *OpenAIGatewayService) schedulingConfig() config.GatewaySchedulingConfig {
if s.cfg != nil {
return s.cfg.Gateway.Scheduling
}
return config.GatewaySchedulingConfig{
StickySessionMaxWaiting: 3,
StickySessionWaitTimeout: 45 * time.Second,
FallbackWaitTimeout: 30 * time.Second,
FallbackMaxWaiting: 100,
LoadBatchEnabled: true,
SlotCleanupInterval: 30 * time.Second,
}
}
// GetAccessToken gets the access token for an OpenAI account
func (s *OpenAIGatewayService) GetAccessToken(ctx context.Context, account *Account) (string, string, error) {
switch account.Type {
......
......@@ -73,15 +73,6 @@ func ProvideDeferredService(accountRepo AccountRepository, timingWheel *TimingWh
return svc
}
// ProvideConcurrencyService creates ConcurrencyService and starts slot cleanup worker.
func ProvideConcurrencyService(cache ConcurrencyCache, accountRepo AccountRepository, cfg *config.Config) *ConcurrencyService {
svc := NewConcurrencyService(cache)
if cfg != nil {
svc.StartSlotCleanupWorker(accountRepo, cfg.Gateway.Scheduling.SlotCleanupInterval)
}
return svc
}
// ProviderSet is the Wire provider set for all services
var ProviderSet = wire.NewSet(
// Core services
......@@ -116,7 +107,7 @@ var ProviderSet = wire.NewSet(
ProvideEmailQueueService,
NewTurnstileService,
NewSubscriptionService,
ProvideConcurrencyService,
NewConcurrencyService,
NewIdentityService,
NewCRSSyncService,
ProvideUpdateService,
......
......@@ -122,21 +122,6 @@ pricing:
# Hash check interval in minutes
hash_check_interval_minutes: 10
# =============================================================================
# Gateway (Optional)
# =============================================================================
gateway:
# Wait time (in seconds) for upstream response headers (streaming body not affected)
response_header_timeout: 300
# Log upstream error response body summary (safe/truncated; does not log request content)
log_upstream_error_body: false
# Max bytes to log from upstream error body
log_upstream_error_body_max_bytes: 2048
# Auto inject anthropic-beta for API-key accounts when needed (default off)
inject_beta_for_apikey: false
# Allow failover on selected 400 errors (default off)
failover_on_400: false
# =============================================================================
# Gemini OAuth (Required for Gemini accounts)
# =============================================================================
......
```mermaid
flowchart TD
%% Master dispatch
A[HTTP Request] --> B{Route}
B -->|v1 messages| GA0
B -->|openai v1 responses| OA0
B -->|v1beta models model action| GM0
B -->|v1 messages count tokens| GT0
B -->|v1beta models list or get| GL0
%% =========================
%% FLOW A: Claude Gateway
%% =========================
subgraph FLOW_A["v1 messages Claude Gateway"]
GA0[Auth middleware] --> GA1[Read body]
GA1 -->|empty| GA1E[400 invalid_request_error]
GA1 --> GA2[ParseGatewayRequest]
GA2 -->|parse error| GA2E[400 invalid_request_error]
GA2 --> GA3{model present}
GA3 -->|no| GA3E[400 invalid_request_error]
GA3 --> GA4[streamStarted false]
GA4 --> GA5[IncrementWaitCount user]
GA5 -->|queue full| GA5E[429 rate_limit_error]
GA5 --> GA6[AcquireUserSlotWithWait]
GA6 -->|timeout or fail| GA6E[429 rate_limit_error]
GA6 --> GA7[BillingEligibility check post wait]
GA7 -->|fail| GA7E[403 billing_error]
GA7 --> GA8[Generate sessionHash]
GA8 --> GA9[Resolve platform]
GA9 --> GA10{platform gemini}
GA10 -->|yes| GA10Y[sessionKey gemini hash]
GA10 -->|no| GA10N[sessionKey hash]
GA10Y --> GA11
GA10N --> GA11
GA11[SelectAccountWithLoadAwareness] -->|err and no failed| GA11E1[503 no available accounts]
GA11 -->|err and failed| GA11E2[map failover error]
GA11 --> GA12[Warmup intercept]
GA12 -->|yes| GA12Y[return mock and release if held]
GA12 -->|no| GA13[Acquire account slot or wait]
GA13 -->|wait queue full| GA13E1[429 rate_limit_error]
GA13 -->|wait timeout| GA13E2[429 concurrency limit]
GA13 --> GA14[BindStickySession if waited]
GA14 --> GA15{account platform antigravity}
GA15 -->|yes| GA15Y[ForwardGemini antigravity]
GA15 -->|no| GA15N[Forward Claude]
GA15Y --> GA16[Release account slot and dec account wait]
GA15N --> GA16
GA16 --> GA17{UpstreamFailoverError}
GA17 -->|yes| GA18[mark failedAccountIDs and map error if exceed]
GA18 -->|loop| GA11
GA17 -->|no| GA19[success async RecordUsage and return]
GA19 --> GA20[defer release user slot and dec wait count]
end
%% =========================
%% FLOW B: OpenAI
%% =========================
subgraph FLOW_B["openai v1 responses"]
OA0[Auth middleware] --> OA1[Read body]
OA1 -->|empty| OA1E[400 invalid_request_error]
OA1 --> OA2[json Unmarshal body]
OA2 -->|parse error| OA2E[400 invalid_request_error]
OA2 --> OA3{model present}
OA3 -->|no| OA3E[400 invalid_request_error]
OA3 --> OA4{User Agent Codex CLI}
OA4 -->|no| OA4N[set default instructions]
OA4 -->|yes| OA4Y[no change]
OA4N --> OA5
OA4Y --> OA5
OA5[streamStarted false] --> OA6[IncrementWaitCount user]
OA6 -->|queue full| OA6E[429 rate_limit_error]
OA6 --> OA7[AcquireUserSlotWithWait]
OA7 -->|timeout or fail| OA7E[429 rate_limit_error]
OA7 --> OA8[BillingEligibility check post wait]
OA8 -->|fail| OA8E[403 billing_error]
OA8 --> OA9[sessionHash sha256 session_id]
OA9 --> OA10[SelectAccountWithLoadAwareness]
OA10 -->|err and no failed| OA10E1[503 no available accounts]
OA10 -->|err and failed| OA10E2[map failover error]
OA10 --> OA11[Acquire account slot or wait]
OA11 -->|wait queue full| OA11E1[429 rate_limit_error]
OA11 -->|wait timeout| OA11E2[429 concurrency limit]
OA11 --> OA12[BindStickySession openai hash if waited]
OA12 --> OA13[Forward OpenAI upstream]
OA13 --> OA14[Release account slot and dec account wait]
OA14 --> OA15{UpstreamFailoverError}
OA15 -->|yes| OA16[mark failedAccountIDs and map error if exceed]
OA16 -->|loop| OA10
OA15 -->|no| OA17[success async RecordUsage and return]
OA17 --> OA18[defer release user slot and dec wait count]
end
%% =========================
%% FLOW C: Gemini Native
%% =========================
subgraph FLOW_C["v1beta models model action Gemini Native"]
GM0[Auth middleware] --> GM1[Validate platform]
GM1 -->|invalid| GM1E[400 googleError]
GM1 --> GM2[Parse path modelName action]
GM2 -->|invalid| GM2E[400 googleError]
GM2 --> GM3{action supported}
GM3 -->|no| GM3E[404 googleError]
GM3 --> GM4[Read body]
GM4 -->|empty| GM4E[400 googleError]
GM4 --> GM5[streamStarted false]
GM5 --> GM6[IncrementWaitCount user]
GM6 -->|queue full| GM6E[429 googleError]
GM6 --> GM7[AcquireUserSlotWithWait]
GM7 -->|timeout or fail| GM7E[429 googleError]
GM7 --> GM8[BillingEligibility check post wait]
GM8 -->|fail| GM8E[403 googleError]
GM8 --> GM9[Generate sessionHash]
GM9 --> GM10[sessionKey gemini hash]
GM10 --> GM11[SelectAccountWithLoadAwareness]
GM11 -->|err and no failed| GM11E1[503 googleError]
GM11 -->|err and failed| GM11E2[mapGeminiUpstreamError]
GM11 --> GM12[Acquire account slot or wait]
GM12 -->|wait queue full| GM12E1[429 googleError]
GM12 -->|wait timeout| GM12E2[429 googleError]
GM12 --> GM13[BindStickySession if waited]
GM13 --> GM14{account platform antigravity}
GM14 -->|yes| GM14Y[ForwardGemini antigravity]
GM14 -->|no| GM14N[ForwardNative]
GM14Y --> GM15[Release account slot and dec account wait]
GM14N --> GM15
GM15 --> GM16{UpstreamFailoverError}
GM16 -->|yes| GM17[mark failedAccountIDs and map error if exceed]
GM17 -->|loop| GM11
GM16 -->|no| GM18[success async RecordUsage and return]
GM18 --> GM19[defer release user slot and dec wait count]
end
%% =========================
%% FLOW D: CountTokens
%% =========================
subgraph FLOW_D["v1 messages count tokens"]
GT0[Auth middleware] --> GT1[Read body]
GT1 -->|empty| GT1E[400 invalid_request_error]
GT1 --> GT2[ParseGatewayRequest]
GT2 -->|parse error| GT2E[400 invalid_request_error]
GT2 --> GT3{model present}
GT3 -->|no| GT3E[400 invalid_request_error]
GT3 --> GT4[BillingEligibility check]
GT4 -->|fail| GT4E[403 billing_error]
GT4 --> GT5[ForwardCountTokens]
end
%% =========================
%% FLOW E: Gemini Models List Get
%% =========================
subgraph FLOW_E["v1beta models list or get"]
GL0[Auth middleware] --> GL1[Validate platform]
GL1 -->|invalid| GL1E[400 googleError]
GL1 --> GL2{force platform antigravity}
GL2 -->|yes| GL2Y[return static fallback models]
GL2 -->|no| GL3[SelectAccountForAIStudioEndpoints]
GL3 -->|no gemini and has antigravity| GL3Y[return fallback models]
GL3 -->|no accounts| GL3E[503 googleError]
GL3 --> GL4[ForwardAIStudioGET]
GL4 -->|error| GL4E[502 googleError]
GL4 --> GL5[Passthrough response or fallback]
end
%% =========================
%% SHARED: Account Selection
%% =========================
subgraph SELECT["SelectAccountWithLoadAwareness detail"]
S0[Start] --> S1{concurrencyService nil OR load batch disabled}
S1 -->|yes| S2[SelectAccountForModelWithExclusions legacy]
S2 --> S3[tryAcquireAccountSlot]
S3 -->|acquired| S3Y[SelectionResult Acquired true ReleaseFunc]
S3 -->|not acquired| S3N[WaitPlan FallbackTimeout MaxWaiting]
S1 -->|no| S4[Resolve platform]
S4 --> S5[List schedulable accounts]
S5 --> S6[Layer1 Sticky session]
S6 -->|hit and valid| S6A[tryAcquireAccountSlot]
S6A -->|acquired| S6AY[SelectionResult Acquired true]
S6A -->|not acquired and waitingCount < StickyMax| S6AN[WaitPlan StickyTimeout Max]
S6 --> S7[Layer2 Load aware]
S7 --> S7A[Load batch concurrency plus wait to loadRate]
S7A --> S7B[Sort priority load LRU OAuth prefer for Gemini]
S7B --> S7C[tryAcquireAccountSlot in order]
S7C -->|first success| S7CY[SelectionResult Acquired true]
S7C -->|none| S8[Layer3 Fallback wait]
S8 --> S8A[Sort priority LRU]
S8A --> S8B[WaitPlan FallbackTimeout Max]
end
%% =========================
%% SHARED: Wait Acquire
%% =========================
subgraph WAIT["AcquireXSlotWithWait detail"]
W0[Try AcquireXSlot immediately] -->|acquired| W1[return ReleaseFunc]
W0 -->|not acquired| W2[Wait loop with timeout]
W2 --> W3[Backoff 100ms x1.5 jitter max2s]
W2 --> W4[If streaming and ping format send SSE ping]
W2 --> W5[Retry AcquireXSlot on timer]
W5 -->|acquired| W1
W2 -->|timeout| W6[ConcurrencyError IsTimeout true]
end
%% =========================
%% SHARED: Account Wait Queue
%% =========================
subgraph AQ["Account Wait Queue Redis Lua"]
Q1[IncrementAccountWaitCount] --> Q2{current >= max}
Q2 -->|yes| Q2Y[return false]
Q2 -->|no| Q3[INCR and if first set TTL]
Q3 --> Q4[return true]
Q5[DecrementAccountWaitCount] --> Q6[if current > 0 then DECR]
end
%% =========================
%% SHARED: Background cleanup
%% =========================
subgraph CLEANUP["Slot Cleanup Worker"]
C0[StartSlotCleanupWorker interval] --> C1[List schedulable accounts]
C1 --> C2[CleanupExpiredAccountSlots per account]
C2 --> C3[Repeat every interval]
end
```
......@@ -952,7 +952,6 @@
"integrity": "sha512-N2clP5pJhB2YnZJ3PIHFk5RkygRX5WO/5f0WC08tp0wd+sv0rsJk3MqWn3CbNmT2J505a5336jaQj4ph1AdMug==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"undici-types": "~6.21.0"
}
......@@ -1368,7 +1367,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"baseline-browser-mapping": "^2.9.0",
"caniuse-lite": "^1.0.30001759",
......@@ -1445,7 +1443,6 @@
"resolved": "https://registry.npmmirror.com/chart.js/-/chart.js-4.5.1.tgz",
"integrity": "sha512-GIjfiT9dbmHRiYi6Nl2yFCq7kkwdkp1W/lp2J99rX0yo9tgJGn3lKQATztIjb5tVtevcBtIdICNWqlq5+E8/Pw==",
"license": "MIT",
"peer": true,
"dependencies": {
"@kurkle/color": "^0.3.0"
},
......@@ -2043,7 +2040,6 @@
"integrity": "sha512-/imKNG4EbWNrVjoNC/1H5/9GFy+tqjGBHCaSsN+P2RnPqjsLmv6UD3Ej+Kj8nBWaRAwyk7kK5ZUc+OEatnTR3A==",
"dev": true,
"license": "MIT",
"peer": true,
"bin": {
"jiti": "bin/jiti.js"
}
......@@ -2352,7 +2348,6 @@
}
],
"license": "MIT",
"peer": true,
"dependencies": {
"nanoid": "^3.3.11",
"picocolors": "^1.1.1",
......@@ -2826,7 +2821,6 @@
"integrity": "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==",
"dev": true,
"license": "MIT",
"peer": true,
"engines": {
"node": ">=12"
},
......@@ -2860,7 +2854,6 @@
"integrity": "sha512-hjcS1mhfuyi4WW8IWtjP7brDrG2cuDZukyrYrSauoXGNgx0S7zceP07adYkJycEr56BOUTNPzbInooiN3fn1qw==",
"devOptional": true,
"license": "Apache-2.0",
"peer": true,
"bin": {
"tsc": "bin/tsc",
"tsserver": "bin/tsserver"
......@@ -2933,7 +2926,6 @@
"integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"esbuild": "^0.21.3",
"postcss": "^8.4.43",
......@@ -3105,7 +3097,6 @@
"resolved": "https://registry.npmmirror.com/vue/-/vue-3.5.25.tgz",
"integrity": "sha512-YLVdgv2K13WJ6n+kD5owehKtEXwdwXuj2TTyJMsO7pSeKw2bfRNZGjhB7YzrpbMYj5b5QsUebHpOqR3R3ziy/g==",
"license": "MIT",
"peer": true,
"dependencies": {
"@vue/compiler-dom": "3.5.25",
"@vue/compiler-sfc": "3.5.25",
......@@ -3199,7 +3190,6 @@
"integrity": "sha512-P7OP77b2h/Pmk+lZdJ0YWs+5tJ6J2+uOQPo7tlBnY44QqQSPYvS0qVT4wqDJgwrZaLe47etJLLQRFia71GYITw==",
"dev": true,
"license": "MIT",
"peer": true,
"dependencies": {
"@volar/typescript": "2.4.15",
"@vue/language-core": "2.2.12"
......
......@@ -83,14 +83,6 @@
></div>
</div>
</div>
<!-- Tier Indicator -->
<span
v-if="tierDisplay"
class="inline-flex items-center rounded bg-blue-100 px-1.5 py-0.5 text-xs font-medium text-blue-700 dark:bg-blue-900/30 dark:text-blue-400"
>
{{ tierDisplay }}
</span>
</div>
</template>
......@@ -148,23 +140,4 @@ const statusText = computed(() => {
return props.account.status
})
// Computed: tier display
const tierDisplay = computed(() => {
const credentials = props.account.credentials as Record<string, any> | undefined
const tierId = credentials?.tier_id
if (!tierId || tierId === 'unknown') return null
const tierMap: Record<string, string> = {
'free': 'Free',
'payg': 'Pay-as-you-go',
'pay-as-you-go': 'Pay-as-you-go',
'enterprise': 'Enterprise',
'LEGACY': 'Legacy',
'PRO': 'Pro',
'ULTRA': 'Ultra'
}
return tierMap[tierId] || tierId
})
</script>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment