Commit ee4bfcbb authored by Elysia
Browse files

Merge remote-tracking branch 'origin/main'

parents 32d619a5 cac23020
......@@ -864,7 +864,8 @@ func isOpenAIWSClientDisconnectError(err error) bool {
strings.Contains(message, "unexpected eof") ||
strings.Contains(message, "use of closed network connection") ||
strings.Contains(message, "connection reset by peer") ||
strings.Contains(message, "broken pipe")
strings.Contains(message, "broken pipe") ||
strings.Contains(message, "an established connection was aborted")
}
func classifyOpenAIWSReadFallbackReason(err error) string {
......
......@@ -64,8 +64,12 @@ func (s *OpsService) getAccountsLoadMapBestEffort(ctx context.Context, accounts
if acc.ID <= 0 {
continue
}
if prev, ok := unique[acc.ID]; !ok || acc.Concurrency > prev {
unique[acc.ID] = acc.Concurrency
c := acc.Concurrency
if c <= 0 {
c = 1
}
if prev, ok := unique[acc.ID]; !ok || c > prev {
unique[acc.ID] = c
}
}
......
......@@ -389,13 +389,9 @@ func (c *OpsMetricsCollector) collectConcurrencyQueueDepth(parentCtx context.Con
if acc.ID <= 0 {
continue
}
maxConc := acc.Concurrency
if maxConc < 0 {
maxConc = 0
}
batch = append(batch, AccountWithConcurrency{
ID: acc.ID,
MaxConcurrency: maxConc,
MaxConcurrency: acc.Concurrency,
})
}
if len(batch) == 0 {
......
......@@ -23,6 +23,17 @@ import (
// Package-level patterns and static pricing used when resolving OpenAI model names.
var (
// openAIModelDatePattern matches a trailing "-" followed by exactly eight digits,
// e.g. the "-20251222" suffix of a dated model name.
openAIModelDatePattern = regexp.MustCompile(`-\d{8}$`)
// openAIModelBasePattern matches a leading "gpt-<digits>" with an optional
// ".<digits>" part, followed by "-" or the end of the string; the first capture
// group holds that base name (e.g. "gpt-5.2").
openAIModelBasePattern = regexp.MustCompile(`^(gpt-\d+(?:\.\d+)?)(?:-|$)`)
// openAIGPT54FallbackPricing is a hard-coded pricing entry for the gpt-5.4 family.
// It is a single shared pointer, so callers that receive it should treat it as read-only.
// NOTE(review): the long-context fields presumably raise input/output cost once the
// input token count exceeds the threshold — confirm against the cost calculation code.
openAIGPT54FallbackPricing = &LiteLLMModelPricing{
InputCostPerToken: 2.5e-06, // $2.5 per MTok
OutputCostPerToken: 1.5e-05, // $15 per MTok
CacheReadInputTokenCost: 2.5e-07, // $0.25 per MTok
LongContextInputTokenThreshold: 272000,
LongContextInputCostMultiplier: 2.0,
LongContextOutputCostMultiplier: 1.5,
LiteLLMProvider: "openai",
Mode: "chat",
SupportsPromptCaching: true,
}
)
// LiteLLMModelPricing LiteLLM价格数据结构
......@@ -33,6 +44,9 @@ type LiteLLMModelPricing struct {
CacheCreationInputTokenCost float64 `json:"cache_creation_input_token_cost"`
CacheCreationInputTokenCostAbove1hr float64 `json:"cache_creation_input_token_cost_above_1hr"`
CacheReadInputTokenCost float64 `json:"cache_read_input_token_cost"`
LongContextInputTokenThreshold int `json:"long_context_input_token_threshold,omitempty"`
LongContextInputCostMultiplier float64 `json:"long_context_input_cost_multiplier,omitempty"`
LongContextOutputCostMultiplier float64 `json:"long_context_output_cost_multiplier,omitempty"`
LiteLLMProvider string `json:"litellm_provider"`
Mode string `json:"mode"`
SupportsPromptCaching bool `json:"supports_prompt_caching"`
......@@ -660,7 +674,8 @@ func (s *PricingService) matchByModelFamily(model string) *LiteLLMModelPricing {
// 2. gpt-5.2-codex -> gpt-5.2(去掉后缀如 -codex, -mini, -max 等)
// 3. gpt-5.2-20251222 -> gpt-5.2(去掉日期版本号)
// 4. gpt-5.3-codex -> gpt-5.2-codex
// 5. 最终回退到 DefaultTestModel (gpt-5.1-codex)
// 5. gpt-5.4* -> 业务静态兜底价
// 6. 最终回退到 DefaultTestModel (gpt-5.1-codex)
func (s *PricingService) matchOpenAIModel(model string) *LiteLLMModelPricing {
if strings.HasPrefix(model, "gpt-5.3-codex-spark") {
if pricing, ok := s.pricingData["gpt-5.1-codex"]; ok {
......@@ -690,6 +705,12 @@ func (s *PricingService) matchOpenAIModel(model string) *LiteLLMModelPricing {
}
}
if strings.HasPrefix(model, "gpt-5.4") {
logger.With(zap.String("component", "service.pricing")).
Info(fmt.Sprintf("[Pricing] OpenAI fallback matched %s -> %s", model, "gpt-5.4(static)"))
return openAIGPT54FallbackPricing
}
// 最终回退到 DefaultTestModel
defaultModel := strings.ToLower(openai.DefaultTestModel)
if pricing, ok := s.pricingData[defaultModel]; ok {
......
......@@ -51,3 +51,20 @@ func TestGetModelPricing_OpenAIFallbackMatchedLoggedAsInfo(t *testing.T) {
require.True(t, logSink.ContainsMessageAtLevel("[Pricing] OpenAI fallback matched gpt-5.3-codex -> gpt-5.2-codex", "info"))
require.False(t, logSink.ContainsMessageAtLevel("[Pricing] OpenAI fallback matched gpt-5.3-codex -> gpt-5.2-codex", "warn"))
}
func TestGetModelPricing_Gpt54UsesStaticFallbackWhenRemoteMissing(t *testing.T) {
svc := &PricingService{
pricingData: map[string]*LiteLLMModelPricing{
"gpt-5.1-codex": &LiteLLMModelPricing{InputCostPerToken: 1.25e-6},
},
}
got := svc.GetModelPricing("gpt-5.4")
require.NotNil(t, got)
require.InDelta(t, 2.5e-6, got.InputCostPerToken, 1e-12)
require.InDelta(t, 1.5e-5, got.OutputCostPerToken, 1e-12)
require.InDelta(t, 2.5e-7, got.CacheReadInputTokenCost, 1e-12)
require.Equal(t, 272000, got.LongContextInputTokenThreshold)
require.InDelta(t, 2.0, got.LongContextInputCostMultiplier, 1e-12)
require.InDelta(t, 1.5, got.LongContextOutputCostMultiplier, 1e-12)
}
......@@ -34,7 +34,7 @@ func TestCalculateProgress_BasicFields(t *testing.T) {
assert.Equal(t, int64(100), progress.ID)
assert.Equal(t, "Premium", progress.GroupName)
assert.Equal(t, sub.ExpiresAt, progress.ExpiresAt)
assert.Equal(t, 29, progress.ExpiresInDays) // 约 30 天
assert.True(t, progress.ExpiresInDays == 29 || progress.ExpiresInDays == 30, "ExpiresInDays should be 29 or 30, got %d", progress.ExpiresInDays)
assert.Nil(t, progress.Daily, "无日限额时 Daily 应为 nil")
assert.Nil(t, progress.Weekly, "无周限额时 Weekly 应为 nil")
assert.Nil(t, progress.Monthly, "无月限额时 Monthly 应为 nil")
......
ALTER TABLE accounts ADD COLUMN IF NOT EXISTS load_factor INTEGER;
......@@ -5140,6 +5140,39 @@
"supports_vision": true,
"supports_web_search": true
},
"gpt-5.4": {
"cache_read_input_token_cost": 2.5e-07,
"input_cost_per_token": 2.5e-06,
"litellm_provider": "openai",
"max_input_tokens": 1050000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "chat",
"output_cost_per_token": 1.5e-05,
"supported_endpoints": [
"/v1/chat/completions",
"/v1/responses"
],
"supported_modalities": [
"text",
"image"
],
"supported_output_modalities": [
"text",
"image"
],
"supports_function_calling": true,
"supports_native_streaming": true,
"supports_parallel_function_calling": true,
"supports_pdf_input": true,
"supports_prompt_caching": true,
"supports_reasoning": true,
"supports_response_schema": true,
"supports_service_tier": true,
"supports_system_messages": true,
"supports_tool_choice": true,
"supports_vision": true
},
"gpt-5.3-codex": {
"cache_read_input_token_cost": 1.75e-07,
"cache_read_input_token_cost_priority": 3.5e-07,
......
......@@ -240,6 +240,18 @@ export async function clearRateLimit(id: number): Promise<Account> {
return data
}
/**
 * Reset account quota usage.
 *
 * Issues a POST to `/admin/accounts/{id}/reset-quota` and resolves with the
 * response payload.
 * @param id - Account ID
 * @returns Updated account
 */
export async function resetAccountQuota(id: number): Promise<Account> {
  const endpoint = `/admin/accounts/${id}/reset-quota`
  const response = await apiClient.post<Account>(endpoint)
  return response.data
}
/**
* Get temporary unschedulable status
* @param id - Account ID
......@@ -576,6 +588,7 @@ export const accountsAPI = {
getTodayStats,
getBatchTodayStats,
clearRateLimit,
resetAccountQuota,
getTempUnschedulableStatus,
resetTempUnschedulable,
setSchedulable,
......
......@@ -71,6 +71,24 @@
<span class="text-[9px] opacity-60">{{ rpmStrategyTag }}</span>
</span>
</div>
<!-- API Key 账号配额限制 -->
<div v-if="showQuotaLimit" class="flex items-center gap-1">
<span
:class="[
'inline-flex items-center gap-1 rounded-md px-1.5 py-0.5 text-[10px] font-medium',
quotaClass
]"
:title="quotaTooltip"
>
<svg class="h-2.5 w-2.5" fill="none" viewBox="0 0 24 24" stroke="currentColor" stroke-width="2">
<path stroke-linecap="round" stroke-linejoin="round" d="M2.25 18.75a60.07 60.07 0 0115.797 2.101c.727.198 1.453-.342 1.453-1.096V18.75M3.75 4.5v.75A.75.75 0 013 6h-.75m0 0v-.375c0-.621.504-1.125 1.125-1.125H20.25M2.25 6v9m18-10.5v.75c0 .414.336.75.75.75h.75m-1.5-1.5h.375c.621 0 1.125.504 1.125 1.125v9.75c0 .621-.504 1.125-1.125 1.125h-.375m1.5-1.5H21a.75.75 0 00-.75.75v.75m0 0H3.75m0 0h-.375a1.125 1.125 0 01-1.125-1.125V15m1.5 1.5v-.75A.75.75 0 003 15h-.75M15 10.5a3 3 0 11-6 0 3 3 0 016 0zm3 0h.008v.008H18V10.5zm-12 0h.008v.008H6V10.5z" />
</svg>
<span class="font-mono">${{ formatCost(currentQuotaUsed) }}</span>
<span class="text-gray-400 dark:text-gray-500">/</span>
<span class="font-mono">${{ formatCost(account.quota_limit) }}</span>
</span>
</div>
</div>
</template>
......@@ -286,6 +304,48 @@ const rpmTooltip = computed(() => {
}
})
// Whether to show the quota badge: only for 'apikey' accounts whose quota_limit is set and positive.
const showQuotaLimit = computed(() => {
  const account = props.account
  if (account.type !== 'apikey') {
    return false
  }
  const limit = account.quota_limit
  // `!= null` rejects both undefined and null.
  return limit != null && limit > 0
})
// Quota consumed so far; a missing (null/undefined) quota_used counts as 0.
const currentQuotaUsed = computed(() => {
  const used = props.account.quota_used
  return used ?? 0
})
// Colour classes for the quota badge: red once usage reaches the limit, yellow from
// 80% of the limit, emerald otherwise. Empty string when the badge is not shown.
const quotaClass = computed(() => {
  if (!showQuotaLimit.value) {
    return ''
  }

  const spent = currentQuotaUsed.value
  const cap = props.account.quota_limit || 0

  if (spent >= cap) {
    return 'bg-red-100 text-red-700 dark:bg-red-900/30 dark:text-red-400'
  }
  return spent >= cap * 0.8
    ? 'bg-yellow-100 text-yellow-700 dark:bg-yellow-900/30 dark:text-yellow-400'
    : 'bg-emerald-100 text-emerald-700 dark:bg-emerald-900/30 dark:text-emerald-400'
})
// Tooltip text for the quota badge: the "exceeded" message once usage reaches the
// limit, the "normal" message otherwise. Empty string when the badge is not shown.
const quotaTooltip = computed(() => {
  if (!showQuotaLimit.value) {
    return ''
  }

  const spent = currentQuotaUsed.value
  const cap = props.account.quota_limit || 0
  const messageKey = spent >= cap
    ? 'admin.accounts.capacity.quota.exceeded'
    : 'admin.accounts.capacity.quota.normal'
  return t(messageKey)
})
// 格式化费用显示
const formatCost = (value: number | null | undefined) => {
if (value === null || value === undefined) return '0'
......
......@@ -469,7 +469,7 @@
</div>
<!-- Concurrency & Priority -->
<div class="grid grid-cols-2 gap-4 border-t border-gray-200 pt-4 dark:border-dark-600 lg:grid-cols-3">
<div class="grid grid-cols-2 gap-4 border-t border-gray-200 pt-4 dark:border-dark-600 lg:grid-cols-4">
<div>
<div class="mb-3 flex items-center justify-between">
<label
......@@ -496,8 +496,39 @@
class="input"
:class="!enableConcurrency && 'cursor-not-allowed opacity-50'"
aria-labelledby="bulk-edit-concurrency-label"
@input="concurrency = Math.max(1, concurrency || 1)"
/>
</div>
<div>
<div class="mb-3 flex items-center justify-between">
<label
id="bulk-edit-load-factor-label"
class="input-label mb-0"
for="bulk-edit-load-factor-enabled"
>
{{ t('admin.accounts.loadFactor') }}
</label>
<input
v-model="enableLoadFactor"
id="bulk-edit-load-factor-enabled"
type="checkbox"
aria-controls="bulk-edit-load-factor"
class="rounded border-gray-300 text-primary-600 focus:ring-primary-500"
/>
</div>
<!-- Load factor value: on every input event anything that is empty, NaN, 0 or below 1 is normalised to null. -->
<input
v-model.number="loadFactor"
id="bulk-edit-load-factor"
type="number"
min="1"
:disabled="!enableLoadFactor"
class="input"
:class="!enableLoadFactor && 'cursor-not-allowed opacity-50'"
aria-labelledby="bulk-edit-load-factor-label"
@input="loadFactor = (loadFactor && loadFactor >= 1) ? loadFactor : null"
/>
<p class="input-hint">{{ t('admin.accounts.loadFactorHint') }}</p>
</div>
<div>
<div class="mb-3 flex items-center justify-between">
<label
......@@ -869,6 +900,7 @@ const enableCustomErrorCodes = ref(false)
const enableInterceptWarmup = ref(false)
const enableProxy = ref(false)
const enableConcurrency = ref(false)
const enableLoadFactor = ref(false)
const enablePriority = ref(false)
const enableRateMultiplier = ref(false)
const enableStatus = ref(false)
......@@ -889,6 +921,7 @@ const customErrorCodeInput = ref<number | null>(null)
const interceptWarmupRequests = ref(false)
const proxyId = ref<number | null>(null)
const concurrency = ref(1)
const loadFactor = ref<number | null>(null)
const priority = ref(1)
const rateMultiplier = ref(1)
const status = ref<'active' | 'inactive'>('active')
......@@ -918,6 +951,7 @@ const allModels = [
{ value: 'claude-3-haiku-20240307', label: 'Claude 3 Haiku' },
{ value: 'gpt-5.3-codex', label: 'GPT-5.3 Codex' },
{ value: 'gpt-5.3-codex-spark', label: 'GPT-5.3 Codex Spark' },
{ value: 'gpt-5.4', label: 'GPT-5.4' },
{ value: 'gpt-5.2-2025-12-11', label: 'GPT-5.2' },
{ value: 'gpt-5.2-codex', label: 'GPT-5.2 Codex' },
{ value: 'gpt-5.1-codex-max', label: 'GPT-5.1 Codex Max' },
......@@ -1032,6 +1066,12 @@ const presetMappings = [
to: 'gpt-5.3-codex-spark',
color: 'bg-emerald-100 text-emerald-700 hover:bg-emerald-200 dark:bg-emerald-900/30 dark:text-emerald-400'
},
{
label: 'GPT-5.4',
from: 'gpt-5.4',
to: 'gpt-5.4',
color: 'bg-rose-100 text-rose-700 hover:bg-rose-200 dark:bg-rose-900/30 dark:text-rose-400'
},
{
label: '5.2→5.3',
from: 'gpt-5.2-codex',
......@@ -1195,6 +1235,12 @@ const buildUpdatePayload = (): Record<string, unknown> | null => {
updates.concurrency = concurrency.value
}
if (enableLoadFactor.value) {
// 空值/NaN/0 时发送 0(后端约定 <= 0 表示清除)
const lf = loadFactor.value
updates.load_factor = (lf != null && !Number.isNaN(lf) && lf > 0) ? lf : 0
}
if (enablePriority.value) {
updates.priority = priority.value
}
......@@ -1340,6 +1386,7 @@ const handleSubmit = async () => {
enableInterceptWarmup.value ||
enableProxy.value ||
enableConcurrency.value ||
enableLoadFactor.value ||
enablePriority.value ||
enableRateMultiplier.value ||
enableStatus.value ||
......@@ -1430,6 +1477,7 @@ watch(
enableInterceptWarmup.value = false
enableProxy.value = false
enableConcurrency.value = false
enableLoadFactor.value = false
enablePriority.value = false
enableRateMultiplier.value = false
enableStatus.value = false
......@@ -1446,6 +1494,7 @@ watch(
interceptWarmupRequests.value = false
proxyId.value = null
concurrency.value = 1
loadFactor.value = null
priority.value = 1
rateMultiplier.value = 1
status.value = 'active'
......
......@@ -1227,6 +1227,9 @@
</div>
<!-- API Key 账号配额限制 -->
<QuotaLimitCard v-if="form.type === 'apikey'" v-model="editQuotaLimit" />
<!-- Temp Unschedulable Rules -->
<div class="border-t border-gray-200 pt-4 dark:border-dark-600 space-y-4">
<div class="mb-3 flex items-center justify-between">
......@@ -1749,10 +1752,18 @@
<ProxySelector v-model="form.proxy_id" :proxies="proxies" />
</div>
<div class="grid grid-cols-2 gap-4 lg:grid-cols-3">
<div class="grid grid-cols-2 gap-4 lg:grid-cols-4">
<div>
<label class="input-label">{{ t('admin.accounts.concurrency') }}</label>
<input v-model.number="form.concurrency" type="number" min="1" class="input" />
<input v-model.number="form.concurrency" type="number" min="1" class="input"
@input="form.concurrency = Math.max(1, form.concurrency || 1)" />
</div>
<div>
<label class="input-label">{{ t('admin.accounts.loadFactor') }}</label>
<!-- Load factor: the placeholder shows the current concurrency (or 1); values that are empty, NaN, 0 or below 1 are normalised to null. -->
<input v-model.number="form.load_factor" type="number" min="1"
class="input" :placeholder="String(form.concurrency || 1)"
@input="form.load_factor = (form.load_factor && form.load_factor >= 1) ? form.load_factor : null" />
<p class="input-hint">{{ t('admin.accounts.loadFactorHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.priority') }}</label>
......@@ -2337,11 +2348,12 @@ import Icon from '@/components/icons/Icon.vue'
import ProxySelector from '@/components/common/ProxySelector.vue'
import GroupSelector from '@/components/common/GroupSelector.vue'
import ModelWhitelistSelector from '@/components/account/ModelWhitelistSelector.vue'
import QuotaLimitCard from '@/components/account/QuotaLimitCard.vue'
import { applyInterceptWarmup } from '@/components/account/credentialsBuilder'
import { formatDateTimeLocalInput, parseDateTimeLocalInput } from '@/utils/format'
import { createStableObjectKeyResolver } from '@/utils/stableObjectKey'
import {
OPENAI_WS_MODE_CTX_POOL,
// OPENAI_WS_MODE_CTX_POOL,
OPENAI_WS_MODE_OFF,
OPENAI_WS_MODE_PASSTHROUGH,
isOpenAIWSModeEnabled,
......@@ -2460,6 +2472,7 @@ const accountCategory = ref<'oauth-based' | 'apikey'>('oauth-based') // UI selec
const addMethod = ref<AddMethod>('oauth') // For oauth-based: 'oauth' or 'setup-token'
const apiKeyBaseUrl = ref('https://api.anthropic.com')
const apiKeyValue = ref('')
const editQuotaLimit = ref<number | null>(null)
const modelMappings = ref<ModelMapping[]>([])
const modelRestrictionMode = ref<'whitelist' | 'mapping'>('whitelist')
const allowedModels = ref<string[]>([])
......@@ -2542,7 +2555,8 @@ const geminiSelectedTier = computed(() => {
const openAIWSModeOptions = computed(() => [
{ value: OPENAI_WS_MODE_OFF, label: t('admin.accounts.openai.wsModeOff') },
{ value: OPENAI_WS_MODE_CTX_POOL, label: t('admin.accounts.openai.wsModeCtxPool') },
// TODO: ctx_pool 选项暂时隐藏,待测试完成后恢复
// { value: OPENAI_WS_MODE_CTX_POOL, label: t('admin.accounts.openai.wsModeCtxPool') },
{ value: OPENAI_WS_MODE_PASSTHROUGH, label: t('admin.accounts.openai.wsModePassthrough') }
])
......@@ -2632,6 +2646,7 @@ const form = reactive({
credentials: {} as Record<string, unknown>,
proxy_id: null as number | null,
concurrency: 10,
load_factor: null as number | null,
priority: 1,
rate_multiplier: 1,
group_ids: [] as number[],
......@@ -3111,6 +3126,7 @@ const resetForm = () => {
form.credentials = {}
form.proxy_id = null
form.concurrency = 10
form.load_factor = null
form.priority = 1
form.rate_multiplier = 1
form.group_ids = []
......@@ -3119,6 +3135,7 @@ const resetForm = () => {
addMethod.value = 'oauth'
apiKeyBaseUrl.value = 'https://api.anthropic.com'
apiKeyValue.value = ''
editQuotaLimit.value = null
modelMappings.value = []
modelRestrictionMode.value = 'whitelist'
allowedModels.value = [...claudeModels] // Default fill related models
......@@ -3482,6 +3499,7 @@ const handleImportAccessToken = async (accessTokenInput: string) => {
extra: soraExtra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3532,15 +3550,21 @@ const createAccountAndFinish = async (
if (!applyTempUnschedConfig(credentials)) {
return
}
// Inject quota_limit for apikey accounts
let finalExtra = extra
if (type === 'apikey' && editQuotaLimit.value != null && editQuotaLimit.value > 0) {
finalExtra = { ...(extra || {}), quota_limit: editQuotaLimit.value }
}
await doCreateAccount({
name: form.name,
notes: form.notes,
platform,
type,
credentials,
extra,
extra: finalExtra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3596,6 +3620,7 @@ const handleOpenAIExchange = async (authCode: string) => {
extra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3625,6 +3650,7 @@ const handleOpenAIExchange = async (authCode: string) => {
extra: soraExtra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3702,6 +3728,7 @@ const handleOpenAIValidateRT = async (refreshTokenInput: string) => {
extra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3729,6 +3756,7 @@ const handleOpenAIValidateRT = async (refreshTokenInput: string) => {
extra: soraExtra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3817,6 +3845,7 @@ const handleSoraValidateST = async (sessionTokenInput: string) => {
extra: soraExtra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -3905,6 +3934,7 @@ const handleAntigravityValidateRT = async (refreshTokenInput: string) => {
extra: {},
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......@@ -4063,8 +4093,11 @@ const handleAnthropicExchange = async (authCode: string) => {
}
// Add RPM limit settings
if (rpmLimitEnabled.value && baseRpm.value != null && baseRpm.value > 0) {
extra.base_rpm = baseRpm.value
if (rpmLimitEnabled.value) {
const DEFAULT_BASE_RPM = 15
extra.base_rpm = (baseRpm.value != null && baseRpm.value > 0)
? baseRpm.value
: DEFAULT_BASE_RPM
extra.rpm_strategy = rpmStrategy.value
if (rpmStickyBuffer.value != null && rpmStickyBuffer.value > 0) {
extra.rpm_sticky_buffer = rpmStickyBuffer.value
......@@ -4175,8 +4208,11 @@ const handleCookieAuth = async (sessionKey: string) => {
}
// Add RPM limit settings
if (rpmLimitEnabled.value && baseRpm.value != null && baseRpm.value > 0) {
extra.base_rpm = baseRpm.value
if (rpmLimitEnabled.value) {
const DEFAULT_BASE_RPM = 15
extra.base_rpm = (baseRpm.value != null && baseRpm.value > 0)
? baseRpm.value
: DEFAULT_BASE_RPM
extra.rpm_strategy = rpmStrategy.value
if (rpmStickyBuffer.value != null && rpmStickyBuffer.value > 0) {
extra.rpm_sticky_buffer = rpmStickyBuffer.value
......@@ -4222,6 +4258,7 @@ const handleCookieAuth = async (sessionKey: string) => {
extra,
proxy_id: form.proxy_id,
concurrency: form.concurrency,
load_factor: form.load_factor ?? undefined,
priority: form.priority,
rate_multiplier: form.rate_multiplier,
group_ids: form.group_ids,
......
......@@ -650,10 +650,18 @@
<ProxySelector v-model="form.proxy_id" :proxies="proxies" />
</div>
<div class="grid grid-cols-2 gap-4 lg:grid-cols-3">
<div class="grid grid-cols-2 gap-4 lg:grid-cols-4">
<div>
<label class="input-label">{{ t('admin.accounts.concurrency') }}</label>
<input v-model.number="form.concurrency" type="number" min="1" class="input" />
<input v-model.number="form.concurrency" type="number" min="1" class="input"
@input="form.concurrency = Math.max(1, form.concurrency || 1)" />
</div>
<div>
<label class="input-label">{{ t('admin.accounts.loadFactor') }}</label>
<!-- Load factor: the placeholder shows the current concurrency (or 1); values that are empty, NaN, 0 or below 1 are normalised to null. -->
<input v-model.number="form.load_factor" type="number" min="1"
class="input" :placeholder="String(form.concurrency || 1)"
@input="form.load_factor = (form.load_factor && form.load_factor >= 1) ? form.load_factor : null" />
<p class="input-hint">{{ t('admin.accounts.loadFactorHint') }}</p>
</div>
<div>
<label class="input-label">{{ t('admin.accounts.priority') }}</label>
......@@ -759,6 +767,9 @@
</div>
</div>
<!-- API Key 账号配额限制 -->
<QuotaLimitCard v-if="account?.type === 'apikey'" v-model="editQuotaLimit" />
<!-- OpenAI OAuth Codex 官方客户端限制开关 -->
<div
v-if="account?.platform === 'openai' && account?.type === 'oauth'"
......@@ -1269,11 +1280,12 @@ import Icon from '@/components/icons/Icon.vue'
import ProxySelector from '@/components/common/ProxySelector.vue'
import GroupSelector from '@/components/common/GroupSelector.vue'
import ModelWhitelistSelector from '@/components/account/ModelWhitelistSelector.vue'
import QuotaLimitCard from '@/components/account/QuotaLimitCard.vue'
import { applyInterceptWarmup } from '@/components/account/credentialsBuilder'
import { formatDateTimeLocalInput, parseDateTimeLocalInput } from '@/utils/format'
import { createStableObjectKeyResolver } from '@/utils/stableObjectKey'
import {
OPENAI_WS_MODE_CTX_POOL,
// OPENAI_WS_MODE_CTX_POOL,
OPENAI_WS_MODE_OFF,
OPENAI_WS_MODE_PASSTHROUGH,
isOpenAIWSModeEnabled,
......@@ -1386,9 +1398,11 @@ const openaiOAuthResponsesWebSocketV2Mode = ref<OpenAIWSMode>(OPENAI_WS_MODE_OFF
const openaiAPIKeyResponsesWebSocketV2Mode = ref<OpenAIWSMode>(OPENAI_WS_MODE_OFF)
const codexCLIOnlyEnabled = ref(false)
const anthropicPassthroughEnabled = ref(false)
const editQuotaLimit = ref<number | null>(null)
const openAIWSModeOptions = computed(() => [
{ value: OPENAI_WS_MODE_OFF, label: t('admin.accounts.openai.wsModeOff') },
{ value: OPENAI_WS_MODE_CTX_POOL, label: t('admin.accounts.openai.wsModeCtxPool') },
// TODO: ctx_pool 选项暂时隐藏,待测试完成后恢复
// { value: OPENAI_WS_MODE_CTX_POOL, label: t('admin.accounts.openai.wsModeCtxPool') },
{ value: OPENAI_WS_MODE_PASSTHROUGH, label: t('admin.accounts.openai.wsModePassthrough') }
])
const openaiResponsesWebSocketV2Mode = computed({
......@@ -1464,6 +1478,7 @@ const form = reactive({
notes: '',
proxy_id: null as number | null,
concurrency: 1,
load_factor: null as number | null,
priority: 1,
rate_multiplier: 1,
status: 'active' as 'active' | 'inactive',
......@@ -1497,9 +1512,12 @@ watch(
form.notes = newAccount.notes || ''
form.proxy_id = newAccount.proxy_id
form.concurrency = newAccount.concurrency
form.load_factor = newAccount.load_factor ?? null
form.priority = newAccount.priority
form.rate_multiplier = newAccount.rate_multiplier ?? 1
form.status = newAccount.status as 'active' | 'inactive'
form.status = (newAccount.status === 'active' || newAccount.status === 'inactive')
? newAccount.status
: 'active'
form.group_ids = newAccount.group_ids || []
form.expires_at = newAccount.expires_at ?? null
......@@ -1540,6 +1558,14 @@ watch(
anthropicPassthroughEnabled.value = extra?.anthropic_passthrough === true
}
// Load quota limit for apikey accounts
if (newAccount.type === 'apikey') {
const quotaVal = extra?.quota_limit as number | undefined
editQuotaLimit.value = (quotaVal && quotaVal > 0) ? quotaVal : null
} else {
editQuotaLimit.value = null
}
// Load antigravity model mapping (Antigravity 只支持映射模式)
if (newAccount.platform === 'antigravity') {
const credentials = newAccount.credentials as Record<string, unknown> | undefined
......@@ -2039,6 +2065,11 @@ const handleSubmit = async () => {
if (!props.account) return
const accountID = props.account.id
if (form.status !== 'active' && form.status !== 'inactive') {
appStore.showError(t('admin.accounts.pleaseSelectStatus'))
return
}
const updatePayload: Record<string, unknown> = { ...form }
try {
// 后端期望 proxy_id: 0 表示清除代理,而不是 null
......@@ -2048,6 +2079,11 @@ const handleSubmit = async () => {
if (form.expires_at === null) {
updatePayload.expires_at = 0
}
// load_factor: 空值/NaN/0/负数 时发送 0(后端约定 <= 0 = 清除)
const lf = form.load_factor
if (lf == null || Number.isNaN(lf) || lf <= 0) {
updatePayload.load_factor = 0
}
updatePayload.auto_pause_on_expired = autoPauseOnExpired.value
// For apikey type, handle credentials update
......@@ -2187,8 +2223,11 @@ const handleSubmit = async () => {
}
// RPM limit settings
if (rpmLimitEnabled.value && baseRpm.value != null && baseRpm.value > 0) {
newExtra.base_rpm = baseRpm.value
if (rpmLimitEnabled.value) {
const DEFAULT_BASE_RPM = 15
newExtra.base_rpm = (baseRpm.value != null && baseRpm.value > 0)
? baseRpm.value
: DEFAULT_BASE_RPM
newExtra.rpm_strategy = rpmStrategy.value
if (rpmStickyBuffer.value != null && rpmStickyBuffer.value > 0) {
newExtra.rpm_sticky_buffer = rpmStickyBuffer.value
......@@ -2282,6 +2321,19 @@ const handleSubmit = async () => {
updatePayload.extra = newExtra
}
// For apikey accounts, handle quota_limit in extra
if (props.account.type === 'apikey') {
const currentExtra = (updatePayload.extra as Record<string, unknown>) ||
(props.account.extra as Record<string, unknown>) || {}
const newExtra: Record<string, unknown> = { ...currentExtra }
if (editQuotaLimit.value != null && editQuotaLimit.value > 0) {
newExtra.quota_limit = editQuotaLimit.value
} else {
delete newExtra.quota_limit
}
updatePayload.extra = newExtra
}
const canContinue = await ensureAntigravityMixedChannelConfirmed(async () => {
await submitUpdateAccount(accountID, updatePayload)
})
......
<script setup lang="ts">
/**
 * QuotaLimitCard — toggle plus amount input for an account quota limit.
 *
 * Bound with v-model: `modelValue` is the limit amount, `null` meaning "no limit".
 * Switching the toggle off emits `null`.
 */
import { ref, watch } from 'vue'
import { useI18n } from 'vue-i18n'

const { t } = useI18n()

const props = defineProps<{
  // Current quota limit; null when no limit is configured.
  modelValue: number | null
}>()

const emit = defineEmits<{
  'update:modelValue': [value: number | null]
}>()

// Whether the amount editor is switched on; initially on only for a positive limit.
const enabled = ref(props.modelValue != null && props.modelValue > 0)

// Value most recently emitted from the amount input that has not yet come back
// through props. Lets the watcher below tell the user's own typing apart from an
// external change such as loading another account.
let pendingInputEcho: { value: number | null } | null = null

// Sync enabled state when modelValue changes externally (e.g. account load).
// Echoes of the user's own input are ignored: otherwise typing "0" (the start of
// "0.5") or clearing the field would switch the toggle off, hide the input
// mid-edit and reset the value to null.
watch(
  () => props.modelValue,
  (val) => {
    const isOwnEcho = pendingInputEcho !== null && pendingInputEcho.value === val
    pendingInputEcho = null
    if (isOwnEcho) {
      return
    }
    enabled.value = val != null && val > 0
  }
)

// When toggle is turned off, clear the value
watch(enabled, (val) => {
  if (!val) {
    emit('update:modelValue', null)
  }
})

// Forward the typed amount to the parent; an empty or unparsable field becomes null.
const onInput = (e: Event) => {
  const raw = (e.target as HTMLInputElement).valueAsNumber
  const next = Number.isNaN(raw) ? null : raw
  // Only expect an echo when the prop will actually change; an unchanged value
  // never triggers the watcher, so a marker left behind would go stale.
  pendingInputEcho = next === props.modelValue ? null : { value: next }
  emit('update:modelValue', next)
}
</script>
<template>
  <!-- Quota limit card: heading, on/off switch and (when switched on) the amount input. -->
  <div class="border-t border-gray-200 pt-4 dark:border-dark-600 space-y-4">
    <div class="mb-3">
      <h3 class="input-label mb-0 text-base font-semibold">{{ t('admin.accounts.quotaLimit') }}</h3>
      <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
        {{ t('admin.accounts.quotaLimitHint') }}
      </p>
    </div>
    <div class="rounded-lg border border-gray-200 p-4 dark:border-dark-600">
      <div class="mb-3 flex items-center justify-between">
        <div>
          <label class="input-label mb-0">{{ t('admin.accounts.quotaLimitToggle') }}</label>
          <p class="mt-1 text-xs text-gray-500 dark:text-gray-400">
            {{ t('admin.accounts.quotaLimitToggleHint') }}
          </p>
        </div>
        <!-- Exposed as an ARIA switch so assistive technology announces its name and on/off state. -->
        <button
          type="button"
          role="switch"
          :aria-checked="enabled"
          :aria-label="t('admin.accounts.quotaLimitToggle')"
          @click="enabled = !enabled"
          :class="[
            'relative inline-flex h-6 w-11 flex-shrink-0 cursor-pointer rounded-full border-2 border-transparent transition-colors duration-200 ease-in-out focus:outline-none focus:ring-2 focus:ring-primary-500 focus:ring-offset-2',
            enabled ? 'bg-primary-600' : 'bg-gray-200 dark:bg-dark-600'
          ]"
        >
          <span
            :class="[
              'pointer-events-none inline-block h-5 w-5 transform rounded-full bg-white shadow ring-0 transition duration-200 ease-in-out',
              enabled ? 'translate-x-5' : 'translate-x-0'
            ]"
          />
        </button>
      </div>
      <!-- Amount editor, rendered only while the switch is on. -->
      <div v-if="enabled" class="space-y-3">
        <div>
          <label class="input-label">{{ t('admin.accounts.quotaLimitAmount') }}</label>
          <div class="relative">
            <span class="absolute left-3 top-1/2 -translate-y-1/2 text-gray-500 dark:text-gray-400">$</span>
            <input
              :value="modelValue"
              @input="onInput"
              type="number"
              min="0"
              step="0.01"
              class="input pl-7"
              :aria-label="t('admin.accounts.quotaLimitAmount')"
              :placeholder="t('admin.accounts.quotaLimitPlaceholder')"
            />
          </div>
          <p class="input-hint">{{ t('admin.accounts.quotaLimitAmountHint') }}</p>
        </div>
      </div>
    </div>
  </div>
</template>
......@@ -41,6 +41,10 @@
<Icon name="clock" size="sm" />
{{ t('admin.accounts.clearRateLimit') }}
</button>
<button v-if="hasQuotaLimit" @click="$emit('reset-quota', account); $emit('close')" class="flex w-full items-center gap-2 px-4 py-2 text-sm text-teal-600 hover:bg-gray-100 dark:hover:bg-dark-700">
<Icon name="refresh" size="sm" />
{{ t('admin.accounts.resetQuota') }}
</button>
</template>
</div>
</div>
......@@ -55,7 +59,7 @@ import { Icon } from '@/components/icons'
import type { Account } from '@/types'
const props = defineProps<{ show: boolean; account: Account | null; position: { top: number; left: number } | null }>()
const emit = defineEmits(['close', 'test', 'stats', 'schedule', 'reauth', 'refresh-token', 'reset-status', 'clear-rate-limit'])
const emit = defineEmits(['close', 'test', 'stats', 'schedule', 'reauth', 'refresh-token', 'reset-status', 'clear-rate-limit', 'reset-quota'])
const { t } = useI18n()
const isRateLimited = computed(() => {
if (props.account?.rate_limit_reset_at && new Date(props.account.rate_limit_reset_at) > new Date()) {
......@@ -71,6 +75,12 @@ const isRateLimited = computed(() => {
return false
})
// Truthy while the account has an overload_until timestamp that lies in the future.
// Because of the `&&` chain, a missing account or an empty overload_until yields that
// falsy value itself (e.g. undefined) rather than a strict `false`.
const isOverloaded = computed(() => props.account?.overload_until && new Date(props.account.overload_until) > new Date())
// True when the account is an 'apikey' account with a positive quota_limit configured;
// false when there is no account, another account type, or no positive limit.
const hasQuotaLimit = computed(() => {
  const account = props.account
  if (account?.type === 'apikey') {
    const limit = account.quota_limit
    // `!= null` rejects both undefined and null.
    return limit != null && limit > 0
  }
  return false
})
const handleKeydown = (event: KeyboardEvent) => {
if (event.key === 'Escape') emit('close')
......
......@@ -63,7 +63,8 @@ const chartColors = computed(() => ({
grid: isDarkMode.value ? '#374151' : '#e5e7eb',
input: '#3b82f6',
output: '#10b981',
cache: '#f59e0b'
cacheCreation: '#f59e0b',
cacheRead: '#06b6d4'
}))
const chartData = computed(() => {
......@@ -89,10 +90,18 @@ const chartData = computed(() => {
tension: 0.3
},
{
label: 'Cache',
data: props.trendData.map((d) => d.cache_tokens),
borderColor: chartColors.value.cache,
backgroundColor: `${chartColors.value.cache}20`,
label: 'Cache Creation',
data: props.trendData.map((d) => d.cache_creation_tokens),
borderColor: chartColors.value.cacheCreation,
backgroundColor: `${chartColors.value.cacheCreation}20`,
fill: true,
tension: 0.3
},
{
label: 'Cache Read',
data: props.trendData.map((d) => d.cache_read_tokens),
borderColor: chartColors.value.cacheRead,
backgroundColor: `${chartColors.value.cacheRead}20`,
fill: true,
tension: 0.3
}
......
......@@ -443,7 +443,22 @@ $env:ANTHROPIC_AUTH_TOKEN="${apiKey}"`
content = ''
}
return [{ path, content }]
const vscodeSettingsPath = activeTab.value === 'unix'
? '~/.claude/settings.json'
: '%userprofile%\\.claude\\settings.json'
const vscodeContent = `{
"env": {
"ANTHROPIC_BASE_URL": "${baseUrl}",
"ANTHROPIC_AUTH_TOKEN": "${apiKey}",
"CLAUDE_CODE_ATTRIBUTION_HEADER": "0"
}
}`
return [
{ path, content },
{ path: vscodeSettingsPath, content: vscodeContent, hint: 'VSCode Claude Code' }
]
}
function generateGeminiCliContent(baseUrl: string, apiKey: string): FileConfig {
......@@ -496,16 +511,18 @@ function generateOpenAIFiles(baseUrl: string, apiKey: string): FileConfig[] {
const configDir = isWindows ? '%userprofile%\\.codex' : '~/.codex'
// config.toml content
const configContent = `model_provider = "sub2api"
model = "gpt-5.3-codex"
model_reasoning_effort = "high"
network_access = "enabled"
const configContent = `model_provider = "OpenAI"
model = "gpt-5.4"
review_model = "gpt-5.4"
model_reasoning_effort = "xhigh"
disable_response_storage = true
network_access = "enabled"
windows_wsl_setup_acknowledged = true
model_verbosity = "high"
model_context_window = 1000000
model_auto_compact_token_limit = 900000
[model_providers.sub2api]
name = "sub2api"
[model_providers.OpenAI]
name = "OpenAI"
base_url = "${baseUrl}"
wire_api = "responses"
requires_openai_auth = true`
......@@ -533,16 +550,18 @@ function generateOpenAIWsFiles(baseUrl: string, apiKey: string): FileConfig[] {
const configDir = isWindows ? '%userprofile%\\.codex' : '~/.codex'
// config.toml content with WebSocket v2
const configContent = `model_provider = "sub2api"
model = "gpt-5.3-codex"
model_reasoning_effort = "high"
network_access = "enabled"
const configContent = `model_provider = "OpenAI"
model = "gpt-5.4"
review_model = "gpt-5.4"
model_reasoning_effort = "xhigh"
disable_response_storage = true
network_access = "enabled"
windows_wsl_setup_acknowledged = true
model_verbosity = "high"
model_context_window = 1000000
model_auto_compact_token_limit = 900000
[model_providers.sub2api]
name = "sub2api"
[model_providers.OpenAI]
name = "OpenAI"
base_url = "${baseUrl}"
wire_api = "responses"
supports_websockets = true
......@@ -655,6 +674,22 @@ function generateOpenCodeConfig(platform: string, baseUrl: string, apiKey: strin
xhigh: {}
}
},
'gpt-5.4': {
name: 'GPT-5.4',
limit: {
context: 1050000,
output: 128000
},
options: {
store: false
},
variants: {
low: {},
medium: {},
high: {},
xhigh: {}
}
},
'gpt-5.3-codex-spark': {
name: 'GPT-5.3 Codex Spark',
limit: {
......
......@@ -2,6 +2,13 @@ import { describe, expect, it } from 'vitest'
import { buildModelMappingObject, getModelsByPlatform } from '../useModelWhitelist'
describe('useModelWhitelist', () => {
// The OpenAI model list must expose both the GPT-5.4 alias and its dated snapshot.
it('openai 模型列表包含 GPT-5.4 官方快照', () => {
  const openaiModelIds = getModelsByPlatform('openai')
  for (const expectedId of ['gpt-5.4', 'gpt-5.4-2026-03-05']) {
    expect(openaiModelIds).toContain(expectedId)
  }
})
it('antigravity 模型列表包含图片模型兼容项', () => {
const models = getModelsByPlatform('antigravity')
......@@ -15,4 +22,12 @@ describe('useModelWhitelist', () => {
'gemini-3.1-flash-image': 'gemini-3.1-flash-image'
})
})
// In whitelist mode a dated GPT-5.4 snapshot must map to itself unchanged.
it('whitelist 模式会保留 GPT-5.4 官方快照的精确映射', () => {
  const snapshotId = 'gpt-5.4-2026-03-05'
  const result = buildModelMappingObject('whitelist', [snapshotId], [])
  expect(result).toEqual({ [snapshotId]: snapshotId })
})
})
......@@ -24,6 +24,8 @@ const openaiModels = [
// GPT-5.2 系列
'gpt-5.2', 'gpt-5.2-2025-12-11', 'gpt-5.2-chat-latest',
'gpt-5.2-codex', 'gpt-5.2-pro', 'gpt-5.2-pro-2025-12-11',
// GPT-5.4 系列
'gpt-5.4', 'gpt-5.4-2026-03-05',
// GPT-5.3 系列
'gpt-5.3-codex', 'gpt-5.3-codex-spark',
'chatgpt-4o-latest',
......@@ -277,6 +279,7 @@ const openaiPresetMappings = [
{ label: 'GPT-5.3 Codex Spark', from: 'gpt-5.3-codex-spark', to: 'gpt-5.3-codex-spark', color: 'bg-teal-100 text-teal-700 hover:bg-teal-200 dark:bg-teal-900/30 dark:text-teal-400' },
{ label: 'GPT-5.1', from: 'gpt-5.1', to: 'gpt-5.1', color: 'bg-orange-100 text-orange-700 hover:bg-orange-200 dark:bg-orange-900/30 dark:text-orange-400' },
{ label: 'GPT-5.2', from: 'gpt-5.2', to: 'gpt-5.2', color: 'bg-red-100 text-red-700 hover:bg-red-200 dark:bg-red-900/30 dark:text-red-400' },
{ label: 'GPT-5.4', from: 'gpt-5.4', to: 'gpt-5.4', color: 'bg-rose-100 text-rose-700 hover:bg-rose-200 dark:bg-rose-900/30 dark:text-rose-400' },
{ label: 'GPT-5.1 Codex', from: 'gpt-5.1-codex', to: 'gpt-5.1-codex', color: 'bg-cyan-100 text-cyan-700 hover:bg-cyan-200 dark:bg-cyan-900/30 dark:text-cyan-400' }
]
......
......@@ -133,6 +133,8 @@ export default {
requests: 'Requests',
inputTokens: 'Input Tokens',
outputTokens: 'Output Tokens',
cacheCreationTokens: 'Cache Creation',
cacheReadTokens: 'Cache Read',
totalTokens: 'Total Tokens',
cost: 'Cost',
// Status
......@@ -155,11 +157,19 @@ export default {
subscriptionExpires: 'Subscription Expires',
// Usage stat cells
todayRequests: 'Today Requests',
todayInputTokens: 'Today Input',
todayOutputTokens: 'Today Output',
todayTokens: 'Today Tokens',
todayCacheCreation: 'Today Cache Creation',
todayCacheRead: 'Today Cache Read',
todayCost: 'Today Cost',
rpmTpm: 'RPM / TPM',
totalRequests: 'Total Requests',
totalInputTokens: 'Total Input',
totalOutputTokens: 'Total Output',
totalTokensLabel: 'Total Tokens',
totalCacheCreation: 'Total Cache Creation',
totalCacheRead: 'Total Cache Read',
totalCost: 'Total Cost',
avgDuration: 'Avg Duration',
// Messages
......@@ -1724,6 +1734,10 @@ export default {
stickyExemptWarning: 'RPM limit (Sticky Exempt) - Approaching limit',
stickyExemptOver: 'RPM limit (Sticky Exempt) - Over limit, sticky only'
},
quota: {
exceeded: 'Quota exceeded, account paused',
normal: 'Quota normal'
},
},
tempUnschedulable: {
title: 'Temp Unschedulable',
......@@ -1769,6 +1783,14 @@ export default {
}
},
clearRateLimit: 'Clear Rate Limit',
resetQuota: 'Reset Quota',
quotaLimit: 'Quota Limit',
quotaLimitPlaceholder: '0 means unlimited',
quotaLimitHint: 'Set max spending limit (USD). Account will be paused when reached. Changing limit won\'t reset usage.',
quotaLimitToggle: 'Enable Quota Limit',
quotaLimitToggleHint: 'When enabled, account will be paused when usage reaches the set limit',
quotaLimitAmount: 'Limit Amount',
quotaLimitAmountHint: 'Maximum spending limit (USD). Account will be auto-paused when reached. Changing limit won\'t reset usage.',
testConnection: 'Test Connection',
reAuthorize: 'Re-Authorize',
refreshToken: 'Refresh Token',
......@@ -1981,10 +2003,12 @@ export default {
proxy: 'Proxy',
noProxy: 'No Proxy',
concurrency: 'Concurrency',
loadFactor: 'Load Factor',
loadFactorHint: 'Higher load factor increases scheduling frequency',
priority: 'Priority',
priorityHint: 'Lower value accounts are used first',
billingRateMultiplier: 'Billing Rate Multiplier',
billingRateMultiplierHint: '>=0, 0 means free. Affects account billing only',
billingRateMultiplierHint: '0 = free, affects account billing only',
expiresAt: 'Expires At',
expiresAtHint: 'Leave empty for no expiration',
higherPriorityFirst: 'Lower value means higher priority',
......@@ -2000,6 +2024,7 @@ export default {
accountUpdated: 'Account updated successfully',
failedToCreate: 'Failed to create account',
failedToUpdate: 'Failed to update account',
pleaseSelectStatus: 'Please select a valid account status',
mixedChannelWarningTitle: 'Mixed Channel Warning',
mixedChannelWarning: 'Warning: Group "{groupName}" contains both {currentPlatform} and {otherPlatform} accounts. Mixing different channels may cause thinking block signature validation issues, which will fallback to non-thinking mode. Are you sure you want to continue?',
pleaseEnterAccountName: 'Please enter account name',
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment