Commit 5c39e6f2 authored by QTom's avatar QTom
Browse files

fix(ops_alert): wg.Add 竞态修复 + leader lock release context 泄漏

1. Start() 中 wg.Add(1) 从 run() goroutine 内部移到 go s.run() 之前,
   防止 Stop().wg.Wait() 在 Add 之前返回导致孤儿 goroutine。
2. tryAcquireLeaderLock 返回的 release 闭包改用独立的
   context.Background()+5s 超时,避免捕获的 evaluateOnce ctx
   在 defer 执行时已过期导致锁释放失败(最长阻塞 90s TTL)。
parent a225a241
......@@ -88,6 +88,7 @@ func (s *OpsAlertEvaluatorService) Start() {
if s.stopCh == nil {
s.stopCh = make(chan struct{})
}
s.wg.Add(1)
go s.run()
})
}
......@@ -105,7 +106,6 @@ func (s *OpsAlertEvaluatorService) Stop() {
}
func (s *OpsAlertEvaluatorService) run() {
s.wg.Add(1)
defer s.wg.Done()
// Start immediately to produce early feedback in ops dashboard.
......@@ -848,7 +848,9 @@ func (s *OpsAlertEvaluatorService) tryAcquireLeaderLock(ctx context.Context, loc
return nil, false
}
return func() {
_, _ = opsAlertEvaluatorReleaseScript.Run(ctx, s.redisClient, []string{key}, s.instanceID).Result()
releaseCtx, releaseCancel := context.WithTimeout(context.Background(), 5*time.Second)
defer releaseCancel()
_, _ = opsAlertEvaluatorReleaseScript.Run(releaseCtx, s.redisClient, []string{key}, s.instanceID).Result()
}, true
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment