Commit 544ad0ec authored by Lei Li
Browse files

feat: 增加升级失败回滚操作

parent a3280358
# Default ignored files
/shelf/
/workspace.xml
# Editor-based HTTP Client requests
/httpRequests/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/public.iml" filepath="$PROJECT_DIR$/.idea/public.iml" />
</modules>
</component>
</project>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<module type="WEB_MODULE" version="4">
<component name="Go" enabled="true" />
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>
\ No newline at end of file
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>
\ No newline at end of file
...@@ -14,6 +14,7 @@ import ( ...@@ -14,6 +14,7 @@ import (
"time" "time"
"linkfog.com/public/lib/common" "linkfog.com/public/lib/common"
"linkfog.com/public/lib/file"
"linkfog.com/public/lib/l" "linkfog.com/public/lib/l"
) )
...@@ -43,6 +44,8 @@ type MonitorDaemon struct { ...@@ -43,6 +44,8 @@ type MonitorDaemon struct {
restartInterval *restartInterval restartInterval *restartInterval
cmdline string cmdline string
childPid int childPid int
agentMd5 string
cmdline0 string
isProcAbnormalCallback IsProcAbnormalCallback isProcAbnormalCallback IsProcAbnormalCallback
} }
...@@ -146,8 +149,16 @@ func New(cmdline string, opts ...MonitorDaemonOpt) (*MonitorDaemon, error) { ...@@ -146,8 +149,16 @@ func New(cmdline string, opts ...MonitorDaemonOpt) (*MonitorDaemon, error) {
opt(&m) opt(&m)
} }
m.cmdline0 = strings.Split(m.cmdline, " ")[0]
md5, err := file.GetFileMD5(m.cmdline0)
if err != nil {
l.Errorf("calculation of agent MD5(%s) failed err:%s", m.cmdline0, err)
} else {
m.agentMd5 = md5
}
//第一次启动进程 //第一次启动进程
err := m.startProcess() err = m.startProcess()
if err != nil { if err != nil {
l.Errorf("start child process failed %s err:%v", cmdline, err) l.Errorf("start child process failed %s err:%v", cmdline, err)
return nil, err return nil, err
...@@ -243,29 +254,32 @@ func (m *MonitorDaemon) runWithSignal(sigs []os.Signal) { ...@@ -243,29 +254,32 @@ func (m *MonitorDaemon) runWithSignal(sigs []os.Signal) {
} }
if !isAlive && m.restartInterval.isNeedStart() { if !isAlive && m.restartInterval.isNeedStart() {
//忽略错误,只打印日志
err := m.startProcess() md5, err := file.GetFileMD5(m.cmdline0)
if err != nil {
l.Errorf("calculation of agent MD5(%s) failed err:%s", m.cmdline0, err)
}
err = m.startProcess()
if err != nil { if err != nil {
// 异常情况,直接无法启动,进行回滚操作
errRetryTimes++ errRetryTimes++
l.Errorf("monitorProcess start child process failed %s, errRetryTimes:%d, err:%v", m.cmdline, errRetryTimes, err) l.Errorf("monitorProcess start child process failed %s, errRetryTimes:%d, err:%v", m.cmdline, errRetryTimes, err)
if errRetryTimes > m.maxRetryTimes { if md5 != m.agentMd5 && errRetryTimes > m.maxRetryTimes {
if _, err := os.Stat(m.agentBackupPath); err == nil || !os.IsNotExist(err) { m.rollbackForAbnormalCase()
agentName := strings.Split(m.cmdline, " ")[0]
if agentName != "" {
err := os.Rename(m.agentBackupPath, agentName)
if err != nil {
l.Errorf("monitorProcess rollback agent process failed err:%v", err)
} else {
errRetryTimes = 0 errRetryTimes = 0
} }
}
} else { } else {
if md5 == m.agentMd5 {
errRetryTimes++
// 异常情况,启动几秒后停止,进行回滚操作
if errRetryTimes > m.maxRetryTimes {
m.rollbackForAbnormalCase()
errRetryTimes = 0 errRetryTimes = 0
} }
}
} else { } else {
errRetryTimes = 0 m.agentMd5 = md5
l.Infof("monitorProcess start child process success pid: %d", m.childPid) }
l.Infof("monitorProcess start child process success pid: %d, errRetryTimes:%d", m.childPid, errRetryTimes)
} }
m.restartInterval.setLastStart() m.restartInterval.setLastStart()
} }
...@@ -435,3 +449,21 @@ func readStderr(stderrReader io.Reader) { ...@@ -435,3 +449,21 @@ func readStderr(stderrReader io.Reader) {
} }
} }
// rollbackForAbnormalCase restores the agent binary from its backup copy.
//
// It renames m.agentBackupPath over m.cmdline0 and, when the rename succeeds,
// recomputes the MD5 of the restored file and stores it in m.agentMd5.
// Every failure is only logged; nothing is returned to the caller.
func (m *MonitorDaemon) rollbackForAbnormalCase() {
	// Bail out only when the backup is positively reported as missing. Any
	// other Stat error still falls through to the rename attempt below.
	if _, statErr := os.Stat(m.agentBackupPath); statErr != nil && os.IsNotExist(statErr) {
		return
	}

	// Without a target path there is nothing to restore onto.
	if m.cmdline0 == "" {
		return
	}

	if renameErr := os.Rename(m.agentBackupPath, m.cmdline0); renameErr != nil {
		l.Errorf("monitorProcess rollback agent process failed err:%v", renameErr)
		return
	}

	// Refresh the recorded checksum so it describes the restored binary.
	// On failure the previous m.agentMd5 is kept untouched.
	sum, sumErr := file.GetFileMD5(m.cmdline0)
	if sumErr != nil {
		l.Errorf("calculation of agent MD5(%s) failed err:%s", m.cmdline0, sumErr)
	} else {
		m.agentMd5 = sum
	}

	// Logged after every successful rename, even if the MD5 step failed; the
	// message then carries whatever value GetFileMD5 returned.
	l.Infof("monitorProcess rollback agent process successful md5:%s", sum)
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment