Commit 89455b57 authored by Lei Li's avatar Lei Li
Browse files

Initial commit

parents
package resources
import (
"github.com/shirou/gopsutil/mem"
)
// GetHostVirtualMemory returns the host virtual memory statistics by
// delegating directly to gopsutil's mem.VirtualMemory.
func GetHostVirtualMemory() (*mem.VirtualMemoryStat, error) {
return mem.VirtualMemory()
}
// GetHostVirtualMemoryEx returns the extended host virtual memory statistics
// by delegating directly to gopsutil's mem.VirtualMemoryEx.
func GetHostVirtualMemoryEx() (*mem.VirtualMemoryExStat, error) {
return mem.VirtualMemoryEx()
}
// GetHostSwapMemory returns the host swap memory statistics by delegating
// directly to gopsutil's mem.SwapMemory.
func GetHostSwapMemory() (*mem.SwapMemoryStat, error) {
return mem.SwapMemory()
}
package resources
import (
"time"
"github.com/shirou/gopsutil/net"
)
// HostNetIOUsage describes network I/O for one named counter set. It shares
// the field layout of net.IOCountersStat, but calculateNetIOUsage fills the
// counter fields with rates (delta divided by elapsed seconds) rather than
// cumulative totals.
type HostNetIOUsage net.IOCountersStat
// HostNetIOCountersStatWrap pairs a snapshot of network I/O counters with the
// time at which the snapshot was taken.
type HostNetIOCountersStatWrap struct {
// IOStat is the counter snapshot as returned by net.IOCounters.
IOStat []net.IOCountersStat
// Ts is the snapshot time in Unix seconds (populated from time.Now().Unix()).
Ts int64
}
// GetHostNetIOUsageWithInterval takes two net.IOCounters(false) snapshots,
// sleeping for d between them, and returns the rates derived from the pair.
//
// Snapshot timestamps have one-second resolution, so when both snapshots fall
// within the same second the elapsed time is zero and every rate is reported
// as 0.
//
// On failure an empty (non-nil) slice is returned together with the error.
func GetHostNetIOUsageWithInterval(d time.Duration) ([]HostNetIOUsage, error) {
	first, err := net.IOCounters(false)
	if err != nil {
		return []HostNetIOUsage{}, err
	}
	before := &HostNetIOCountersStatWrap{IOStat: first, Ts: time.Now().Unix()}

	time.Sleep(d)

	second, err := net.IOCounters(false)
	if err != nil {
		return []HostNetIOUsage{}, err
	}
	after := &HostNetIOCountersStatWrap{IOStat: second, Ts: time.Now().Unix()}

	return calculateNetIOUsage(before, after), nil
}
// GetHostNetIOUsageWithPre takes a fresh net.IOCounters(false) snapshot and
// returns the rates relative to preWrap, along with the new snapshot so the
// caller can pass it back in on the next call.
//
// When preWrap is nil the new snapshot is compared against itself, so every
// rate is 0. On failure an empty (non-nil) slice, a nil snapshot and the
// error are returned.
func GetHostNetIOUsageWithPre(preWrap *HostNetIOCountersStatWrap) (
	[]HostNetIOUsage, *HostNetIOCountersStatWrap, error) {
	counters, err := net.IOCounters(false)
	if err != nil {
		return []HostNetIOUsage{}, nil, err
	}
	current := &HostNetIOCountersStatWrap{IOStat: counters, Ts: time.Now().Unix()}

	// Without a previous sample, fall back to the current one as baseline.
	baseline := preWrap
	if baseline == nil {
		baseline = current
	}
	return calculateNetIOUsage(baseline, current), current, nil
}
// calculateNetIOUsage produces one HostNetIOUsage per entry of nowWrap.IOStat.
// Each entry is matched by Name against the first equally named entry of
// preWrap.IOStat; the counter fields are then set to the per-second rate
// between the two snapshots. Entries without a match keep only their Name and
// zero-valued counters.
func calculateNetIOUsage(preWrap, nowWrap *HostNetIOCountersStatWrap) []HostNetIOUsage {
	// Elapsed seconds between the snapshots, converted once for all fields.
	elapsed := uint64(nowWrap.Ts - preWrap.Ts)

	result := make([]HostNetIOUsage, 0)
	for i := range nowWrap.IOStat {
		cur := &nowWrap.IOStat[i]
		entry := HostNetIOUsage{Name: cur.Name}

		for j := range preWrap.IOStat {
			old := &preWrap.IOStat[j]
			if old.Name != cur.Name {
				continue
			}
			entry.BytesSent = iops(old.BytesSent, cur.BytesSent, elapsed)
			entry.BytesRecv = iops(old.BytesRecv, cur.BytesRecv, elapsed)
			entry.PacketsSent = iops(old.PacketsSent, cur.PacketsSent, elapsed)
			entry.PacketsRecv = iops(old.PacketsRecv, cur.PacketsRecv, elapsed)
			entry.Errin = iops(old.Errin, cur.Errin, elapsed)
			entry.Errout = iops(old.Errout, cur.Errout, elapsed)
			entry.Dropin = iops(old.Dropin, cur.Dropin, elapsed)
			entry.Dropout = iops(old.Dropout, cur.Dropout, elapsed)
			entry.Fifoin = iops(old.Fifoin, cur.Fifoin, elapsed)
			entry.Fifoout = iops(old.Fifoout, cur.Fifoout, elapsed)
			break // only the first matching previous entry is used
		}

		result = append(result, entry)
	}
	return result
}
// iops returns the average rate at which a counter grew from pre to now over
// duration time units, using integer division.
//
// It returns 0 when duration is 0 (avoiding a division by zero) and when the
// counter did not increase. The now <= pre comparison is done before the
// subtraction on purpose: the operands are unsigned, so computing now - pre
// for a counter that was reset or wrapped would underflow to a huge bogus
// value instead of going negative.
func iops(pre, now, duration uint64) uint64 {
	if duration == 0 || now <= pre {
		return 0
	}
	return (now - pre) / duration
}
package resources
import (
"linkfog.com/public/lib/l"
"linkfog.com/public/lib/unit"
)
// Package-level state shared by the getAndPrintHost* helpers.
var (
// Previous samples, handed back to the matching Get...WithPre call on the
// next round and replaced by the snapshot that call returns.
preHostDiskIOCountersStat *HostDiskIOCountersStatWrap
preHostCPUTimesStat *HostCPUTimesStatWrap
preHostNetIOCountersStat *HostNetIOCountersStatWrap
// Latest values reported by printHostSummary. cpuUsage is set by
// getAndPrintHostCPUInfo and memUsage by getAndPrintHostMemoryUsage.
cpuUsage float64
memUsage float64
// NOTE(review): diskUsage and inodeUsage are only assigned in the
// commented-out getAndPrintDockerRootDiskSpaceUsage in the visible code, so
// they appear to stay at 0 — confirm.
diskUsage float64
inodeUsage float64
)
// getAndPrintHostInfo runs every host-level collector in a fixed order and
// finishes with the one-line host summary.
func getAndPrintHostInfo() {
	steps := []func(){
		getAndPrintHostCPUInfo,
		getAndPrintHostCPUStates,
		getAndPrintHostMemoryUsage,
		// getAndPrintDockerRootDiskSpaceUsage,
		getAndPrintHostDiskSpaceUsage,
		getAndPrintHostDiskIOUsage,
		getAndPrintHostNetIOUsage,
		printHostSummary,
	}
	for _, step := range steps {
		step()
	}
}
// getAndPrintHostCPUInfo collects the host CPU count, usage and load
// averages and logs them at debug level. The usage is also stored in the
// package-level cpuUsage for the host summary. The first failing lookup is
// logged and aborts the function; cpuUsage is updated only if the usage
// lookup itself succeeded.
func getAndPrintHostCPUInfo() {
	count, err := GetHostCPUCount()
	if err != nil {
		l.Errorf("get host cpu count error: %v", err)
		return
	}

	pct, err := GetHostCPUUsage()
	if err != nil {
		l.Errorf("get host cpu usage error: %v", err)
		return
	}
	cpuUsage = pct

	load, err := GetHostCPULoad()
	if err != nil {
		l.Errorf("get host cpu load error: %v", err)
		return
	}

	l.Debugf("host cpu info, count:%d, usage:%.1f%%, load1:%.2f, load5:%.2f, load15:%.2f",
		count, pct, load.Load1, load.Load5, load.Load15)
}
func getAndPrintHostCPUStates() {
var states []HostCPUStatesPct
var err error
states, preHostCPUTimesStat, err = GetHostCPUStatesWithPre(preHostCPUTimesStat)
if err != nil {
l.Errorf("get host cpu states error: %v", err)
return
}
for _, state := range states {
l.Debugf("host cpu states, cpu:%s, us:%.1f%%, sy:%.1f%%, ni:%.1f%%, id:%.1f%%, wa:%.1f%%, "+
"hi:%.1f%%, si:%.1f%%, st:%.1f%%, gu:%.1f%%, gn:%.1f%%", state.CPU,
state.User, state.System, state.Nice, state.Idle, state.Iowait,
state.Irq, state.Softirq, state.Steal, state.Guest, state.GuestNice)
}
}
// getAndPrintHostMemoryUsage logs host virtual memory and swap statistics at
// debug level and records the used-memory percentage in the package-level
// memUsage. Each lookup failure is logged and aborts the function; memUsage
// is updated as soon as the basic virtual memory lookup succeeds.
func getAndPrintHostMemoryUsage() {
	vm, err := GetHostVirtualMemory()
	if err != nil {
		l.Errorf("get host virtual memory error: %v", err)
		return
	}
	memUsage = vm.UsedPercent

	vmEx, err := GetHostVirtualMemoryEx()
	if err != nil {
		l.Errorf("get host virtual memoryEx error: %v", err)
		return
	}

	l.Debugf("host mem usage, total:%s, used:%s, avail:%s, act_file:%s, inact_file:%s, "+
		"act_anon:%s, inact_anon:%s, buff:%s, cache:%s, shr:%s, slab:%s",
		unit.ByteSize(vm.Total),
		unit.ByteSize(vm.Used),
		unit.ByteSize(vm.Available),
		unit.ByteSize(vmEx.ActiveFile),
		unit.ByteSize(vmEx.InactiveFile),
		unit.ByteSize(vmEx.ActiveAnon),
		unit.ByteSize(vmEx.InactiveAnon),
		unit.ByteSize(vm.Buffers),
		unit.ByteSize(vm.Cached),
		unit.ByteSize(vm.Shared),
		unit.ByteSize(vm.Slab))

	swap, err := GetHostSwapMemory()
	if err != nil {
		l.Errorf("get host swap memory error: %v", err)
		return
	}

	// The "avail" figure for swap is the Free field of the swap statistics.
	l.Debugf("host swap mem usage, total:%s, used:%s, avail:%s",
		unit.ByteSize(swap.Total),
		unit.ByteSize(swap.Used),
		unit.ByteSize(swap.Free))
}
//func getAndPrintDockerRootDiskSpaceUsage() {
// usage, err := GetDockerRootDiskSpaceUsage()
// if err != nil {
// l.Errorf("get docker root disk space usage error: %v", err)
// return
// }
// diskUsage = usage.UsedPercent
// inodeUsage = usage.InodesUsedPercent
//
// l.Debugf("host docker root disk space usage, path:%s, size:%s, used:%s, avail:%s, usage:%.2f%%, "+
// "isize:%s, iused:%s, iavail:%s, iusage:%.2f%%", usage.Path,
// unit.ByteSize(usage.Total), unit.ByteSize(usage.Used),
// unit.ByteSize(usage.Free), usage.UsedPercent,
// unit.ByteSize(usage.InodesTotal), unit.ByteSize(usage.InodesUsed),
// unit.ByteSize(usage.InodesFree), usage.InodesUsedPercent)
//}
// getAndPrintHostDiskSpaceUsage logs space and inode usage for each host
// partition at debug level. Only the first entry per device is logged; later
// entries for an already seen device are skipped.
func getAndPrintHostDiskSpaceUsage() {
	partitions, err := GetHostDiskSpaceUsage()
	if err != nil {
		l.Errorf("get host disk space usage error: %v", err)
		return
	}

	seen := make(map[string]struct{})
	for _, p := range partitions {
		dev := p.PartitionStat.Device
		if _, dup := seen[dev]; dup {
			continue
		}
		seen[dev] = struct{}{}

		l.Debugf("host disk space usage, dev:%s, size:%s, used:%s, avail:%s, usage:%.2f%%, "+
			"isize:%d, iused:%d, iavail:%d, iusage:%.2f%%",
			dev,
			unit.ByteSize(p.UsageStat.Total),
			unit.ByteSize(p.UsageStat.Used),
			unit.ByteSize(p.UsageStat.Free),
			p.UsageStat.UsedPercent,
			p.UsageStat.InodesTotal,
			p.UsageStat.InodesUsed,
			p.UsageStat.InodesFree,
			p.UsageStat.InodesUsedPercent)
	}
}
func getAndPrintHostDiskIOUsage() {
var usages []HostDiskIOUsage
var err error
usages, preHostDiskIOCountersStat, err = GetHostDiskIOUsageWithPre(preHostDiskIOCountersStat)
if err != nil {
l.Errorf("get host disk io usage error: %v", err)
return
}
for _, usage := range usages {
l.Debugf("host disk io stats, dev:%s, rps:%s, wps:%s, r:%s, w:%s", usage.Device,
unit.ByteSize(usage.ReadUsage), unit.ByteSize(usage.WriteUsage),
unit.ByteSize(usage.ReadBytes), unit.ByteSize(usage.WriteBytes))
}
}
func getAndPrintHostNetIOUsage() {
var usages []HostNetIOUsage
var err error
usages, preHostNetIOCountersStat, err = GetHostNetIOUsageWithPre(preHostNetIOCountersStat)
if err != nil {
l.Errorf("get host net io usage error: %v", err)
return
}
for _, usage := range usages {
l.Infof("host net io stats, if:%s, "+
"rx/s:%s, tx/s:%s, rx_pkg/s:%d, tx_pkg/s:%d, rx_err/s:%d, tx_err/s:%d, "+
"rx_drop/s:%d, tx_drop/s:%d, rx_fifo/s:%d, tx_fifo/s:%d", usage.Name,
unit.ByteSize(usage.BytesRecv), unit.ByteSize(usage.BytesSent),
usage.PacketsRecv, usage.PacketsSent,
usage.Errin, usage.Errout,
usage.Dropin, usage.Dropout,
usage.Fifoin, usage.Fifoout)
}
}
// printHostSummary logs, at info level, the package-level cpuUsage, memUsage,
// diskUsage and inodeUsage values as percentages with two decimals.
func printHostSummary() {
l.Infof("host stats, cpu_usage:%.2f%%, mem_usage:%.2f%%, disk_usage:%.2f%%, inode_usage:%.2f%%",
cpuUsage, memUsage, diskUsage, inodeUsage)
}
package resources
import (
"agent/cmd/agent/global"
"errors"
"linkfog.com/public/lib/l"
"time"
)
// Default timing parameters passed to NewWatch by New.
var (
detectInterval = 30 * time.Second // interval between periodic detection rounds
detectCPURateRange = 30 * time.Second // window over which the steady CPU usage ratio is computed
)
// Resources is the resources module: it owns a Watch and tracks whether the
// module has been started.
type Resources struct {
// isRunning is set by Start and cleared by Stop.
isRunning bool
// watch is created by New via NewWatch; it is nil when NewWatch failed.
watch *Watch
}
// New builds a Resources module whose watch uses the package default
// detection interval and CPU rate window. NewWatch may return nil; in that
// case the module is still returned and Start reports the missing watch.
func New() *Resources {
	return &Resources{
		watch: NewWatch(WatchConfig{
			DetectInterval:     detectInterval,
			DetectCPURateRange: detectCPURateRange,
		}),
	}
}
// Name returns the module name, global.ResourceModuleName.
func (r *Resources) Name() string {
return global.ResourceModuleName
}
// Start launches the resource watch loop in a new goroutine and marks the
// module as running. Calling it while already running is a no-op that
// returns nil. It returns an error when the module has no watch.
func (r *Resources) Start() error {
	if r.IsRunning() {
		return nil
	}

	l.Info("init resources module")

	watcher := r.watch
	if watcher == nil {
		return errors.New("resources watch is nil")
	}

	l.Info("start resources watch")
	go watcher.StartWatch()
	r.isRunning = true

	l.Info("init resources module success")
	return nil
}
// Stop marks the module as not running and asks the watch to stop. It does
// nothing when the module is not running.
func (r *Resources) Stop() {
	if !r.IsRunning() {
		return
	}
	r.isRunning = false
	r.watch.Stop()
}
// IsRunning reports whether Start has marked the module as running and Stop
// has not cleared that flag since.
func (r *Resources) IsRunning() bool {
return r.isRunning
}
package resources
import (
"agent/cmd/agent/config"
"os"
"path/filepath"
"runtime"
"sort"
"time"
cgroupStats "linkfog.com/public/lib/cgroup/stats"
cgroupTypes "linkfog.com/public/lib/cgroup/types"
"linkfog.com/public/lib/l"
"linkfog.com/public/lib/unit"
"github.com/prometheus/procfs"
"github.com/shirou/gopsutil/process"
)
// Watch periodically samples and logs host, process and container (cgroup)
// resource usage.
type Watch struct {
cfg WatchConfig
cgroup *cgroupStats.Stats // collects cgroup cpu and memory
// Latest sampled values, refreshed by the get* methods on each round.
curMemUsage float64
curKMemUsage float64
curCPUUsage float64
curCPULoad float64
curIOUsage map[string]*cgroupTypes.IOUsage
// NOTE(review): the counters and hasReachLimit below are not read or
// written anywhere in the visible code; presumably consecutive-warning
// counters for the thresholds in WatchConfig — confirm.
memSeqWarnCnt int
cpuSeqWarnCnt int
cpuLoadSeqWarnCnt int
ioRdSeqWarnCnt int
ioWrSeqWarnCnt int
hasReachLimit bool
// running is set by StartWatch and cleared by Stop to end the loop.
running bool
}
// WatchConfig holds the limits, warning thresholds and timing parameters for
// a Watch.
type WatchConfig struct {
MaxMem float64 // maximum memory limit
MaxCPU float64 // maximum cpu limit
MaxCPULoad float64 // maximum cpu load limit (loadavg 5m)
MaxRIO float64 // maximum read io limit
MaxWIO float64 // maximum write io limit
MemWarnPct float64 // memory warning threshold
MemSeqWarnTimes int // degrade after n consecutive memory warnings
CPUWarnPct float64 // cpu warning threshold
CPUSeqWarnTimes int // degrade after n consecutive cpu warnings
CPULoadSeqWarnTimes int // degrade after n consecutive cpu load warnings
IOWarnPct float64 // io warning threshold
IOSeqWarnTimes int // degrade after n consecutive io warnings
PProfProfilePath string // directory where collected pprof data is stored
PProfCollectDura time.Duration // how long to collect pprof data
DetectInterval time.Duration // interval between periodic detection rounds
DetectCPURateRange time.Duration // window over which the steady CPU usage ratio is computed
ExpandToHostCgroupDir string
ExpandHostCgroupAvailMemSize uint64
}
// NewWatch creates a Watch backed by a cgroup stats collector configured with
// cfg's detection interval and CPU rate window. It logs the resolved cgroup
// directories and the configuration. It returns nil when the collector's
// cgroup path lookup fails.
func NewWatch(cfg WatchConfig) *Watch {
	l.Info("resources watch create cgroup stats")

	cg := cgroupStats.NewStats(-1, cfg.DetectInterval, cfg.DetectCPURateRange)
	if err := cg.TryCgroupPath(); err != nil {
		l.Error("resources watch cgroup stats try cgroup path err:", err)
		return nil
	}

	l.Infof("cgroup stats mem dir:%s", cg.GetMemoryDir())
	l.Infof("cgroup stats cpu dir:%s", cg.GetCPUDir())
	l.Infof("cgroup stats io dir:%s", cg.GetIODir())

	l.Infof("resources watch init, cfg:%+v", cfg)
	return &Watch{
		cfg:    cfg,
		cgroup: cg,
	}
}
// StartWatch runs the sampling loop on the calling goroutine: sleep for the
// configured detection interval, then sample and log resource usage, until
// the running flag is cleared. The flag is only checked before each sleep, so
// a stop request takes effect after the round in progress completes.
//
// NOTE(review): running is read here and written by Stop without any
// synchronisation; if the two run on different goroutines this is a data
// race — consider an atomic flag or a stop channel.
func (w *Watch) StartWatch() {
	w.running = true
	defer func() { w.running = false }()

	l.Info("resources watch start")
	for w.running {
		time.Sleep(w.cfg.DetectInterval)
		w.watchResourcesUsage()
	}
	l.Info("resources watch stopped")
}
// Stop clears the running flag so the StartWatch loop exits at its next
// check. It does nothing when the watch is not marked as running.
func (w *Watch) Stop() {
	if w.running {
		w.running = false
		l.Info("resources watch quit success")
	}
}
// watchResourcesUsage performs one sampling round: host info, process info,
// container (cgroup) info, then the resource usage summary.
func (w *Watch) watchResourcesUsage() {
getAndPrintHostInfo()
getAndPrintProcessInfo()
w.getAndPrintContainerInfo()
w.printResourcesUsage()
}
// getAndPrintProcessInfo logs the current process's goroutine count and open
// file descriptor count at info level. A failure to inspect the process is
// logged and nothing else is printed.
func getAndPrintProcessInfo() {
	goroutines := runtime.NumGoroutine()

	self, err := process.NewProcess(int32(os.Getpid()))
	if err != nil {
		l.Error("get process info error:", err)
		return
	}

	fds, err := self.NumFDs()
	if err != nil {
		l.Error("get process fd num error:", err)
		return
	}

	l.Infof("process stats, goroutine_num:%d, fd_num:%d", goroutines, fds)
}
// getAndPrintContainerInfo refreshes the cached cgroup usage figures (memory,
// cpu, load, io) and logs the container's memory statistics. When kernel
// memory usage is at least 200 MiB it additionally logs the first three
// entries of the sorted kmem slab info.
//
// Each lookup failure is logged and aborts the rest of the function; the
// cached figures refreshed before the failure are kept.
func (w *Watch) getAndPrintContainerInfo() {
	w.getMemUsage()
	w.getCPUUsage()
	w.getCPULoad()
	w.getIOUsage()

	memDir := w.cgroup.GetMemoryDir()

	mem, err := cgroupStats.GetMemoryUsage(memDir)
	if err != nil {
		l.Error("get memory usage error,", err)
		return
	}

	kmem, err := cgroupStats.GetKernelMemoryUsage(w.cgroup.GetMemoryDir())
	if err != nil {
		l.Error("get kernel memory usage error,", err)
		return
	}
	w.curKMemUsage = kmem

	activeFile, inactiveFile, activeAnon, inactiveAnon, err := cgroupStats.GetMemoryStatFields(w.cgroup.GetMemoryDir())
	if err != nil {
		// Was l.Errorf with no format verb, which garbles the message; use
		// the print-style logger like the other error paths here.
		l.Error("get cgroup stats error,", err)
		return
	}

	l.Infof("container cgroup mem stats, total_active_file:%s, total_inactive_file:%s, "+
		"total_active_anon:%s, total_inactive_anon:%s, usage_in_bytes:%s, kmem:%s",
		unit.ByteSize(activeFile), unit.ByteSize(inactiveFile),
		unit.ByteSize(activeAnon), unit.ByteSize(inactiveAnon),
		unit.ByteSize(uint64(mem)), unit.ByteSize(uint64(kmem)))

	// Slab details are only collected once kernel memory reaches 200 MiB.
	if uint64(w.curKMemUsage) < 200*1024*1024 {
		return
	}

	slabInfo, err := cgroupStats.GetKernelMemorySlabInfo(filepath.Join(w.cgroup.GetMemoryDir()))
	if err != nil {
		l.Error("get cgroup slab error,", err)
		return
	}
	if len(slabInfo.Slabs) < 5 {
		l.Error("get cgroup slab error: slab num too few")
		return
	}

	sort.Sort(slabInfo)
	l.Debugf("container cgroup memory.kmem.slabinfo top3, %s:%d, %s:%d, %s:%d",
		slabInfo.Slabs[0].Name, slabInfo.Slabs[0].Cache,
		slabInfo.Slabs[1].Name, slabInfo.Slabs[1].Cache,
		slabInfo.Slabs[2].Name, slabInfo.Slabs[2].Cache)
}
// getMemUsage caches the cgroup memory working set in curMemUsage. On
// failure the error is logged and 0 is cached instead.
func (w *Watch) getMemUsage() {
	workingSet, err := w.cgroup.GetMemoryWorkingSet()
	if err != nil {
		l.Error("get memory usage error,", err)
		workingSet = 0
	}
	w.curMemUsage = workingSet
}
// getCPUUsage caches the cgroup CPU usage in curCPUUsage. On failure the
// error is logged and 0 is cached instead.
func (w *Watch) getCPUUsage() {
	usage, err := w.cgroup.GetCPUUsage()
	if err != nil {
		l.Error("get cpu usage error,", err)
		usage = 0
	}
	w.curCPUUsage = usage
}
// getCPULoad caches the 5-minute load average, read from the proc mount
// given by config.MountProc, in curCPULoad. On failure the error is logged
// and 0 is cached instead.
func (w *Watch) getCPULoad() {
	load5, err := w.getCPULoadAvg5M(config.MountProc)
	if err != nil {
		l.Error("get cpu 5m loadavg error,", err)
		load5 = 0
	}
	w.curCPULoad = load5
}
// getIOUsage caches the cgroup block I/O usage map in curIOUsage. On failure
// the error is logged and an empty map is cached instead.
func (w *Watch) getIOUsage() {
	perDevice, err := w.cgroup.GetIOUsage()
	if err != nil {
		l.Error("get block io usage error,", err)
		perDevice = map[string]*cgroupTypes.IOUsage{}
	}
	w.curIOUsage = perDevice
}
// printResourcesUsage logs the watch configuration (debug) and the cached
// container usage figures (info): memory, cpu, load and the read/write rates
// from the "total" entry of the I/O usage map (0 when there is no such
// entry). It then logs the per-device I/O usage.
func (w *Watch) printResourcesUsage() {
	var ioTotalRbps uint64
	var ioTotalWbps uint64
	if ioUsage, ok := w.curIOUsage["total"]; ok {
		ioTotalRbps = uint64(ioUsage.Read)
		ioTotalWbps = uint64(ioUsage.Write)
	}

	// cfg is a struct with numeric fields, so %s would render them as
	// "%!s(float64=...)"; %+v matches the format NewWatch uses for it.
	l.Debugf("resources watch cfg, %+v", w.cfg)
	l.Infof("resources usage, mem:%s, cpu:%.2f, load:%.2f, rps:%s, wps:%s",
		unit.ByteSize(uint64(w.curMemUsage)),
		w.curCPUUsage,
		w.curCPULoad,
		unit.ByteSize(ioTotalRbps), unit.ByteSize(ioTotalWbps))

	w.printAllDevIOUsage()
}
// printAllDevIOUsage logs, at debug level, the read and write rates of every
// device in the cached I/O usage map, skipping the "total" entry and devices
// with no read and no write activity.
func (w *Watch) printAllDevIOUsage() {
	for dev, usage := range w.curIOUsage {
		// The "total" check comes first so that entry is never dereferenced.
		if dev == "total" || (usage.Read == 0 && usage.Write == 0) {
			continue
		}
		l.Debugf("device io usage, dev:%s, rps:%s, wps:%s",
			dev,
			unit.ByteSize(uint64(usage.Read)),
			unit.ByteSize(uint64(usage.Write)))
	}
}
// getCPULoadAvg5M returns the 5-minute load average read through procfs.
//
// procDir is the proc filesystem mount to read from; a single trailing '/'
// is stripped before use. When procDir is empty the procfs default mount
// point is used. Any error from opening the filesystem or reading the load
// average is returned with a zero value.
func (w *Watch) getCPULoadAvg5M(procDir string) (float64, error) {
	var (
		fs  procfs.FS
		err error
	)
	if procDir == "" {
		fs, err = procfs.NewDefaultFS()
	} else {
		if procDir[len(procDir)-1] == '/' {
			procDir = procDir[:len(procDir)-1]
		}
		fs, err = procfs.NewFS(procDir)
	}
	if err != nil {
		return 0, err
	}

	loadAvg, err := fs.LoadAvg()
	if err != nil {
		return 0, err
	}
	return loadAvg.Load5, nil
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment