diff --git a/ChangeLog.md b/ChangeLog.md index 9bba04d..7293638 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -11,6 +11,8 @@ 1. Counter 类型的数据上报逻辑大幅更改。现在 swcollector 将在本地计算出相应的数值,再以 Gauge 类型上报。如果出现异常的数据,则在本地直接抛弃。因此最终呈现的绘图至多只会出现断点,而不再会出现极端的异常图形。 2. 优化了 gosnmp 的端口采集,现在 gosnmp 端口采集的超时情况应该大幅度降低了 +#### bug修复 #### +1. 现在当 cpu 和 mem 采集异常的时候,应该能正确的抛弃。而不是上报一个 0 了 ## 3.2.1.1 ## 1. debugmetric 现在支持配置多个 endpoint 和 metric 了 diff --git a/README.md b/README.md index e87088a..674d5ec 100644 --- a/README.md +++ b/README.md @@ -93,7 +93,7 @@ swcollector需要部署到有交换机SNMP访问权限的服务器上。 "enabled": true, "ipRange":[ #交换机IP地址段,对该网段有效IP,先发Ping包探测,对存活IP发SNMP请求 "192.168.56.101/32", - "192.168.56.102/32", + "192.168.56.102-192.168.56.120",#现在支持这样的配置方式,对区域内的ip进行ping探测,对存活ip发起snmp请求。 "172.16.114.233" ], "gosnmp":true, #是否使用 gosnmp 采集, false 则使用 snmpwalk @@ -119,8 +119,9 @@ swcollector需要部署到有交换机SNMP访问权限的服务器上。 "unknownProtosPktlimit": 0, #unknownProtosPkt的上限,如果采集计算出的包速率超过这个数值,则抛弃不上报。如果填0,则不进行最大值比较。 "ignoreOutQLen":true, #不采集IfOutQLen "outQLenPktlimit": 0, #outQLenPkt的上限,如果采集计算出的包速率超过这个数值,则抛弃不上报。如果填0,则不进行最大值比较。 - "fastPingMode": true, - "limitConcur": 1000 + "fastPingMode": true, + "limitConcur": 1000, #交换机采集的并发限制 + "limitCon": 4 #对于单台交换机上,多个指标采集的并发限制 }, "transfer": { "enabled": true, diff --git a/cfg.example.json b/cfg.example.json index b4e3cdd..6b32835 100644 --- a/cfg.example.json +++ b/cfg.example.json @@ -1,5 +1,6 @@ { "debug": true, + "lowermetrics": true, "debugmetric":{ "endpoints":["192.168.56.101","192.168.56.102"], "metrics":["switch.if.In","switch.if.Out"], @@ -9,7 +10,7 @@ "enabled": true, "ipRange":[ "192.168.56.101/32", - "192.168.56.102/32", + "192.168.56.102-192.168.56.120", "172.16.114.233" ], "gosnmp":true, @@ -36,7 +37,8 @@ "ignoreOutQLen":true, "outQLenPktlimit": 0, "fastPingMode": true, - "limitConcur": 1000 + "limitConcur": 1000, + "limitCon": 4 }, "transfer": { "enabled": true, diff --git a/funcs/swifstat.go b/funcs/swifstat.go index e4c202d..69d7b99 100644 --- 
a/funcs/swifstat.go +++ b/funcs/swifstat.go @@ -2,6 +2,7 @@ package funcs import ( "log" + "sync" "github.com/gaochao1/swcollector/g" "github.com/open-falcon/common/model" @@ -20,12 +21,48 @@ type ChIfStat struct { IfStatsList *[]sw.IfStats } -var ( - AliveIp []string - lastIfStat = map[string]*[]sw.IfStats{} - pingTimeout int - pingRetry int +type LastifMap struct { + lock *sync.RWMutex + ifstat map[string]*[]sw.IfStats +} + +func NewLastifMap() { + lastifmap = &LastifMap{ + lock: new(sync.RWMutex), + ifstat: make(map[string]*[]sw.IfStats), + } +} +func (m *LastifMap) Get(k string) *[]sw.IfStats { + m.lock.RLock() + defer m.lock.RUnlock() + if val, ok := m.ifstat[k]; ok { + return val + } + return nil +} + +func (m *LastifMap) Set(k string, v *[]sw.IfStats) { + m.lock.Lock() + defer m.lock.Unlock() + m.ifstat[k] = v + return +} + +func (m *LastifMap) Check(k string) bool { + m.lock.RLock() + defer m.lock.RUnlock() + if _, ok := m.ifstat[k]; !ok { + return false + } + return true +} + +var ( + AliveIp []string + pingTimeout int + pingRetry int + lastifmap *LastifMap community string snmpTimeout int snmpRetry int @@ -42,6 +79,7 @@ var ( ignoreOutQLen bool ignoreSpeedPercent bool fastPingMode bool + limitCon int ) func initVariable() { @@ -52,6 +90,7 @@ func initVariable() { community = g.Config().Switch.Community snmpTimeout = g.Config().Switch.SnmpTimeout snmpRetry = g.Config().Switch.SnmpRetry + limitCon = g.Config().Switch.LimitCon gosnmp = g.Config().Switch.Gosnmp ignoreIface = g.Config().Switch.IgnoreIface @@ -128,7 +167,7 @@ func swIfMetrics() (L []*model.MetricValue) { L = append(L, GaugeValueIp(ifStat.TS, ip, "switch.if.Speed", ifStat.IfSpeed, ifNameTag, ifIndexTag)) } if ignoreBroadcastPkt == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ 
-154,7 +193,7 @@ func swIfMetrics() (L []*model.MetricValue) { } } if ignoreMulticastPkt == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -181,7 +220,7 @@ func swIfMetrics() (L []*model.MetricValue) { } if ignoreDiscards == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -208,7 +247,7 @@ func swIfMetrics() (L []*model.MetricValue) { } if ignoreErrors == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -235,7 +274,7 @@ func swIfMetrics() (L []*model.MetricValue) { } if ignoreUnknownProtos == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -254,7 +293,7 @@ func swIfMetrics() (L []*model.MetricValue) { } if ignoreOutQLen == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -274,7 +313,7 @@ func swIfMetrics() (L []*model.MetricValue) { //如果IgnorePkt为false,采集Pkt if ignorePkt == false { - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); 
lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -299,7 +338,7 @@ func swIfMetrics() (L []*model.MetricValue) { } } } - if lastIfStatList, ok := lastIfStat[chIfStat.Ip]; ok { + if lastIfStatList := lastifmap.Get(chIfStat.Ip); lastIfStatList != nil { for _, lastifStat := range *lastIfStatList { if ifStat.IfIndex == lastifStat.IfIndex { interval := ifStat.TS - lastifStat.TS @@ -335,7 +374,7 @@ func swIfMetrics() (L []*model.MetricValue) { } } } - lastIfStat[chIfStat.Ip] = chIfStat.IfStatsList + lastifmap.Set(chIfStat.Ip, chIfStat.IfStatsList) } } @@ -396,7 +435,7 @@ func coreSwIfMetrics(ip string, ch chan ChIfStat, limitCh chan bool) { var err error if gosnmp { - ifList, err = sw.ListIfStats(ip, community, snmpTimeout, ignoreIface, snmpRetry, ignorePkt, ignoreOperStatus, ignoreBroadcastPkt, ignoreMulticastPkt, ignoreDiscards, ignoreErrors, ignoreUnknownProtos, ignoreOutQLen) + ifList, err = sw.ListIfStats(ip, community, snmpTimeout, ignoreIface, snmpRetry, limitCon, ignorePkt, ignoreOperStatus, ignoreBroadcastPkt, ignoreMulticastPkt, ignoreDiscards, ignoreErrors, ignoreUnknownProtos, ignoreOutQLen) } else { ifList, err = sw.ListIfStatsSnmpWalk(ip, community, snmpTimeout*5, ignoreIface, snmpRetry, ignorePkt, ignoreOperStatus, ignoreBroadcastPkt, ignoreMulticastPkt, ignoreDiscards, ignoreErrors, ignoreUnknownProtos, ignoreOutQLen) } diff --git a/g/cfg.go b/g/cfg.go index 3191f5c..ee30f10 100644 --- a/g/cfg.go +++ b/g/cfg.go @@ -44,6 +44,7 @@ type SwitchConfig struct { UnknownProtosPktlimit float64 `json:"unknownProtosPktlimit"` IgnoreOutQLen bool `json:"ignoreOutQLen` OutQLenPktlimit float64 `json:"outQLenPktlimit"` + LimitCon int `json:"limitCon"` LimitConcur int `json:"limitConcur"` FastPingMode bool `json:"fastPingMode"` } diff --git a/g/const.go b/g/const.go index 7c0f510..75edbcc 100644 --- a/g/const.go +++ b/g/const.go @@ -16,7 +16,8 @@ import ( // 4.0.0 caculate 
counter type on swcollect local,add speedpercent // 4.0.1 fix sometimes ifstat pannic // 4.0.2 fix speedpercent bug +// 4.0.4 add lock on lastIfStat map; add limitCon to cap concurrent snmp requests per switch const ( - VERSION = "4.0.2" + VERSION = "4.0.4" COLLECT_INTERVAL = time.Second ) diff --git a/g/var.go b/g/var.go index 19aff67..c3dbafc 100644 --- a/g/var.go +++ b/g/var.go @@ -54,6 +54,7 @@ func SendToTransfer(metrics []*model.MetricValue) { debug_metrics := Config().Debugmetric.Metrics debug_tags := Config().Debugmetric.Tags debug_Tags := strings.Split(debug_tags, ",") + if debug { for _, metric := range metrics { metric_tags := strings.Split(metric.Tags, ",") diff --git a/main.go b/main.go index 2d67f47..7c0b092 100644 --- a/main.go +++ b/main.go @@ -34,7 +34,7 @@ func main() { funcs.CheckCollector() os.Exit(0) } - + funcs.NewLastifMap() funcs.BuildMappers() cron.Collect()