鉴于每个月需要把公司上千台服务器的CPU使用率、CPU负载、内存使用率、网络流量等数据取出,以前是手动通过zabbix平台去获取,每次都需要花费1-2小时才能完成这个需求,太耗时,因此优化为用sysstat软件采集数据,然后通过python脚本处理/var/log/sa下采集到的数据,再入库到mysql数据库中。以后每次取数据,只需要一行sql代码就能获取到,方便快捷。本文把采集数据的脚本截图,如下所示:
一次公司需求记录,python处理sysstat收集的sa性能数据
一次公司需求记录,python处理sysstat收集的sa性能数据
一次公司需求记录,python处理sysstat收集的sa性能数据
一次公司需求记录,python处理sysstat收集的sa性能数据
一次公司需求记录,python处理sysstat收集的sa性能数据
最后附上完整代码:
#!/bin/python2.7
#coding: utf-8
#################Usage: pass the day whose data you want as YYYYMMDD, e.g. 20190901 for September 1, 2019
import os
import sys
import datetime
import commands
import time

def get_cpu(sdate, edate):
    """Print max/min/average CPU utilisation for each sa file in a date window.

    Data files are located with ``find /var/log/sa/ -newermt sdate
    ! -newermt edate`` (paths containing "sar" are excluded). Each file is
    rendered with ``sar -f FILE -u`` and the last column of every sample row
    (``%idle``) is turned into utilisation as ``100 - %idle``.

    Args:
        sdate: Lower bound handed to ``find -newermt`` (the entry point of
            this script supplies ``YYYYMMDD``).
        edate: Upper bound handed to ``find ! -newermt``, same format.
            Both values are interpolated into a shell command unquoted, so
            they must come from a validated source.

    Returns:
        None. One summary line per data file is written to stdout; a file
        that yields no numeric samples is reported on stderr and skipped.
    """
    find_cmd = ("find /var/log/sa/ -type f -newermt " + sdate +
                " ! -newermt " + edate + " |grep -v sar")
    with os.popen(find_cmd) as pipe:
        sa_files = [line.strip() for line in pipe if line.strip()]

    for sa_file in sa_files:
        with os.popen("sar -f " + sa_file + " -u") as pipe:
            report = pipe.read().splitlines()

        used = []
        # The first line of sar output is the system banner, never a sample.
        for row in report[1:]:
            fields = row.split()
            # Average rows are summaries, not samples; skip every one of
            # them rather than assuming there is exactly one at the end.
            if not fields or fields[0].startswith("Average"):
                continue
            try:
                idle = float(fields[-1])
            except ValueError:
                # Column headers ("%idle"), restart markers and any other
                # non-numeric trailer are not samples.
                continue
            used.append(100 - idle)

        if not used:
            sys.stderr.write("%s: no CPU samples found, skipping\n" % sa_file)
            continue

        print("%s CPU useed max:%.4f,min:%.4f,average:%.4f"
              % (sa_file, max(used), min(used), sum(used) / len(used)))

def get_dev(sdate, edate):
    """Print max/min/average receive and transmit rates of the default NIC.

    The interface is the one attached to the first ``0.0.0.0`` destination
    reported by ``route -n``. For every sa file selected by
    ``find /var/log/sa/ -newermt sdate ! -newermt edate`` the rows of
    ``sar -f FILE -n DEV`` that belong to that interface are collected. The
    receive and transmit values are the third and fourth columns after the
    interface name (labelled rxkB/s and txkB/s by current sysstat releases
    -- NOTE(review): very old releases may label these columns differently).

    Args:
        sdate: Lower bound handed to ``find -newermt`` (``YYYYMMDD`` when
            called from this script's entry point).
        edate: Upper bound handed to ``find ! -newermt``, same format.
            Both values are interpolated into a shell command unquoted, so
            they must come from a validated source.

    Returns:
        None. Two summary lines (rx, then tx) per data file are written to
        stdout. Problems (no default interface, no samples in a file) are
        reported on stderr.
    """
    route_cmd = ("route -n|sed '1,2d'|awk -F' ' '{print $1,$8}'"
                 "|grep 0.0.0.0|awk -F' ' '{print $2}'|sed -n '1p'")
    with os.popen(route_cmd) as pipe:
        net = pipe.read().strip()
    if not net:
        # Without an interface name there is nothing to select rows by.
        sys.stderr.write("no default-route interface found, "
                         "skipping network statistics\n")
        return

    find_cmd = ("find /var/log/sa/ -type f -newermt " + sdate +
                " ! -newermt " + edate + " |grep -v sar")
    with os.popen(find_cmd) as pipe:
        sa_files = [line.strip() for line in pipe if line.strip()]

    for sa_file in sa_files:
        with os.popen("sar -f " + sa_file + " -n DEV") as pipe:
            report = pipe.read().splitlines()

        rx_rates = []
        tx_rates = []
        for row in report:
            fields = row.split()
            # Match the interface as a whole token so that e.g. "eth0" does
            # not also pick up "eth0.100"; Average rows are not samples.
            if net not in fields or fields[0].startswith("Average"):
                continue
            # Locating the columns relative to the interface name keeps the
            # lookup independent of whether the timestamp carries AM/PM.
            iface_col = fields.index(net)
            try:
                rx = float(fields[iface_col + 3])
                tx = float(fields[iface_col + 4])
            except (IndexError, ValueError):
                continue
            # rx and tx are appended together so both series always cover
            # exactly the same samples.
            rx_rates.append(rx)
            tx_rates.append(tx)

        if not rx_rates:
            sys.stderr.write("%s: no samples for interface %s, skipping\n"
                             % (sa_file, net))
            continue

        print("%s rxval max:%.4f,min:%.4f,average:%.4f,dev:%s"
              % (sa_file, max(rx_rates), min(rx_rates),
                 sum(rx_rates) / len(rx_rates), net))
        print("%s txval max:%.4f,min:%.4f,average:%.4f,dev:%s"
              % (sa_file, max(tx_rates), min(tx_rates),
                 sum(tx_rates) / len(tx_rates), net))

def get_mem(sdate, edate):
    """Print max/min/average memory usage ratio for each sa file in a window.

    For every sa file selected by ``find /var/log/sa/ -newermt sdate
    ! -newermt edate`` the output of ``sar -f FILE -r`` is parsed and each
    sample is reduced to::

        (kbmemused - kbcached) / (kbmemfree + kbmemused)

    Columns are looked up by the names in sar's own header row, so the
    result does not depend on how many columns the installed sysstat prints
    or on whether timestamps carry an AM/PM field.

    Args:
        sdate: Lower bound handed to ``find -newermt`` (``YYYYMMDD`` when
            called from this script's entry point).
        edate: Upper bound handed to ``find ! -newermt``, same format.
            Both values are interpolated into a shell command unquoted, so
            they must come from a validated source.

    Returns:
        None. One summary line per data file is written to stdout (values
        are fractions, not percentages); a file that yields no usable
        samples is reported on stderr and skipped.
    """
    find_cmd = ("find /var/log/sa/ -type f -newermt " + sdate +
                " ! -newermt " + edate + " |grep -v sar")
    with os.popen(find_cmd) as pipe:
        sa_files = [line.strip() for line in pipe if line.strip()]

    wanted = ("kbmemfree", "kbmemused", "kbcached")

    for sa_file in sa_files:
        with os.popen("sar -f " + sa_file + " -r") as pipe:
            report = pipe.read().splitlines()

        columns = None   # positions of `wanted` in the latest header row
        width = 0        # token count of that header row
        ratios = []
        for row in report:
            fields = row.split()
            if not fields or fields[0].startswith("Average"):
                continue
            if "kbmemused" in fields:
                # Header row (sar repeats it after a restart): remember
                # where the needed columns are.
                try:
                    columns = tuple(fields.index(name) for name in wanted)
                    width = len(fields)
                except ValueError:
                    columns = None
                continue
            # Sample rows have exactly as many tokens as the header row;
            # banner and restart lines do not.
            if columns is None or len(fields) != width:
                continue
            try:
                free, used, cached = [float(fields[i]) for i in columns]
            except ValueError:
                continue
            total = free + used
            if total == 0:
                continue
            ratios.append((used - cached) / total)

        if not ratios:
            sys.stderr.write("%s: no memory samples found, skipping\n"
                             % sa_file)
            continue

        print("%s memused max:%.4f,min:%.4f,average:%.4f"
              % (sa_file, max(ratios), min(ratios),
                 sum(ratios) / len(ratios)))

def get_ldavg(sdate, edate):
    """Print per-CPU 1-minute and 15-minute load statistics for each sa file.

    For every sa file selected by ``find /var/log/sa/ -newermt sdate
    ! -newermt edate`` the ``ldavg-1`` and ``ldavg-15`` columns of
    ``sar -f FILE -q`` are collected; their max, min and average are divided
    by the number of ``processor`` entries in /proc/cpuinfo before printing.

    The CPU count is read from the machine running this script, so the
    normalisation is only meaningful for sa files recorded on this host.

    Args:
        sdate: Lower bound handed to ``find -newermt`` (``YYYYMMDD`` when
            called from this script's entry point).
        edate: Upper bound handed to ``find ! -newermt``, same format.
            Both values are interpolated into a shell command unquoted, so
            they must come from a validated source.

    Returns:
        None. Two summary lines (1-minute, then 15-minute) per data file are
        written to stdout; a file that yields no usable samples is reported
        on stderr and skipped.
    """
    with os.popen("cat /proc/cpuinfo |grep processor|wc -l") as pipe:
        cpunum = int(pipe.read().strip() or 0)
    # Never divide by zero if the processor count could not be determined.
    cpunum = max(cpunum, 1)

    find_cmd = ("find /var/log/sa/ -type f -newermt " + sdate +
                " ! -newermt " + edate + " |grep -v sar")
    with os.popen(find_cmd) as pipe:
        sa_files = [line.strip() for line in pipe if line.strip()]

    for sa_file in sa_files:
        with os.popen("sar -f " + sa_file + " -q") as pipe:
            report = pipe.read().splitlines()

        col1 = col15 = None   # column positions taken from the header row
        width = 0             # token count of that header row
        load1 = []
        load15 = []
        for row in report:
            fields = row.split()
            if not fields or fields[0].startswith("Average"):
                continue
            if "ldavg-1" in fields and "ldavg-15" in fields:
                # Header row: locating the columns by name works whether or
                # not the timestamp has an AM/PM field and whether or not a
                # trailing "blocked" column is present.
                col1 = fields.index("ldavg-1")
                col15 = fields.index("ldavg-15")
                width = len(fields)
                continue
            # Sample rows have exactly as many tokens as the header row;
            # banner and restart lines do not.
            if col1 is None or len(fields) != width:
                continue
            try:
                one = float(fields[col1])
                fifteen = float(fields[col15])
            except ValueError:
                continue
            load1.append(one)
            load15.append(fifteen)

        if not load1:
            sys.stderr.write("%s: no load-average samples found, skipping\n"
                             % sa_file)
            continue

        print("%s cpu 1minute load max:%.4f,min:%.4f,average:%.4f"
              % (sa_file, max(load1) / cpunum, min(load1) / cpunum,
                 sum(load1) / len(load1) / cpunum))
        print("%s cpu 15minute load max:%.4f,min:%.4f,average:%.4f"
              % (sa_file, max(load15) / cpunum, min(load15) / cpunum,
                 sum(load15) / len(load15) / cpunum))

if __name__ == "__main__":
    # Entry point: takes one argument, the day to report on, as YYYYMMDD.
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s YYYYMMDD\n" % sys.argv[0])
        sys.exit(1)
    try:
        sdate = datetime.datetime.strptime(sys.argv[1], '%Y%m%d')
    except ValueError:
        sys.stderr.write("invalid date %r, expected YYYYMMDD\n" % sys.argv[1])
        sys.exit(1)
    # The reporting window is [day, day + 1); both bounds are re-formatted
    # as YYYYMMDD strings because they are spliced into `find -newermt`.
    edate = sdate + datetime.timedelta(days=1)
    sdate = sdate.strftime('%Y%m%d')
    edate = edate.strftime('%Y%m%d')
    get_cpu(sdate, edate)
    get_ldavg(sdate, edate)
    get_mem(sdate, edate)
    get_dev(sdate, edate)