一次公司需求记录,python处理sysstat收集的sa性能数据

鉴于每个月都需要把公司上千台服务器的CPU使用率、CPU负载、内存使用率、网络流量等数据取出,以前是手动通过zabbix平台去获取,每次都需要花费1-2小时才能完成这个需求,太耗时。因此优化为用sysstat软件采集数据,再通过python脚本处理/var/log/sa下采集到的数据,并入库到mysql数据库中;以后每次取数据,只需要一行sql就能获取,方便快捷。本文附上采集数据的脚本截图,如下所示:
一次公司需求记录,python处理sysstat收集的sa性能数据

一次公司需求记录,python处理sysstat收集的sa性能数据

一次公司需求记录,python处理sysstat收集的sa性能数据

一次公司需求记录,python处理sysstat收集的sa性能数据

一次公司需求记录,python处理sysstat收集的sa性能数据


最后附上完整代码:
#!/bin/python2.7
#coding: utf-8
# Usage: pass the day whose data you want as YYYYMMDD, e.g. 20190901 for September 1st, 2019.
import os
import sys
import datetime
import commands
import time


def get_cpu(sdate,edate):
    """Print max/min/average CPU utilisation for every sa file in a date window.

    For each file under /var/log/sa/ whose modification time is newer than
    ``sdate`` and not newer than ``edate`` (and whose path does not contain
    "sar"), ``sar -f <file> -u`` is run and the last column (%idle) of every
    sample row is converted to a usage figure ``100 - %idle``.  One summary
    line per file is printed to stdout.

    Args:
        sdate: lower bound handed to ``find -newermt`` (the script passes
            YYYYMMDD).
        edate: upper bound handed to ``find ! -newermt``.

    Returns:
        None.  Files that yield no samples are reported on stderr and skipped.

    Raises:
        OSError: if ``find`` or ``sar`` cannot be executed.
    """
    import re
    import subprocess

    def _stdout_lines(argv, env=None):
        # Arguments are passed as a list (no shell), so dates and file names
        # are never interpreted as shell syntax.
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, env=env,
                                universal_newlines=True)
        return proc.communicate()[0].splitlines()

    found = _stdout_lines(["find", "/var/log/sa/", "-type", "f",
                           "-newermt", sdate, "!", "-newermt", edate])
    # Same filter the old `grep -v sar` applied: keep the binary saDD files.
    sa_files = [p for p in found if p and "sar" not in p]

    # Ask sar for untranslated output so numbers parse with float().
    sar_env = dict(os.environ, LC_ALL="C")
    # Sample rows (and column-header rows) start with a HH:MM:SS timestamp;
    # the banner, blank lines and "Average:" summary rows do not.
    clock = re.compile(r"^\d{1,2}:\d{2}:\d{2}$")

    for n in sa_files:
        used = []
        for line in _stdout_lines(["sar", "-f", n, "-u"], env=sar_env):
            fields = line.split()
            if not fields or not clock.match(fields[0]):
                continue
            try:
                idle = float(fields[-1])
            except ValueError:
                # Column header ("%idle") or a LINUX RESTART marker.
                continue
            used.append(100 - idle)

        if not used:
            sys.stderr.write("%s: no CPU samples found, skipped\n" % n)
            continue

        print("%s CPU useed max:%.4f,min:%.4f,average:%.4f"
              % (n, max(used), min(used), sum(used) / len(used)))


def get_dev(sdate,edate):
    """Print max/min/average rx/tx throughput of the default-route interface.

    The interface is taken from the first ``route -n`` entry whose destination
    is exactly 0.0.0.0.  For each file under /var/log/sa/ whose modification
    time is newer than ``sdate`` and not newer than ``edate`` (and whose path
    does not contain "sar"), ``sar -f <file> -n DEV`` is run and the ``rxkB/s``
    and ``txkB/s`` columns of that interface's rows are summarised.  Two lines
    per file (rx, then tx) are printed to stdout.

    Args:
        sdate: lower bound handed to ``find -newermt`` (the script passes
            YYYYMMDD).
        edate: upper bound handed to ``find ! -newermt``.

    Returns:
        None.  If no default-route interface is found, or a file has no rows
        for it, a note is written to stderr and that work is skipped.

    Raises:
        OSError: if ``find`` or ``sar`` cannot be executed.
    """
    import re
    import subprocess

    def _stdout_lines(argv, env=None):
        # Arguments are passed as a list (no shell), so dates, file names and
        # the interface name are never interpreted as shell syntax.
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, env=env,
                                universal_newlines=True)
        return proc.communicate()[0].splitlines()

    try:
        routes = _stdout_lines(["route", "-n"])
    except OSError:
        routes = []

    net = ""
    # The first two lines of `route -n` are headings.  The destination must
    # equal 0.0.0.0 exactly; a regex match would also accept e.g. 10.0.0.0.
    for line in routes[2:]:
        fields = line.split()
        if len(fields) >= 8 and fields[0] == "0.0.0.0":
            net = fields[7]
            break
    if not net:
        sys.stderr.write("no default-route interface found "
                         "(is `route` installed?), network report skipped\n")
        return

    found = _stdout_lines(["find", "/var/log/sa/", "-type", "f",
                           "-newermt", sdate, "!", "-newermt", edate])
    # Same filter the old `grep -v sar` applied: keep the binary saDD files.
    sa_files = [p for p in found if p and "sar" not in p]

    # Ask sar for untranslated output so numbers parse with float().
    sar_env = dict(os.environ, LC_ALL="C")
    # Sample rows and column-header rows start with a HH:MM:SS timestamp;
    # the banner, blank lines and "Average:" summary rows do not.
    clock = re.compile(r"^\d{1,2}:\d{2}:\d{2}$")

    for n in sa_files:
        cols = None
        data_rxkB = []
        data_txkB = []
        for line in _stdout_lines(["sar", "-f", n, "-n", "DEV"], env=sar_env):
            fields = line.split()
            if not fields or not clock.match(fields[0]):
                continue
            if "IFACE" in fields:
                # Header row: remember where each named column sits, so the
                # lookup works with or without an AM/PM token after the time.
                cols = dict((name, idx) for idx, name in enumerate(fields))
                continue
            if cols is None:
                continue
            try:
                if fields[cols["IFACE"]] != net:
                    continue
                rx_val = float(fields[cols["rxkB/s"]])
                tx_val = float(fields[cols["txkB/s"]])
            except (IndexError, KeyError, ValueError):
                # LINUX RESTART marker or a layout without these columns.
                continue
            data_rxkB.append(rx_val)
            data_txkB.append(tx_val)

        if not data_rxkB:
            sys.stderr.write("%s: no samples for interface %s, skipped\n" % (n, net))
            continue

        print("%s rxval max:%.4f,min:%.4f,average:%.4f,dev:%s"
              % (n, max(data_rxkB), min(data_rxkB),
                 sum(data_rxkB) / len(data_rxkB), net))
        print("%s txval max:%.4f,min:%.4f,average:%.4f,dev:%s"
              % (n, max(data_txkB), min(data_txkB),
                 sum(data_txkB) / len(data_txkB), net))


def get_mem(sdate,edate):
    """Print max/min/average memory-usage ratio for every sa file in a window.

    For each file under /var/log/sa/ whose modification time is newer than
    ``sdate`` and not newer than ``edate`` (and whose path does not contain
    "sar"), ``sar -f <file> -r`` is run.  Each sample contributes
    ``(kbmemused - kbcached) / (kbmemfree + kbmemused)``, i.e. a ratio rather
    than a percentage.  One summary line per file is printed to stdout.

    Args:
        sdate: lower bound handed to ``find -newermt`` (the script passes
            YYYYMMDD).
        edate: upper bound handed to ``find ! -newermt``.

    Returns:
        None.  Files that yield no samples are reported on stderr and skipped.

    Raises:
        OSError: if ``find`` or ``sar`` cannot be executed.
    """
    import re
    import subprocess

    def _stdout_lines(argv, env=None):
        # Arguments are passed as a list (no shell), so dates and file names
        # are never interpreted as shell syntax.
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, env=env,
                                universal_newlines=True)
        return proc.communicate()[0].splitlines()

    found = _stdout_lines(["find", "/var/log/sa/", "-type", "f",
                           "-newermt", sdate, "!", "-newermt", edate])
    # Same filter the old `grep -v sar` applied: keep the binary saDD files.
    sa_files = [p for p in found if p and "sar" not in p]

    # Ask sar for untranslated output so numbers parse with float().
    sar_env = dict(os.environ, LC_ALL="C")
    # Sample rows and column-header rows start with a HH:MM:SS timestamp;
    # the banner, blank lines and "Average:" summary rows do not.
    clock = re.compile(r"^\d{1,2}:\d{2}:\d{2}$")

    for n in sa_files:
        cols = None
        data_num = []
        for line in _stdout_lines(["sar", "-f", n, "-r"], env=sar_env):
            fields = line.split()
            if not fields or not clock.match(fields[0]):
                continue
            if "kbmemused" in fields:
                # Header row: look columns up by name instead of by distance
                # from the end of the line, which shifts whenever the report
                # has more or fewer columns.
                cols = dict((name, idx) for idx, name in enumerate(fields))
                continue
            if cols is None:
                continue
            try:
                free = float(fields[cols["kbmemfree"]])
                used = float(fields[cols["kbmemused"]])
                cached = float(fields[cols["kbcached"]])
            except (IndexError, KeyError, ValueError):
                # LINUX RESTART marker or a layout without these columns.
                continue
            total = free + used
            if total > 0:
                data_num.append((used - cached) / total)

        if not data_num:
            sys.stderr.write("%s: no memory samples found, skipped\n" % n)
            continue

        print("%s memused max:%.4f,min:%.4f,average:%.4f"
              % (n, max(data_num), min(data_num), sum(data_num) / len(data_num)))


def get_ldavg(sdate,edate):
    """Print max/min/average per-CPU 1- and 15-minute load for each sa file.

    For each file under /var/log/sa/ whose modification time is newer than
    ``sdate`` and not newer than ``edate`` (and whose path does not contain
    "sar"), ``sar -f <file> -q`` is run and the ``ldavg-1`` and ``ldavg-15``
    columns are summarised, each figure divided by the CPU count.  Two lines
    per file (1-minute, then 15-minute) are printed to stdout.

    The CPU count comes from the "(N CPU)" tag on the first line of the sar
    report when present, otherwise from the machine running the script.

    Args:
        sdate: lower bound handed to ``find -newermt`` (the script passes
            YYYYMMDD).
        edate: upper bound handed to ``find ! -newermt``.

    Returns:
        None.  Files that yield no samples are reported on stderr and skipped.

    Raises:
        OSError: if ``find`` or ``sar`` cannot be executed.
    """
    import multiprocessing
    import re
    import subprocess

    def _stdout_lines(argv, env=None):
        # Arguments are passed as a list (no shell), so dates and file names
        # are never interpreted as shell syntax.
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, env=env,
                                universal_newlines=True)
        return proc.communicate()[0].splitlines()

    local_cpus = multiprocessing.cpu_count()

    found = _stdout_lines(["find", "/var/log/sa/", "-type", "f",
                           "-newermt", sdate, "!", "-newermt", edate])
    # Same filter the old `grep -v sar` applied: keep the binary saDD files.
    sa_files = [p for p in found if p and "sar" not in p]

    # Ask sar for untranslated output so numbers parse with float().
    sar_env = dict(os.environ, LC_ALL="C")
    # Sample rows and column-header rows start with a HH:MM:SS timestamp;
    # the banner, blank lines and "Average:" summary rows do not.
    clock = re.compile(r"^\d{1,2}:\d{2}:\d{2}$")
    cpu_banner = re.compile(r"\((\d+) CPU\)")

    for n in sa_files:
        report = _stdout_lines(["sar", "-f", n, "-q"], env=sar_env)

        banner = cpu_banner.search(report[0]) if report else None
        cpunum = int(banner.group(1)) if banner else local_cpus
        if cpunum < 1:
            cpunum = local_cpus

        cols = None
        data1 = []
        data15 = []
        for line in report:
            fields = line.split()
            if not fields or not clock.match(fields[0]):
                continue
            if "ldavg-1" in fields:
                # Header row: look the columns up by name, so the 6-, 7- and
                # 8-field layouts (AM/PM token, optional "blocked" column)
                # all resolve to the right values.
                cols = dict((name, idx) for idx, name in enumerate(fields))
                continue
            if cols is None:
                continue
            try:
                one = float(fields[cols["ldavg-1"]])
                fifteen = float(fields[cols["ldavg-15"]])
            except (IndexError, KeyError, ValueError):
                # LINUX RESTART marker or an unexpected layout.
                continue
            data1.append(one)
            data15.append(fifteen)

        if not data1:
            sys.stderr.write("%s: no load-average samples found, skipped\n" % n)
            continue

        average_data1 = sum(data1) / len(data1)
        average_data15 = sum(data15) / len(data15)
        print("%s cpu 1minute load max:%.4f,min:%.4f,average:%.4f"
              % (n, max(data1) / cpunum, min(data1) / cpunum,
                 average_data1 / cpunum))
        print("%s cpu 15minute load max:%.4f,min:%.4f,average:%.4f"
              % (n, max(data15) / cpunum, min(data15) / cpunum,
                 average_data15 / cpunum))


if __name__ == "__main__":
    # Entry point: takes the day to report on as YYYYMMDD and runs the four
    # reports for the window [that day, next day).
    if len(sys.argv) < 2:
        sys.stderr.write("usage: %s YYYYMMDD\n" % sys.argv[0])
        sys.exit(2)
    try:
        sdate = datetime.datetime.strptime(sys.argv[1], '%Y%m%d')
    except ValueError:
        sys.stderr.write("invalid date %r, expected YYYYMMDD (e.g. 20190901)\n"
                         % sys.argv[1])
        sys.exit(2)
    edate = sdate + datetime.timedelta(days=1)
    # Re-format both bounds, so the report functions only ever receive
    # normalised YYYYMMDD strings.
    sdate = sdate.strftime('%Y%m%d')
    edate = edate.strftime('%Y%m%d')
    get_cpu(sdate, edate)
    get_ldavg(sdate, edate)
    get_mem(sdate, edate)
    get_dev(sdate, edate)