监控redis多实例的负载情况

    在单台服务器上部署多个 Redis 实例时,对各实例的关键指标进行监控非常重要。下面的脚本用于监控一台服务器上多个实例的负载情况,主要包含:Redis 实例的 QPS、内存使用情况、客户端连接数,以及服务器的内存使用率、CPU 使用率、CPU load 值、网卡流量等。脚本会把采集到的数据打印出来并写入数据库,方便查看多台服务器上多实例的情况以及做数据汇总。脚本写得比较仓促,有兴趣的同学可以整理一下,使其模块化、简洁化。脚本如下:


    #!/usr/bin/env python
    # -*- coding: UTF-8 -*-
    import os,threading,time,sys,sigar,MySQLdb
    '''
    安装python的sigar模块
    apt-get install libtool automake gettext python-MySQLdb screen pkg-config git
    git clone https://github.com/hyperic/sigar.git sigar.git
    cd sigar.git
    ./autogen.sh
    ./configure
    make
    make install
    cd bindings/python/
    python setup.py install
    '''
    '''
    建库sql
    CREATE TABLE redis_stats (
    id int(11) NOT NULL AUTO_INCREMENT,
    host_name varchar(50) NOT NULL,
    qps int(11) NOT NULL,
    clients int(11) NOT NULL,
    redis_mem_used varchar(50) NOT NULL,
    sys_mem_used_pers float NOT NULL,
    cpu_used float NOT NULL,
    cpu_load varchar(50) NOT NULL,
    netband varchar(255) NOT NULL,
    uptime timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
    PRIMARY KEY (id),
    KEY host_name (host_name),
    KEY uptime (uptime)
    ) ENGINE=InnoDB DEFAULT CHARSET=utf8
    查询每个实例的最新记录
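    select s.host_name,s.qps,s.clients,s.redis_mem_used,concat(s.sys_mem_used_pers,'%') as sys_mem_used,concat(s.cpu_used,'%') as cpu_used,s.cpu_load,s.netband from redis.redis_stats s join (select host_name,max(id) as id from redis.redis_stats group by host_name) latest on s.id = latest.id order by s.qps desc;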
    '''
    def log2db(check_log):
        """Write one batch of samples into the ``redis.redis_stats`` table.

        check_log is a sequence of rows; each row supplies the eight values
        of the INSERT below in column order: host_name, qps, clients,
        redis_mem_used, sys_mem_used_pers, cpu_used, cpu_load, netband.

        Logging is best-effort: any database error is printed and swallowed
        so that a MySQL outage does not stop the caller. Returns None.
        """
        if not check_log:
            # Nothing was sampled; do not open a connection for an empty batch.
            return
        # NOTE(review): host and credentials are hard-coded here; consider
        # moving them to a config file or the environment.
        log_host = '192.168.56.101'
        log_user = 'root'
        log_pass = '1q2w3e4r'
        try:
            conn = MySQLdb.connect(host = log_host,port = 3306,user = log_user,passwd = log_pass,charset='utf8',connect_timeout=20)
            # The nested try/finally blocks release the cursor and the
            # connection even when the insert or the commit fails.
            try:
                cursor = conn.cursor()
                try:
                    cursor.executemany("INSERT INTO redis.redis_stats (host_name,qps,clients,redis_mem_used,sys_mem_used_pers,cpu_used,cpu_load,netband) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)",tuple(check_log))
                    conn.commit()
                finally:
                    cursor.close()
            finally:
                conn.close()
        except Exception as e:
            # Deliberate catch-all: report the failure and carry on.
            print(e)
    def redis_info(host,port,res):
        """Query one redis instance with ``redis-cli info`` and record its stats.

        On success a five-element list
        [host, port, connected_clients, instantaneous_ops_per_sec,
        used_memory_human] is appended to res; all values are strings as
        printed by redis-cli. If the instance gives no usable answer
        (unreachable, or a required field is absent) a message is printed
        and res is left untouched.

        The function returns None; res is the output parameter so that it
        can be used directly as a thread target.
        """
        pipe = os.popen("redis-cli -h %s -p %s info|grep -v '#'|tr -s '\r\n'" % (host,port))
        try:
            lines = pipe.readlines()
        finally:
            pipe.close()
        stats = {}
        for line in lines:
            # partition() keeps values that themselves contain ':' intact
            # and lets us skip blank or malformed lines without IndexError.
            key, sep, value = line.strip().partition(":")
            if sep:
                stats[key] = value
        wanted = ("connected_clients", "instantaneous_ops_per_sec", "used_memory_human")
        missing = [name for name in wanted if name not in stats]
        if missing:
            print("redis_info: no %s reported by %s:%s" % (','.join(missing), host, port))
            return
        res.append([host, port] + [stats[name] for name in wanted])
    def _cpu_used_percent(current, previous):
        """Return the CPU busy percentage between two (idle, total) samples.

        With no earlier sample (previous is None), or when the total did not
        advance, the figure is derived from the current counters alone.
        """
        idle, total = current
        if previous is not None:
            delta_idle = idle - previous[0]
            delta_total = total - previous[1]
            if delta_total > 0:
                idle, total = delta_idle, delta_total
        if not total:
            return 0.0
        return (1-float(idle)/total)*100

    def _net_counters(sg, nets):
        """Return the current tx/rx byte counters of each interface in nets."""
        counters = {}
        for net in nets:
            stat = sg.net_interface_stat(net)
            counters[net+'_Out'] = stat.tx_bytes()
            counters[net+'_In'] = stat.rx_bytes()
        return counters

    def _net_rates(current, previous, elapsed):
        """Turn two counter snapshots into MB/s strings keyed like current.

        Counters that have no entry in previous (first pass, or an interface
        that has just appeared) are reported as "0.00".
        """
        rates = {}
        for name, value in current.items():
            if name in previous and elapsed > 0:
                rates[name] = "%.2f" % (float(value - previous[name])/elapsed/1024/1024)
            else:
                rates[name] = "0.00"
        return rates

    def _poll_redis(redis_list):
        """Run redis_info for every 'host:port' entry in parallel; return sorted rows."""
        res = []
        threads = []
        for item in redis_list:
            host, port = item.split(':')[:2]
            worker = threading.Thread(target=redis_info,args=(host,port,res))
            worker.start()
            threads.append(worker)
        for worker in threads:
            worker.join()
        res.sort()
        return res

    def main():
        """Sample host and redis metrics every stime seconds until Ctrl-C.

        Each pass reads memory, CPU, load average and network counters
        through sigar, polls every instance in redis_list, prints a table
        plus a summary line, and hands the rows to log2db(). Ctrl-C prints
        a newline and exits with status 0.
        """
        stime = 5
        redis_list = ['192.168.56.101:6379','192.168.1.87:16379']
        header = "\033[1;35;40m%s\033[0m" % ("Redis_host:port".ljust(23)+"Redis:QPS".ljust(10)+"Redis:Clients".ljust(15)+"Redis:Mem_used".ljust(15)+"Mem_percent".ljust(12)+"Cpu_used".ljust(10)+"Cpu_loadavg".ljust(17))
        # Open the sigar handle once instead of once per pass.
        sg = sigar.open()
        prev_net = {}
        prev_cpu = None
        try:
            while True:
                # Host memory usage.
                mem_percent = "%.2f" % sg.mem().used_percent()
                # Overall CPU usage. The idle/total counters are cumulative
                # (per the sigar documentation), so usage is computed from
                # the difference between consecutive samples.
                cpu = sg.cpu()
                cpu_now = (cpu.idle(), cpu.total())
                cpu_used = "%.2f" % _cpu_used_percent(cpu_now, prev_cpu)
                prev_cpu = cpu_now
                # CPU load averages.
                cpu_loadavg = ','.join([str(i) for i in sg.loadavg()])
                # Interfaces that carry a global-scope address.
                # NOTE(review): relies on the interface name being the 7th
                # field of the `ip a` line; confirm on hosts with other
                # address flags.
                nets = [i.strip() for i in os.popen("/bin/ip a|grep global|awk '{print $7}'").readlines() if i.strip() != '']
                # prev_net was captured right before the sleep, so the
                # interval between the two snapshots is stime.
                netband = _net_rates(_net_counters(sg, nets), prev_net, stime)
                text = "*"*20 + " Redis Status %s " % time.strftime("%Y-%m-%d %H:%M:%S") + "*"*20
                print("\033[1;31;40m%s\033[0m" % text)
                res = _poll_redis(redis_list)
                print(header)
                all_qps = 0
                all_clients = 0
                check_log = []
                for host, port, clients, qps, mem_used in res:
                    name = host+':'+port
                    check_log.append([name,int(qps),int(clients),mem_used,float(mem_percent),float(cpu_used),cpu_loadavg,str(netband)])
                    print(name.ljust(23)+qps.ljust(10)+clients.ljust(15)+mem_used.ljust(15)+mem_percent.ljust(12)+cpu_used.ljust(10)+cpu_loadavg.ljust(17)+str(netband))
                    all_qps += int(qps)
                    all_clients += int(clients)
                log2db(check_log)
                print("\033[1;35;40m%s\033[0m" % ("Summary All host:".ljust(23)+str(all_qps).ljust(10)+str(all_clients).ljust(10)))
                prev_net = _net_counters(sg, nets)
                time.sleep(stime)
        except KeyboardInterrupt:
            # Finish the line holding the echoed ^C, then leave quietly.
            print("")
            sys.exit(0)
    # Start the monitor only when the file is executed as a script.
    if __name__ == "__main__":
        main()

    效果如图:

    监控redis多实例的负载情况 - 图1