利用shell脚本实现服务器的CPU负载、内存、磁盘空间巡检。
1、本脚本在redhat6.9环境下编写,对centos/redhat5/6/7系统具有良好兼容性。
2、运行此脚本前,主控机需要与所有被巡检服务器配置好SSH免密登录!
此脚本分为主控机脚本与被巡检节点脚本。
主控机脚本主要负责初始化脚本设置、下发被巡检节点脚本配置、启动被巡检节点脚本、显示巡检结果。
被巡检节点脚本主要负责在节点服务器进行巡检。
注意!初始化时,IP列表文件需要放在启动脚本时所在的当前目录下。
主控机脚本(命名为:check.sh):
#!/bin/bash
#
# check.sh - master-side driver for the server inspection (CPU load, memory,
# disk usage).
#
# Presents a menu that can:
#   1. write ./check.ini (thresholds, remote directory, host list),
#   2. copy node_check.sh and check.ini to every host in IPLIST,
#   3. run node_check.sh on every host,
#   4. print today's inspection logs from every host.
#
# check.ini, node_check.sh and the IP list file are looked up in the current
# working directory ($PWD). Every remote action uses ssh/scp, so key-based
# login to each host must already be set up.

# Fails (after printing a hint) when no usable configuration has been loaded.
# Without this guard an empty IPLIST or FILE would make the host loops act on
# an empty host name / empty path.
# Globals: FILE (read), IPLIST (read)
# Returns: 0 when both are set, 1 otherwise
_require_config() {
  if [ -z "$FILE" ] || [ "${#IPLIST[@]}" -eq 0 ]; then
    echo "未检测到有效的巡检配置,请先执行初始化设置。"
    return 1
  fi
  return 0
}

# Prompts until the answer is an unsigned integer; an empty answer (or EOF on
# stdin) selects the default, so the loop always terminates.
# Arguments: $1 - prompt text, $2 - default value, $3 - variable to assign
_ask_percent() {
  local prompt=$1 default=$2 answer
  while :; do
    read -r -p "$prompt" answer || answer=""
    answer=${answer:-$default}
    case "$answer" in
      *[!0-9]*) echo "请输入整数!" ;;
      *) break ;;
    esac
  done
  printf -v "$3" '%s' "$answer"
}

# Install the inspection script: create $FILE on every host and copy
# node_check.sh plus check.ini into it.
# Globals: IPLIST (read), FILE (read), PWD (read)
install_check() {
  local host
  _require_config || return 1
  for host in "${IPLIST[@]}"; do
    # mkdir -p is idempotent, so no separate existence probe is needed.
    if ! ssh "$host" "mkdir -p \"$FILE\""; then
      echo "无法在${host}上创建目录${FILE},已跳过。"
      continue
    fi
    scp -rp "$PWD/node_check.sh" "$host:$FILE/node_check.sh"
    scp -rp "$PWD/check.ini" "$host:$FILE/check.ini"
  done
}

# Run the inspection on every host that has both the node script and its
# configuration installed.
# Globals: IPLIST (read), FILE (read)
start_check() {
  local host missing
  _require_config || return 1
  for host in "${IPLIST[@]}"; do
    # Reset per host: a host with missing files must not prevent the
    # remaining hosts from being inspected.
    missing=0
    if ! ssh "$host" "test -f \"$FILE/node_check.sh\""; then
      echo "没有在${host}上检测到巡检脚本,请先安装。"
      missing=1
    fi
    if ! ssh "$host" "test -f \"$FILE/check.ini\""; then
      echo "没有在${host}上检测到巡检脚本配置文件,请先安装。"
      missing=2
    fi
    if [ "$missing" -eq 0 ]; then
      # Invoke through bash so the run does not depend on the exec bit
      # having survived the copy.
      ssh "$host" "bash \"$FILE/node_check.sh\""
    fi
  done
}

# Print the inspection logs whose file name carries today's date (DATA_YMD is
# evaluated when check.ini is sourced) from every host.
# Globals: IPLIST (read), FILE (read), DATA_YMD (read)
put_chechk() {
  local host
  _require_config || return 1
  echo '###############'
  for host in "${IPLIST[@]}"; do
    echo
    # The glob is intentionally left unquoted so the remote shell expands it.
    ssh "$host" "cat $FILE/log/*_${DATA_YMD}*.log"
    echo
    echo '###############'
  done
}

# Interactive initialisation: ask for thresholds, the IP list file and the
# remote directory, then write ./check.ini in a single step.
# Nothing is written if the user aborts (EOF) before a valid IP list is given.
# Globals: PWD (read)
# Returns: 0 on success, 1 when aborted
put_ini() {
  local SDFMX SFREEMX SCPUMX SIPFILE SFILE DIR token
  local -a hosts fields

  _ask_percent "请设置磁盘巡检阀值百分比(默认80):" 80 SDFMX
  _ask_percent "请设置内存巡检阀值百分比(默认90):" 90 SFREEMX
  _ask_percent "请设置CPU负载巡检阀值百分比(默认60):" 60 SCPUMX
  # Disabled in the original: prompt for an IP comparison prefix.
  #read -p "请设置IP对比格式(默认172.16.0):" SHOSTIP
  #SHOSTIP=${SHOSTIP:-'172.16.0'}
  read -r -p "请设置巡检IP列表文件名(默认'iplist'):" SIPFILE
  SIPFILE=${SIPFILE:-iplist}
  read -r -p "请设置巡检文件存放路径(默认'/tmp/check'):" SFILE
  SFILE=${SFILE:-/tmp/check}
  # Disabled in the original: local password used to send results back.
  #read -p "设置本机密码(用于结果回传):" SPSWD

  # Re-prompt until the IP list file exists and yields at least one host.
  while :; do
    if [ -f "$PWD/$SIPFILE" ]; then
      hosts=()
      # Tokens may be separated by blanks or newlines. Only plain host/IP
      # characters are accepted because the list ends up in a file that is
      # later sourced by bash.
      while read -r -a fields || [ "${#fields[@]}" -gt 0 ]; do
        for token in "${fields[@]}"; do
          case "$token" in
            *[!A-Za-z0-9._:@-]*) echo "忽略无效的IP/主机名:$token" ;;
            *) hosts+=("$token") ;;
          esac
        done
      done < "$PWD/$SIPFILE"
      if [ "${#hosts[@]}" -gt 0 ]; then
        break
      fi
      echo "文件中没有有效的IP或主机名,请重新输入!"
    else
      echo "文件在当前目录不存在请重新输入!"
    fi
    # EOF on stdin means nobody can answer; give up instead of spinning.
    read -r -p "请设置巡检IP列表文件名(默认'iplist'):" SIPFILE || return 1
    SIPFILE=${SIPFILE:-iplist}
  done

  DIR=$(cd "$(dirname "$0")" && pwd)
  mkdir -p "$DIR/log"

  # Written in one redirect so an interrupted run cannot leave a partial file.
  # The two date lines are stored unevaluated on purpose: they are expanded
  # each time check.ini is sourced.
  {
    echo "DFMX=$SDFMX"
    echo "FREEMX=$SFREEMX"
    echo "CPUMX=$SCPUMX"
    #echo "HOSTIP=$SHOSTIP"
    printf 'FILE=%q\n' "$SFILE"
    echo 'DATA_YMDHM=`date +%Y%m%d%H%M`'
    echo 'DATA_YMD=`date +%Y%m%d`'
    #echo "PSWD=$SPSWD"
    printf 'PUTDIR=%q\n' "$DIR/log"
    echo "IPLIST=(${hosts[*]})"
  } > "$PWD/check.ini"

  # Disabled in the original: detect which local address appears in the list
  # and store it as REMOTEIP for sending results back.
  #IPA=(`/sbin/ip a|grep inet|awk '{print $2}'|awk -F\/ '{print $1}'`)
  #IPSUM=`echo ${#IPA[@]}`
  #COUNTER=0
  #while true
  #do
  #  if [ $COUNTER -lt $IPSUM ];then
  #    GIP=`echo "${IPA[$COUNTER]}"|grep ${IPLIST}|wc -l`
  #    if [ $GIP -eq "1" ];then
  #      HOSTIP=${IPA[$COUNTER]}
  #    fi
  #    COUNTER=$((COUNTER+1))
  #    if [ $COUNTER -eq $IPSUM ];then
  #      break
  #    fi
  #  fi
  #done
  #echo "REMOTEIP=$HOSTIP" >> $PWD/check.ini
  return 0
}

# Say goodbye and terminate the script.
ex() {
  echo "bey!"
  exit
}

# Main menu. check.ini is re-read on every pass so that a configuration
# written by option 1 is picked up by the options chosen afterwards.
while :; do
  if [ -f "$PWD/check.ini" ]; then
    source "$PWD/check.ini" &> /dev/null
  fi
  echo
  echo "1.初始化设置"
  echo "2.安装巡检脚本"
  echo "3.开始巡检"
  echo "4.输出上次巡检结果"
  echo "*.退出"
  echo
  read -r -p "请选择:" choose
  case "$choose" in
    1) put_ini ;;
    2) install_check ;;
    3) start_check ;;
    4) put_chechk ;;
    *)
      ex
      break
      ;;
  esac
done
被巡检节点脚本(与主控机脚本check.sh放置在同一目录下,命名为:node_check.sh):
#!/bin/bash
#
# node_check.sh - runs on an inspected node. Checks disk usage, memory usage
# and the 5-minute CPU load against the thresholds in check.ini (located next
# to this script) and appends the findings to
#   <script dir>/log/<hostname>_<timestamp>.log

###### Load inspection parameters ######
DIR=$(cd "$(dirname "$0")" && pwd)
if [ -f "$DIR/check.ini" ]; then
  source "$DIR/check.ini" &> /dev/null
fi

# True when $1 is a non-empty string of decimal digits.
is_uint() {
  case "$1" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}

# Fall back to the master script's prompt defaults (80/90/60) so a missing or
# damaged check.ini cannot break the numeric comparisons below.
is_uint "$DFMX" || DFMX=80
is_uint "$FREEMX" || FREEMX=90
is_uint "$CPUMX" || CPUMX=60
DATA_YMDHM=${DATA_YMDHM:-$(date +%Y%m%d%H%M)}

###### Collect system identity ######
# Find which of this machine's addresses appears in the inspected-host list.
# Exact string comparison is used so that dots are not treated as regex
# wildcards. When several local addresses match, the last one wins.
IPA=($(/sbin/ip a 2> /dev/null | awk '/inet/ { split($2, part, "/"); print part[1] }'))
IP=""
for local_ip in "${IPA[@]}"; do
  for listed_ip in "${IPLIST[@]}"; do
    if [ "$local_ip" = "$listed_ip" ]; then
      IP=$local_ip
      break
    fi
  done
done

# Identifier of this node used in the summary line.
ID="${HOSTNAME}:${IP}"

# Log location.
mkdir -p "$DIR/log"
PUTLOG="$DIR/log/${HOSTNAME}_${DATA_YMDHM}.log"
touch "$PUTLOG"
ERR=0

###### Disk inspection ######
# A single df call keeps percentage and mount point in step. LC_ALL=C plus
# NR>1 drops the header whatever the system locale is; lines containing "iso"
# are skipped as in the original. Everything from column 6 on is the mount
# point, so paths containing blanks stay intact.
while read -r used_pct mount_point; do
  used_pct=${used_pct%\%}
  # Some filesystems report "-" instead of a number; ignore those rows.
  is_uint "$used_pct" || continue
  if [ "$used_pct" -ge "$DFMX" ]; then
    echo "发现挂载目录${mount_point}占用较高,达到${used_pct}%" >> "$PUTLOG"
    ERR=$((ERR + 1))
  fi
done < <(LC_ALL=C df -P 2> /dev/null | awk '
  NR > 1 && !/iso/ {
    mount = $6
    for (i = 7; i <= NF; i++) mount = mount " " $i
    print $5, mount
  }')

###### Memory inspection ######
# Column 2 is total, column 3 is used (MiB). C locale keeps the "Mem:" label
# untranslated.
read -r MEM_TOTAL MEM_USED < <(LC_ALL=C free -m 2> /dev/null | awk '/^Mem:/ { print $2, $3 }')
if is_uint "$MEM_TOTAL" && is_uint "$MEM_USED" && [ "$MEM_TOTAL" -gt 0 ]; then
  MEM_PERCENT=$((MEM_USED * 100 / MEM_TOTAL))
  if [ "$MEM_PERCENT" -ge "$FREEMX" ]; then
    echo "发现内存占用达到${MEM_PERCENT}%" >> "$PUTLOG"
    ERR=$((ERR + 1))
  fi
fi

###### CPU load inspection ######
# Logical CPU count; fall back to "processor" lines, then to 1, so the
# division below can never be by zero.
CPUCODE=$(grep -c 'model name' /proc/cpuinfo 2> /dev/null)
if ! is_uint "$CPUCODE" || [ "$CPUCODE" -eq 0 ]; then
  CPUCODE=$(grep -c '^processor' /proc/cpuinfo 2> /dev/null)
fi
if ! is_uint "$CPUCODE" || [ "$CPUCODE" -eq 0 ]; then
  CPUCODE=1
fi

# The 5-minute load average is the second field of /proc/loadavg. Parsing
# `uptime` by field position is unreliable because its layout changes with
# the uptime length.
LOAD5=""
if [ -r /proc/loadavg ]; then
  read -r _ LOAD5 _ < /proc/loadavg
fi
if [ -n "$LOAD5" ]; then
  # Load per core as an integer percentage. The load is first rounded to whole
  # hundredths so binary floating point cannot turn e.g. 29 into 28.
  RESULI=$(awk -v load="$LOAD5" -v cores="$CPUCODE" \
    'BEGIN { printf "%d", int(load * 100 + 0.5) / cores }')
  if is_uint "$RESULI" && [ "$RESULI" -ge "$CPUMX" ]; then
    echo "发现CPU负载较高,达到$RESULI%" >> "$PUTLOG"
    ERR=$((ERR + 1))
  fi
fi

###### Summary ######
if [ "$ERR" -eq 0 ]; then
  echo "设备${ID}巡检未发现问题;检查时间:$DATA_YMDHM" >> "$PUTLOG"
else
  echo "设备${ID}巡检共发现${ERR}个问题,发现问题已列在上方;检查时间:$DATA_YMDHM" >> "$PUTLOG"
fi

###### Send results back ######
# Disabled in the original: some machines lack `expect`, and passing a
# password this way is a security risk.
#expect << EOF
#spawn scp -rp $PUTLOG ${REMOTEIP}:$PUTDIR
#expect "password:" { send "${PSWD}\n" }
#expect eof
#EOF
iplist文件示例(IP之间以空格或换行分隔):
192.168.0.1 192.168.0.2 192.168.0.3
发表评论