利用shell脚本实现服务器的CPU负载、内存、磁盘空间巡检。
1、本脚本在redhat6.9环境下编写,对centos/redhat5/6/7系统具有良好兼容性。
2、运行此脚本的主控机需要与被巡检服务器配置SSH免密登录!
此脚本分为主控机脚本与被巡检节点脚本。
主控机脚本主要负责初始化脚本设置、下发被巡检节点脚本配置、启动被巡检节点脚本、显示巡检结果。
被巡检节点脚本主要负责在节点服务器进行巡检。
注意!初始化时,IP列表文件需要放在启动脚本时所在的当前目录下。
主控机脚本(命名为:check.sh):
#!/bin/bash
#######################################
# Install the inspection script and its settings on every node.
# For each host in IPLIST the remote directory FILE is created when needed,
# then node_check.sh and check.ini are copied into it from $PWD.
# Globals:   IPLIST (read) - hosts to install on
#            FILE   (read) - remote installation directory
# Outputs:   diagnostics on stderr for hosts that could not be set up
# Returns:   0 when every host was installed, 1 otherwise (including when
#            the settings have not been loaded yet)
#######################################
install_check() {
  local host rc=0

  # Without a loaded check.ini the host list is empty. The former
  # counter-based loop never reached its exit condition in that case.
  if ((${#IPLIST[@]} == 0)) || [[ -z "$FILE" ]]; then
    echo "未找到巡检主机列表或安装路径,请先执行初始化设置。" >&2
    return 1
  fi

  for host in "${IPLIST[@]}"; do
    # mkdir -p is a no-op for an existing directory, so no probe is needed.
    if ! ssh "$host" "mkdir -p $FILE"; then
      echo "无法在${host}上创建目录${FILE},已跳过该主机。" >&2
      rc=1
      continue
    fi
    # -p keeps the mode bits, so the script stays executable on the node.
    if ! scp -p "$PWD/node_check.sh" "$host:$FILE/node_check.sh"; then
      echo "向${host}下发node_check.sh失败。" >&2
      rc=1
    fi
    if ! scp -p "$PWD/check.ini" "$host:$FILE/check.ini"; then
      echo "向${host}下发check.ini失败。" >&2
      rc=1
    fi
  done
  return "$rc"
}
#######################################
# Run the inspection on every node.
# A host is inspected only when both node_check.sh and check.ini are present
# in its FILE directory; otherwise a hint to install first is printed.
# Globals:   IPLIST (read) - hosts to inspect
#            FILE   (read) - remote installation directory
# Outputs:   per-host hints on stdout, plus whatever the node script prints
# Returns:   0 when every host was inspected, 1 otherwise
#######################################
start_check() {
  local host missing rc=0

  # Without a loaded check.ini the host list is empty. The former
  # counter-based loop never reached its exit condition in that case.
  if ((${#IPLIST[@]} == 0)) || [[ -z "$FILE" ]]; then
    echo "未找到巡检主机列表或安装路径,请先执行初始化设置。" >&2
    return 1
  fi

  for host in "${IPLIST[@]}"; do
    # Reset per host: one incomplete node must not block the ones after it.
    missing=0
    # test -f answers the same question as the former ls probe without
    # printing the file name on the terminal.
    if ! ssh "$host" "test -f $FILE/node_check.sh"; then
      echo "没有在${host}上检测到巡检脚本,请先安装。"
      missing=1
    fi
    if ! ssh "$host" "test -f $FILE/check.ini"; then
      echo "没有在${host}上检测到巡检脚本配置文件,请先安装。"
      missing=1
    fi
    if ((missing)); then
      rc=1
      continue
    fi
    ssh "$host" "$FILE/node_check.sh" || rc=1
  done
  return "$rc"
}
#######################################
# Print today's inspection logs from every node, separated by rulers.
# (The function name keeps its historical spelling because the menu calls it.)
# Globals:   IPLIST   (read) - hosts to query
#            FILE     (read) - remote installation directory
#            DATA_YMD (read) - date stamp used in the log file names
# Outputs:   the remote log contents on stdout
# Returns:   1 when the settings have not been loaded, otherwise the status
#            of the last command
#######################################
put_chechk() {
  local host

  # Without a loaded check.ini the host list is empty. The former
  # counter-based loop never reached its exit condition in that case.
  if ((${#IPLIST[@]} == 0)) || [[ -z "$FILE" ]]; then
    echo "未找到巡检主机列表或安装路径,请先执行初始化设置。" >&2
    return 1
  fi

  echo '###############'
  for host in "${IPLIST[@]}"; do
    echo
    # The glob is expanded by the remote shell, not locally.
    ssh "$host" "cat $FILE/log/*_${DATA_YMD}*.log"
    echo
    echo '###############'
  done
}
#######################################
# Read a non-negative integer from stdin into a named variable.
# Empty input or end-of-input selects the default; anything that is not a
# plain integer is rejected and asked for again.
# Arguments: $1 - prompt text
#            $2 - default value
#            $3 - name of the variable that receives the result
#######################################
_put_ini_ask_number() {
  local prompt=$1 default=$2 outvar=$3 reply
  while true; do
    reply=""
    if ! read -r -p "$prompt" reply; then
      # End of input: keep a valid unterminated answer, otherwise use the
      # default, and stop asking so a closed stdin cannot cause a spin.
      [[ "$reply" =~ ^[0-9]+$ ]] || reply=$default
      break
    fi
    reply=${reply:-$default}
    [[ "$reply" =~ ^[0-9]+$ ]] && break
    echo "请输入非负整数!" >&2
  done
  printf -v "$outvar" '%s' "$reply"
}

#######################################
# Interactive first-time setup: ask for thresholds, the IP list file and the
# remote installation path, then write $PWD/check.ini in one go.
# The file is only written after all answers are valid, so an aborted setup
# leaves an existing check.ini untouched.
# Globals:   PWD (read)
# Inputs:    answers on stdin; the IP list file must exist in $PWD
# Outputs:   $PWD/check.ini, and a log/ directory next to this script
# Returns:   0 on success, 1 when input ends before a valid IP list file is
#            named or when the log directory cannot be created
#######################################
put_ini() {
  local disk_max mem_max cpu_max ip_file="" remote_dir="" script_dir
  local ip_words=""
  local -a ips=()

  _put_ini_ask_number "请设置磁盘巡检阀值百分比(默认80):" 80 disk_max
  _put_ini_ask_number "请设置内存巡检阀值百分比(默认90):" 90 mem_max
  _put_ini_ask_number "请设置CPU负载巡检阀值百分比(默认60):" 60 cpu_max
  read -r -p "请设置巡检IP列表文件名(默认'iplist'):" ip_file || true
  ip_file=${ip_file:-iplist}
  read -r -p "请设置巡检文件存放路径(默认'/tmp/check'):" remote_dir || true
  remote_dir=${remote_dir:-/tmp/check}

  # Keep asking until the IP list exists, but give up at end of input.
  while [[ ! -f "$PWD/$ip_file" ]]; do
    echo "文件在当前目录不存在请重新输入!"
    if ! read -r -p "请设置巡检IP列表文件名(默认'iplist'):" ip_file; then
      return 1
    fi
    ip_file=${ip_file:-iplist}
  done

  # Split the list on any whitespace. With -d '' read reports failure at end
  # of file even though the array was filled, hence the || true.
  read -r -d '' -a ips < "$PWD/$ip_file" || true
  if ((${#ips[@]} > 0)); then
    # %q keeps odd characters from being executed when check.ini is sourced.
    ip_words=$(printf '%q ' "${ips[@]}")
    ip_words=${ip_words% }
  fi

  script_dir=$(cd "$(dirname -- "$0")" && pwd) || return 1
  mkdir -p "$script_dir/log" || return 1

  {
    printf 'DFMX=%s\n' "$disk_max"
    printf 'FREEMX=%s\n' "$mem_max"
    printf 'CPUMX=%s\n' "$cpu_max"
    printf 'FILE=%q\n' "$remote_dir"
    # Written literally: the date commands run each time check.ini is
    # sourced, not now.
    printf '%s\n' 'DATA_YMDHM=`date +%Y%m%d%H%M`'
    printf '%s\n' 'DATA_YMD=`date +%Y%m%d`'
    printf 'PUTDIR=%q\n' "$script_dir/log"
    printf 'IPLIST=(%s)\n' "$ip_words"
  } > "$PWD/check.ini"
}
#######################################
# Say goodbye and leave the script.
# Outputs: the farewell text on stdout
# Returns: does not return; the shell exits with the status of the print
#######################################
ex() {
  printf '%s\n' "bey!"
  exit
}
# Interactive main menu: reload the settings, show the options and dispatch
# to the matching function until the user picks anything other than 1-4.
while :; do
  # Reload on every pass so a check.ini just written by option 1 is picked
  # up and the date stamps inside it are re-evaluated. The file is only
  # sourced, so it does not need the execute bit.
  if [[ -r "$PWD/check.ini" ]]; then
    source "$PWD/check.ini" &> /dev/null
  fi
  echo
  echo "1.初始化设置"
  echo "2.安装巡检脚本"
  echo "3.开始巡检"
  echo "4.输出上次巡检结果"
  echo "*.退出"
  echo
  # At end of input the choice stays empty and falls through to the exit arm.
  read -r -p "请选择:" choose
  case "$choose" in
    1)
      put_ini
      ;;
    2)
      install_check
      ;;
    3)
      start_check
      ;;
    4)
      put_chechk
      ;;
    *)
      # ex terminates the script, so nothing after it would ever run.
      ex
      ;;
  esac
done
被巡检节点脚本(放置在主控机同级目录下,命名为:node_check.sh):
#!/bin/bash
###### Load inspection settings ######
# check.ini is read from the directory this script lives in.
DIR=$(cd "$(dirname -- "$0")" && pwd)
# Stop early when the settings are missing: without IPLIST and the
# thresholds the checks below cannot produce a meaningful report.
if [[ ! -r "$DIR/check.ini" ]]; then
  echo "无法读取巡检配置文件:$DIR/check.ini" >&2
  exit 1
fi
source "$DIR/check.ini" &> /dev/null
###### Identify this host ######
#######################################
# Choose the local address that also appears in the inspection list.
# Addresses are compared as exact strings. When several local addresses are
# listed, the last one found wins.
# Globals:   IPA    (read)    - local addresses
#            IPLIST (read)    - addresses of all inspected hosts
#            IP     (written) - the match, or empty when there is none
#######################################
pick_listed_ip() {
  local addr listed
  IP=""
  # for-loops simply do nothing on empty arrays; the former counter loops
  # never reached their exit condition in that case.
  for addr in "${IPA[@]}"; do
    for listed in "${IPLIST[@]}"; do
      if [[ "$addr" == "$listed" ]]; then
        IP=$addr
        break
      fi
    done
  done
}

# Second field of every line of `ip a` that mentions "inet", with the
# /prefix part removed.
IPA=($(/sbin/ip a | awk '/inet/ { split($2, part, "/"); print part[1] }'))
pick_listed_ip
# Label for this machine in the report.
ID="${HOSTNAME}:${IP}"
# Report location: one file per run inside log/ next to this script.
# mkdir -p succeeds when the directory is already there.
if ! mkdir -p "$DIR/log"; then
  echo "无法创建日志目录:$DIR/log" >&2
  exit 1
fi
PUTLOG="$DIR/log/${HOSTNAME}_${DATA_YMDHM}.log"
# Every finding below is appended to this file, so a report that cannot be
# created makes the rest of the run pointless.
if ! touch "$PUTLOG"; then
  echo "无法创建巡检日志:$PUTLOG" >&2
  exit 1
fi
# Number of findings; each check below adds to it.
ERR=0
###### Disk usage check ######
#######################################
# Turn `df -P` output into one finding per mount point at or above a limit.
# The header line, any line mentioning "iso", and rows whose usage column is
# not a number are skipped. Nothing is reported when the limit itself is not
# a non-negative integer.
# Arguments: $1 - usage limit in percent
# Inputs:    `df -P` formatted text on stdin
# Outputs:   one finding per line on stdout
#######################################
disk_alerts() {
  awk -v max="$1" '
    BEGIN { valid = (max ~ /^[0-9]+$/) }
    !valid || NR == 1 || /iso/ { next }
    {
      use = $5
      sub(/%$/, "", use)
      if (use !~ /^[0-9]+$/) next
      # Everything from the sixth column on is the mount point, so paths
      # containing spaces stay in one piece.
      mount = $6
      for (i = 7; i <= NF; i++) mount = mount " " $i
      if (use + 0 >= max + 0)
        printf "发现挂载目录%s占用较高,达到%s%%\n", mount, use
    }'
}

# df runs once, so usage and mount point always come from the same row.
# The loop reads from a process substitution rather than a pipe so that ERR
# is updated in this shell.
while IFS= read -r DISK_ALERT; do
  echo "$DISK_ALERT" >> "$PUTLOG"
  ERR=$((ERR+1))
done < <(df -P | disk_alerts "$DFMX")
###### Memory usage check ######
#######################################
# Compute used/total of the "Mem:" row of `free -m` as a whole percentage,
# rounded down.
# Inputs:    `free -m` formatted text on stdin
# Outputs:   the percentage on stdout; nothing when there is no "Mem:" row
#            or its total is zero
#######################################
mem_used_percent() {
  awk '$1 == "Mem:" && $2 > 0 { print int($3 * 100 / $2); exit }'
}

# free runs once so both numbers come from the same snapshot. The C locale
# is pinned in case free localizes its row labels.
MEM_PERCENT=$(LC_ALL=C free -m | mem_used_percent)
if [[ -z "$MEM_PERCENT" ]]; then
  echo "无法获取内存使用情况,已跳过内存巡检。" >&2
elif [ "$MEM_PERCENT" -ge "$FREEMX" ]; then
  echo "发现内存占用达到${MEM_PERCENT}%" >> "$PUTLOG"
  ERR=$((ERR+1))
fi
###### CPU load check ######
#######################################
# Express a load average as a whole percentage of the available cores,
# rounded down.
# Arguments: $1 - load average with two decimals, e.g. 1.50
#            $2 - number of cores; values below 1 are treated as 1
# Outputs:   the percentage on stdout
#######################################
cpu_load_percent() {
  awk -v load="$1" -v cores="$2" 'BEGIN {
    if (cores + 0 < 1) cores = 1
    # Work in whole hundredths so binary rounding of values such as 0.29
    # cannot shave one point off the result.
    hundredths = int(load * 100 + 0.5)
    print int(hundredths / cores)
  }'
}

# Number of logical CPUs.
CPUCODE=$(grep -c '^processor' /proc/cpuinfo)
# The second field of /proc/loadavg is the 5-minute load average. Unlike a
# fixed column of `uptime`, its position does not move with the wording of
# the uptime text.
read -r LOAD1 LOAD5 LOADREST < /proc/loadavg
RESULI=$(cpu_load_percent "$LOAD5" "$CPUCODE")
if [ "$RESULI" -ge "$CPUMX" ]; then
  echo "发现CPU负载较高,达到$RESULI%" >> "$PUTLOG"
  ERR=$((ERR+1))
fi
###### Inspection summary ######
# Closing line of the report: start from the "problems found" wording and
# switch to the "nothing found" wording when the finding counter is zero.
SUMMARY="设备${ID}巡检共发现${ERR}个问题,发现问题已列在上方;检查时间:$DATA_YMDHM"
[ $ERR -eq 0 ] && SUMMARY="设备${ID}巡检未发现问题;检查时间:$DATA_YMDHM"
echo "$SUMMARY" >> $PUTLOG
######回传结果######
#部分设备不支持命令且有安全隐患,取消此显示方案!
#expect << EOF
#spawn scp -rp $PUTLOG ${REMOTEIP}:$PUTDIR
#expect "password:" { send "${PSWD}\n" }
#expect eof
#EOF
iplist文件示例:
192.168.0.1 192.168.0.2 192.168.0.3
发表评论