Script cluster Red Hat générique (rgmanager)

De Wiki de Romain RUDIGER
Aller à : navigation, rechercher

Introduction

Dans le cadre de la mise en cluster d'une application sous Red Hat Enterprise Linux 4.8, j'ai écrit ce script, qui fonctionne donc avec les versions de paquets suivantes :

rgmanager-1.9.87-1.el4_8.6
cman-1.0.24-1
fence-1.32.67-1.el4_8.5

Vous trouverez beaucoup d'informations sur le wiki Linux Cluster

script

Voici le script à proprement parler ; il se trouve généralement dans '/usr/share/cluster/'. Il utilise beaucoup de fonctions provenant du dossier 'utils'.

#!/bin/bash

# +=======================================================================================================+
# |
# | FILENAME
# |   generic_prog.sh
# |
# | DESCRIPTION
# |   Manage a generic application as an HA resource
# |
# | USAGE
# |   exec './$0'
# |
# | PLATFORM
# |   Linux Generic
# |
# | OCF instance parameters 
# |   exec './$0 meta-data'
# |
# |-Ver-|-Date------|-Author-----------|-Comment----------------------------------------------------------|
# | 1.0  18-11-2010  RUDIGER Romain     Creation
# +=======================================================================================================+

# Run every tool with the C locale and a fixed PATH.
export LC_ALL=C
export LANG=C
export PATH=/bin:/sbin:/usr/bin:/usr/sbin

# Helper libraries are loaded from the directory that contains this agent.
# "$0" and the dirname result are quoted so that an installation path
# containing whitespace does not get word-split.
. "$(dirname "$0")/ocf-shellfuncs"
. "$(dirname "$0")/utils/config-utils.sh"
. "$(dirname "$0")/utils/messages.sh"
. "$(dirname "$0")/utils/ra-skelet.sh"

# PID files of the two optional monitored processes.
# NOTE(review): generate_name_for_pid_dir is not defined in this file; it is
# presumably provided by one of the helpers sourced above - confirm.
declare CUSTOM_pid_file="$(generate_name_for_pid_dir)_custom.pid"
declare CUSTOM1_pid_file="$(generate_name_for_pid_dir)_custom1.pid"

# Test a value against an extended regular expression.
#
# Args : <value to test> <extended regular expression>
#
# Example : test_regExp "$VAR" "^([[:digit:]]{1,3}\.){3}[[:digit:]]{1,3}$"
#  returns 0 when $VAR looks like a dotted-quad IPv4 address.
#
# Returns : the exit status of grep (0 = match, 1 = no match,
#           greater than 1 = error such as an invalid expression).
function test_regExp
{
   # printf instead of echo: echo would interpret values such as "-n" or
   # "-e" as options and feed nothing to grep.
   # "--" prevents a pattern starting with "-" from being read as an option.
   printf '%s\n' "$1" | grep -Eq -- "$2"
}

# Look up the PID of a monitored application and store it in its PID file.
#
# Args : <application key> (CUSTOM or CUSTOM1)
#
# Globals read : <key>_pid_user, <key>_pid_name, <key>_pid_regexp,
#                <key>_pid_file, log_header
#
# The process list (columns "user pid ppid args", no header) is filtered with
# the extended regular expression <key>_pid_regexp. The PID (2nd column) of
# the first matching line is written to <key>_pid_file; the file is left
# empty when nothing matches. When <key>_pid_user is set, only the processes
# of that user are listed.
#
# Returns : the status of the lookup pipeline; an unknown key is a no-op
#           and returns 0.
function get_pid_of_app
{
   local app=$1
   local var pid_user pid_name pid_regexp pid_file

   case "$app" in
      CUSTOM|CUSTOM1) ;;
      *) return 0 ;;
   esac

   # Indirect expansion gives access to the <key>_pid_* globals without
   # duplicating the whole lookup for each key.
   var="${app}_pid_user";   pid_user=${!var}
   var="${app}_pid_name";   pid_name=${!var}
   var="${app}_pid_regexp"; pid_regexp=${!var}
   var="${app}_pid_file";   pid_file=${!var}

   ocf_log debug "$log_header $app check ${app}_pid_user=$pid_user ${app}_pid_name=$pid_name ${app}_pid_regexp=$pid_regexp"

   # A single if/else feeds the pipeline, so the function status is the
   # status of the pipeline in both cases (it used to be 1 whenever no user
   # filter was configured, even after a successful lookup).
   if [[ -z $pid_user ]]; then
      ps -e -o user,pid,ppid,args --no-headers
   else
      ps -u "$pid_user" -o user,pid,ppid,args --no-headers
   fi | grep -m 1 -E -- "$pid_regexp" | awk '{print $2}' > "$pid_file"
}

# Validate the resource parameters and derive the settings used by the
# start, stop and status actions.
#
# Reads   : OCF_RESOURCE_INSTANCE, OCF_RESKEY_name, OCF_RESKEY_service_name,
#           OCF_RESKEY_script, OCF_RESKEY_custom_pid_check,
#           OCF_RESKEY_custom1_pid_check, OCF_RESKEY_mount_to_check,
#           OCF_RESKEY_lock_dir
# Sets    : log_header, service_name, service_name_upper, script,
#           CUSTOM_pid, CUSTOM_pid_name, CUSTOM_pid_user, CUSTOM_pid_regexp,
#           CUSTOM_check (and the CUSTOM1_* equivalents), mount_to_check,
#           BASLCK
# Returns : 0 when the configuration is valid, $OCF_ERR_ARGS otherwise.
#
# NOTE(review): parameter values go through a double "eval echo" so that
# escaped placeholders such as \${service_name_upper} get expanded. This also
# executes any command substitution present in the value and applies brace
# and pathname expansion to it - acceptable only while the cluster
# configuration is trusted; confirm.
verify_all()
{
   # Format accepted for mount_to_check; also printed in the error message so
   # that the message always shows the expression really enforced.
   local mount_regexp='^/[[:alnum:]]+(/([[:alnum:]]|_|-)+)*([[:space:]]+/[[:alnum:]]+(/([[:alnum:]]|_|-)+)*)*$'

   clog_service_verify $CLOG_INIT
   log_header="Verifying Configuration Of $OCF_RESOURCE_INSTANCE >"

   if [ -z "$OCF_RESKEY_name" ]; then
      clog_service_verify $CLOG_FAILED "$log_header Invalid Name Of Service"
      return $OCF_ERR_ARGS
   fi

   # Check service name (inherited var)
   if [ -z "$OCF_RESKEY_service_name" ]; then
      clog_service_verify $CLOG_FAILED_NOT_CHILD
      return $OCF_ERR_ARGS
   fi
   service_name=$OCF_RESKEY_service_name
   service_name_upper=$(printf '%s\n' "$OCF_RESKEY_service_name" | tr '[:lower:]' '[:upper:]')

   # Check the start stop application script
   if [ -z "$OCF_RESKEY_script" ]; then
      clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_script"
      clog_service_verify $CLOG_FAILED
      return $OCF_ERR_ARGS
   fi
   script=$(eval echo $(eval echo ${OCF_RESKEY_script}))
   # Only an existing script is checked; a missing file is not an error here.
   if [ -f "${script}" ] && [ ! -x "${script}" ]; then
      ocf_log error "$log_header The script must be executable $OCF_RESOURCE_INSTANCE (${script})"
      return $OCF_ERR_ARGS
   fi

   # Check the custom PID check var
   if [ -n "$OCF_RESKEY_custom_pid_check" ]; then
      # Format must be: <process_name>,<username>,<regexp>
      CUSTOM_pid=$(eval echo $(eval echo $OCF_RESKEY_custom_pid_check))
      # Split on the first two commas only: everything after the second
      # comma belongs to the regexp, which may itself contain commas.
      IFS=, read -r CUSTOM_pid_name CUSTOM_pid_user CUSTOM_pid_regexp <<< "$CUSTOM_pid"
      CUSTOM_check=CUSTOM # This is used to enable the check of the custom PID
      # Check args
      if [ -z "$CUSTOM_pid_name" ]; then
         clog_service_verify $CLOG_FAILED "$log_header Invalid name for the custom_pid_check value ($OCF_RESKEY_custom_pid_check)"
         return $OCF_ERR_ARGS
      fi
      if [ -z "$CUSTOM_pid_regexp" ]; then
         clog_service_verify $CLOG_FAILED "$log_header Invalid regexp for the custom_pid_check value ($OCF_RESKEY_custom_pid_check)"
         return $OCF_ERR_ARGS
      fi
   fi

   # Check the custom1 PID check var
   if [ -n "$OCF_RESKEY_custom1_pid_check" ]; then
      # Format must be: <process_name>,<username>,<regexp>
      CUSTOM1_pid=$(eval echo $(eval echo $OCF_RESKEY_custom1_pid_check))
      IFS=, read -r CUSTOM1_pid_name CUSTOM1_pid_user CUSTOM1_pid_regexp <<< "$CUSTOM1_pid"
      CUSTOM1_check=CUSTOM1 # This is used to enable the check of the custom1 PID
      # Check args
      if [ -z "$CUSTOM1_pid_name" ]; then
         clog_service_verify $CLOG_FAILED "$log_header Invalid name for the custom1_pid_check value ($OCF_RESKEY_custom1_pid_check)"
         return $OCF_ERR_ARGS
      fi
      if [ -z "$CUSTOM1_pid_regexp" ]; then
         clog_service_verify $CLOG_FAILED "$log_header Invalid regexp for the custom1_pid_check value ($OCF_RESKEY_custom1_pid_check)"
         return $OCF_ERR_ARGS
      fi
   fi

   # Mount to checks
   if [ -n "$OCF_RESKEY_mount_to_check" ]; then
      # Format must be "/dir/dir /dir/dir2"
      mount_to_check=$(eval echo $(eval echo $OCF_RESKEY_mount_to_check))
      if ! test_regExp "$mount_to_check" "$mount_regexp"; then
         clog_service_verify $CLOG_FAILED "$log_header Invalid format of mount_to_check option (value=$mount_to_check regexp=\"$mount_regexp\")"
         return $OCF_ERR_ARGS
      fi
   fi

   # Check lock_dir
   if [ -z "$OCF_RESKEY_lock_dir" ]; then
      clog_check_file_exist $CLOG_FAILED_INVALID "$OCF_RESKEY_lock_dir"
      clog_service_verify $CLOG_FAILED
      return $OCF_ERR_ARGS
   fi
   if [ ! -w "$OCF_RESKEY_lock_dir" ]; then
      clog_service_verify $CLOG_FAILED "$log_header The lock_dir is not writable $OCF_RESKEY_lock_dir"
      return $OCF_ERR_ARGS
   fi
   BASLCK=$OCF_RESKEY_lock_dir

   clog_service_verify $CLOG_SUCCEED

   return 0
}

# Start the application and record the PID(s) of the monitored processes.
#
# Globals read : BASLCK, service_name, script, OCF_RESOURCE_INSTANCE,
#                CUSTOM_check, CUSTOM1_check, <key>_pid_file, <key>_pid_name
# Globals set  : log_header, LOG
#
# Steps : remove a leftover maintenance flag, check the existing PID files,
#         run "<script> start" (output copied to $LOG), then look up and
#         check the PID of every enabled application.
#
# Returns : 0 on success, $OCF_ERR_GENERIC when the PID files cannot be
#           checked, the script fails or a PID cannot be found or checked.
start()
{
   local app var pid pid_file app_name return_code

   clog_service_start $CLOG_INIT
   log_header="Starting Service $OCF_RESOURCE_INSTANCE >"

   #Check the maint mode
   if [ -f "$BASLCK/$service_name.maintenance" ]; then
      ocf_log debug "$log_header Maintenance mode is enable, remove it !"
      rm -f "$BASLCK/$service_name.maintenance"
   fi

   ocf_log debug "$log_header Create the directory $service_name to contain the pid file(s) of the processe(s)"
   create_pid_directory
   ocf_log debug "$log_header Check the PID files: arg not empty, file exist, remove if empty and remove if PID doesn't exist"
   # CUSTOM_check / CUSTOM1_check are empty when the check is disabled, so the
   # unquoted list only contains the enabled keys.
   for app in $CUSTOM_check $CUSTOM1_check; do
      var="${app}_pid_file"
      pid_file=${!var}
      if ! check_pid_file "$pid_file"; then
         # Explicit block: the failure is returned whatever status the
         # logging helpers report (an "a && b && return" chain silently
         # continued the start when a helper returned non-zero).
         clog_check_pid $CLOG_FAILED "$pid_file"
         clog_service_start $CLOG_FAILED
         return $OCF_ERR_GENERIC
      fi
   done

   # Start the program
   LOG=/var/log/${OCF_RESOURCE_INSTANCE//:/_}_start.log # Put this as argument !
   ocf_log info "$log_header Start the application with \"$script start\" ($LOG)."
   if [ -x "${script}" ]; then # script is executable
      "$script" start 2>&1 | tee "${LOG}"
      # Status of the script itself, not of tee
      return_code=${PIPESTATUS[0]}
      if [ "$return_code" -ne 0 ]; then
         ocf_log error "$log_header Start $service_name failed with error code \"$return_code\" (script=$script)."
         clog_service_start $CLOG_FAILED
         return $OCF_ERR_GENERIC
      fi
      ocf_log notice "$log_header $service_name started"
   else
      # NOTE(review): a missing or non-executable script is only a warning
      # and the start goes on - confirm this is intended.
      ocf_log warning "$log_header The $service_name script is not executable ($script)!"
   fi

   # Get and Check the PID file
   ocf_log info "$log_header Get and Check the pid files"
   for app in $CUSTOM_check $CUSTOM1_check; do
      get_pid_of_app "$app"
      var="${app}_pid_file"; pid_file=${!var}
      var="${app}_pid_name"; app_name=${!var}
      [ -s "$pid_file" ] || rm -f -- "$pid_file" # remove if file size=0
      pid=
      if [ -r "$pid_file" ]; then
         read -r pid < "$pid_file"
      fi
      if [ -z "$pid" ]; then
         ocf_log error "$log_header Failed to get the PID of $app_name."
         return $OCF_ERR_GENERIC
      fi
      if ! status_check_pid "$pid_file"; then
         ocf_log error "$log_header Failed to check the PID of $app_name."
         return $OCF_ERR_GENERIC
      fi
      ocf_log debug "$log_header Succeed $app_name (PID=$pid)"
   done

   clog_service_start $CLOG_SUCCEED

   return 0
}

# Stop the application, remove its PID file(s) and kill the processes still
# using the configured mount points.
#
# Globals read : script, service_name, OCF_RESOURCE_INSTANCE, CUSTOM_check,
#                CUSTOM1_check, <key>_pid_file, <key>_pid_name, mount_to_check
# Globals set  : log_header, LOG
#
# Returns : 0 on success, $OCF_ERR_GENERIC when "<script> stop" fails (the
#           PID files and the mount points are then left untouched).
stop()
{
   local app var pid_file app_name return_code FS

   clog_service_stop $CLOG_INIT
   log_header="Stopping Service $OCF_RESOURCE_INSTANCE >"

   # Stop the application
   LOG=/var/log/${OCF_RESOURCE_INSTANCE//:/_}_stop.log # Put this as argument !
   ocf_log info "$log_header Stop the application with \"$script stop\" ($LOG)"
   if [ -x "${script}" ]; then # script is executable
      "$script" stop 2>&1 | tee "${LOG}"
      # Status of the script itself, not of tee
      return_code=${PIPESTATUS[0]}
      if [ "$return_code" -ne 0 ]; then
         ocf_log error "$log_header Stop $service_name failed with error code \"$return_code\" (script=$script)."
         # Same failure reporting as the start action.
         clog_service_stop $CLOG_FAILED
         return $OCF_ERR_GENERIC
      fi
      ocf_log notice "$log_header $service_name stopped"
   else
      # NOTE(review): a missing or non-executable script is only a warning
      # and the stop goes on - confirm this is intended.
      ocf_log warning "$log_header The $service_name script is not executable ($script)!"
   fi

   # Remove the PID files
   ocf_log info "$log_header Remove the PID file(s)"
   for app in $CUSTOM_check $CUSTOM1_check; do
      var="${app}_pid_file"; pid_file=${!var}
      var="${app}_pid_name"; app_name=${!var}
      rm -f -- "$pid_file"
      ocf_log debug "$log_header PID file of $app_name removed"
   done

   # Kill process on the FS
   [[ -n $mount_to_check ]] && ocf_log info "$log_header Check process on the mounted FS"
   # mount_to_check is a space separated list, hence the unquoted expansion.
   for FS in $mount_to_check; do
      grep -E ".* ${FS} .*" /proc/mounts 2>&1 | tee -a "${LOG}"
      if [ ${PIPESTATUS[0]} -ne 0 ]; then
         ocf_log debug "$log_header FS ${FS} is not a mount point."
         continue
      fi
      # it's a mount point: list the processes using it
      fuser -m "${FS}" 2>&1 | tee -a "${LOG}"
      if [ ${PIPESTATUS[0]} -eq 0 ]; then # there are running process
         ocf_log info "$log_header FS ${FS} has process: kill -9"
         sync
         fuser -mk "${FS}" 2>&1 | tee -a "${LOG}"
      else
         ocf_log debug "$log_header FS ${FS} has no running process."
      fi
   done

   clog_service_stop $CLOG_SUCCEED
   return 0
}

# Check that every monitored process is still running.
#
# Globals read : BASLCK, service_name, OCF_RESOURCE_INSTANCE, CUSTOM_check,
#                CUSTOM1_check, <key>_pid_file, <key>_pid_name
# Globals set  : log_header
#
# When the recorded PID cannot be checked, the PID is looked up again (the
# application may have been restarted outside the cluster) before the check
# is declared failed. While the maintenance flag file exists the checks are
# skipped and the service is reported healthy.
#
# Returns : 0 when healthy or in maintenance mode, $OCF_ERR_GENERIC otherwise.
status()
{
   local app var pid_file app_name

   clog_service_status $CLOG_INIT
   log_header="Monitoring Service $OCF_RESOURCE_INSTANCE >"

   if [ -f "$BASLCK/$service_name.maintenance" ]; then
      ocf_log info "$log_header Is in maintenance mode ($BASLCK/$service_name.maintenance)!"
      return 0
   fi

   # Check the PID file
   ocf_log info "$log_header Check the pid file(s)"
   for app in $CUSTOM_check $CUSTOM1_check; do
      var="${app}_pid_file"; pid_file=${!var}
      var="${app}_pid_name"; app_name=${!var}
      [ -s "$pid_file" ] || rm -f -- "$pid_file" # remove if file size=0
      if ! status_check_pid "$pid_file"; then
         ocf_log error "$log_header Failed to check the PID of $app_name, check if the application has a new PID."
         get_pid_of_app "$app"
         # Explicit if blocks: the failure is returned whatever status
         # ocf_log reports (a "log && return" chain reported the service as
         # healthy when the logger returned non-zero).
         if [ ! -s "$pid_file" ]; then
            ocf_log error "$log_header Can't get the PID, $app_name is not running !"
            return $OCF_ERR_GENERIC
         fi
         if ! status_check_pid "$pid_file"; then
            ocf_log error "$log_header We get a PID for $app_name but its not running !"
            return $OCF_ERR_GENERIC
         fi
      fi
      ocf_log debug "$log_header Succeed $app_name (PID=$(cat "$pid_file"))"
   done

   clog_service_status $CLOG_SUCCEED
   return 0
}

# Entry point: run the action given as first argument.
action=$1

case "$action" in
   meta-data)
      # The metadata file sits next to the agent: <agent>.sh -> <agent>.metadata
      cat -- "$(printf '%s\n' "$0" | sed 's/^\(.*\)\.sh$/\1.metadata/')"
      # Report an unreadable metadata file instead of always exiting 0.
      exit $?
      ;;
   verify-all)
      verify_all
      exit $?
      ;;
   start)
      verify_all && start
      exit $?
      ;;
   stop)
      verify_all && stop
      exit $?
      ;;
   status|monitor)
      # status relies on the variables set by verify_all; with an invalid
      # configuration it would find nothing to check and report success.
      verify_all && status
      exit $?
      ;;
   restart)
      # Do not start again when the configuration is invalid or the stop failed.
      verify_all && stop && start
      exit $?
      ;;
   *)
      echo "Usage: $0 {start|stop|status|monitor|restart|meta-data|verify-all}"
      exit $OCF_ERR_GENERIC
      ;;

esac

fichier des paramètres

Voici le fichier XML décrivant les paramètres du script. Rien ne devrait être codé en dur dans le script : les modifications se font ici.

<?xml version="1.0"?>
<resource-agent version="rgmanager 2.0" name="generic_prog">
   <version>1.0</version>
   <longdesc lang="en">
      This defines a generic program as an HA resource.
   </longdesc>
   <parameters>
      <parameter name="name" unique="1" primary="1">
         <longdesc lang="en">
            Specifies a generic program name for: check maint lock, logging and other purposes
         </longdesc>
         <content type="string"/>
      </parameter>
      <parameter name="script" required="1">
         <longdesc lang="en">
            Specifies the full path of the program script to start and stop
            You can use \${service_name_upper} to have the service name in the PATH.
         </longdesc>
         <content type="string"/>
      </parameter>
      <parameter name="custom_pid_check">
         <longdesc lang="en">
            Define the regular expression to get the PID of a custom process.
            Multiple values arg separated by ,: [process_name],[username],[regexp]
            -process_name to display human readable log entry
            -username is the owner of the process, may be null
         </longdesc>
         <content type="string"/>
      </parameter>
      <parameter name="custom1_pid_check">
         <longdesc lang="en">
            Define the regular expression to get the PID of a second custom process.
            Multiple values arg separated by ,: [process_name],[username],[regexp]
            -process_name to display human readable log entry
            -username is the owner of the process, may be null
         </longdesc>
         <content type="string"/>
      </parameter>
      <parameter name="lock_dir">
         <longdesc lang="en">
            Define the path of the lock dir for the maintenance mode
         </longdesc>
         <content type="string" default="/usr/local/lockpkg"/>
      </parameter>
      <parameter name="mount_to_check">
         <longdesc lang="en">
            The mount point(s) to check for remaining processes after the application has been stopped
            Multiple values arg separated by space: "/\${service_name_upper}/dir0 /\${service_name_upper}/dir1"
         </longdesc>
         <content type="string"/>
      </parameter>
      <parameter name="service_name" inherit="service%name">
         <longdesc lang="en">
            Inherit the service name.  We need to know
            the service name in order to determine file
            systems and IPs for this service.
         </longdesc>
         <content type="string"/>
      </parameter>
   </parameters>

   <actions>
      <action name="start" timeout="180"/>
      <action name="stop" timeout="180"/>
   
      <!-- Periodic check of the PID(s) of the monitored process(es) -->
      <action name="status" interval="1m" timeout="10"/>
      <action name="monitor" interval="1m" timeout="10"/>
   
      <action name="meta-data" timeout="0"/>
      <action name="verify-all" timeout="0"/>
   </actions>

   <special tag="rgmanager">
   </special>
</resource-agent>

Exemple d'implémentation

Détail d'un service :

<service autostart="1" name="service1">
	<netfs export="/vol/service1" force_unmount="1" fstype="nfs" host="nas" mountpoint="/service1" name="service1" options="rw,nolock,bg,intr,hard,timeo=600,wsize=32768,rsize=32768,vers=3,tcp"/>
	<ip address="1.1.1.1" monitor_link="1"/>
	<generic_prog name="service1" script="/\${service_name_upper}/admin/srv.sh" custom_pid_check="JAVA-proc,,^[[:alnum:]]+[[:space:]]+[[:digit:]]+[[:space:]]+1[[:space:]]+.*/\${service_name_upper}/Java/bin/java" mount_to_check="/\${service_name_upper}"/>
</service>