#!/bin/sh
#------------------------------------------------------------------------------
# =========                 |
# \\      /  F ield         | OpenFOAM: The Open Source CFD Toolbox
#  \\    /   O peration     |
#   \\  /    A nd           | www.openfoam.com
#    \\/     M anipulation  |
#-------------------------------------------------------------------------------
#     Copyright (C) 2011 OpenFOAM Foundation
#------------------------------------------------------------------------------
# License
#     This file is part of OpenFOAM, distributed under GPL-3.0-or-later.
#
# Script
#     foamCheckJobs
#
# Description
#     Uses runningJobs/, finishedJobs/ and foamProcessInfo to create stateFile.
#     stateFile contains per pid information on state of process. Format:
#         pid state command
#
#     where state is one of 'RUNN', 'SUSP', 'OTHR', 'FINI', 'ABRT' ('PEND')
#     (first three are from foamProcessInfo, others from jobInfo files)
#     (PEND is special state from when user has submitted but no jobInfo
#      file yet. Not supported by this script yet)
#
#------------------------------------------------------------------------------
Script=${0##*/}

#-------------------------------------------------------------------------------
#- User settings

#- Number of days for files to be considered old
NDAYSLIMIT=7
#-------------------------------------------------------------------------------

#- work dir. Needs to be accessible for all machines
MACHDIR=$HOME/.OpenFOAM/${Script}
DEFSTATEFILE=$HOME/.OpenFOAM/foamCheckJobs.out

# The default is "~/.OpenFOAM/jobControl"
: "${FOAM_JOB_DIR:=$HOME/.OpenFOAM/jobControl}"

#- Newline, used as separator when building the list of old lock files
NL='
'


# usage
#     Prints a short help text and exits.
#
# NOTE(review): the here-document text of this function was lost in the copy
# of the file this rewrite is based on. The text below is rebuilt from what
# the script visibly does; restore the upstream wording if it is available.
# NOTE(review): 'exit 1' is assumed - the callers below only make sense if
# usage does not return.
usage() {
    cat <<USAGE

Usage: $Script [stateFile]

Checks the locks in runningJobs/ of the job-control directory against the
process information collected with foamProcessInfo on every machine involved
(remote machines are contacted with ssh). Locks of jobs that are no longer
running can be moved to finishedJobs/, and locks older than $NDAYSLIMIT days
can be removed. The state of every job is written to stateFile
(default: $DEFSTATEFILE).

FOAM_JOB_DIR : $FOAM_JOB_DIR

USAGE
    exit 1
}


#------------------------------------------------------------------------------
#
# Functions
#
#------------------------------------------------------------------------------

# prompt text
#     Prints text without a trailing newline.
#     printf is used because neither 'echo -e' nor a trailing '\c' is handled
#     the same way by every /bin/sh.
prompt() {
    printf '%s' "$1"
}


# getEntry dictionary entry
#     Prints the value of an entry of a jobInfo file with surrounding double
#     quotes and the trailing ';' removed.
#
# NOTE(review): the definition was lost in the copy of the file this rewrite
# is based on; reconstructed from its single call site
# (getEntry <jobInfo file> endDate) - confirm against upstream.
getEntry() {
    grep -v '^//' "$1" 2>/dev/null |
        sed -n -e "s/^[[:space:]]*$2[[:space:]][[:space:]]*//p" |
        sed -e 's/;[[:space:]]*$//' -e 's/^"//' -e 's/"$//'
}


# notEmpty directory
#     Returns 0 if directory contains (non-hidden) files or directories.
#
# NOTE(review): the definition was lost in the copy of the file this rewrite
# is based on; reconstructed from its call sites - confirm against upstream.
notEmpty() {
    [ -n "$(ls -- "$1" 2>/dev/null)" ]
}


# dayDiff <date string 1> <date string 2>
#     Prints number of days between the two
#     Eg. dayDiff "Jan 10 2002" "Dec 28 2001"
#     ==> 13
#     Prints 0 when 'date -d' is not supported or a date cannot be parsed.
dayDiff() {
    # Work in UTC epoch seconds so that leap years are counted correctly and
    # daylight-saving shifts cannot change the result.
    secs1=$(date -u -d "$1" '+%s' 2>/dev/null) || { echo "0"; return; }
    secs2=$(date -u -d "$2" '+%s' 2>/dev/null) || { echo "0"; return; }
    echo $(( ($secs1 - $secs2) / 86400 ))
}
#dayDiff "`date '+%b %d %Y'`" "Dec 28 2001"


# getAllJobs jobInfoDirectory
#     Prints list of all jobs in directory (e.g. runningJobs/)
#     Also handles 'slaves' entries in jobInfo:
#         slaves 1 ( penfold.23766 );
#     The jobInfo files come first, followed by the names found in 'slaves'
#     entries. The list is whitespace separated.
getAllJobs() {
    jobList=''
    slaveList=''
    if notEmpty "$1"
    then
        for jobFile in "$1"/*
        do
            jobList="$jobList $jobFile"
            slaveLine=$(grep '^[[:space:]]*slaves' "$jobFile" 2>/dev/null) \
                || continue
            # Keep only what is between the parentheses
            slaveJobs=$(echo "$slaveLine" | sed -e 's/.*(\(.*\)).*/\1/')
            slaveList="$slaveList $slaveJobs"
        done
    fi
    echo "$jobList$slaveList"
}


# releaseLock jobId lockFile
#     Releases lock on jobId: the lock file (if it exists as a regular file)
#     is moved to finishedJobs/. The message is printed in either case.
releaseLock() {
    if [ -f "$2" ]
    then
        #- move lock to finishedJobs
        mv -- "$2" "$FOAM_JOB_DIR/finishedJobs/"
    fi
    echo "Lock on job $1 released."
}


#-------------------------------------------------------------------------------
#
# Main
#
#-------------------------------------------------------------------------------

case "$1" in
(-h | -help*)
    usage
    ;;
esac

if [ $# -eq 1 ]
then
    STATEFILE="$1"
elif [ $# -eq 0 ]
then
    STATEFILE=${STATEFILE:-$DEFSTATEFILE}
else
    usage
fi

#- Check a few things
if [ ! -d "$FOAM_JOB_DIR" ]
then
    echo "$Script : directory does not exist."
    echo " FOAM_JOB_DIR=$FOAM_JOB_DIR"
    echo
    exit 1
fi
if [ ! -d "$FOAM_JOB_DIR/runningJobs" ] || [ ! -d "$FOAM_JOB_DIR/finishedJobs" ]
then
    echo "$Script : invalid directory."
    echo " FOAM_JOB_DIR=$FOAM_JOB_DIR"
    echo
    exit 1
fi

#- obtain rsh method
RSH='ssh'
echo "Using remote shell type : $RSH"

echo ""
echo "Collecting information on jobs in"
echo " $FOAM_JOB_DIR"
echo ""

#- work file. Created with mktemp (unpredictable name) and removed on exit.
TMPFILE=$(mktemp "${TMPDIR:-/tmp}/${Script}.XXXXXX") || exit 1
trap 'rm -f -- "$TMPFILE"' EXIT
trap 'exit 1' HUP INT TERM

#- Collect machine names into $TMPFILE
#  Also handles 'slaves' entry in jobInfo:
RUNJOBS=$(getAllJobs "$FOAM_JOB_DIR/runningJobs")

# RUNJOBS is deliberately left unquoted: it is a whitespace-separated list.
for f in $RUNJOBS
do
    # Entries taken from a 'slaves' list may be wrapped in double quotes
    machinePid=$(basename "$f" | sed -e 's/^"//' -e 's/"$//')
    machine=$(echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')

    # Whole-line, fixed-string match: 'node1' must not be treated as known
    # just because 'node10' is already listed.
    if ! grep -Fxq -- "$machine" "$TMPFILE"
    then
        echo "$machine" >> "$TMPFILE"
    fi
done
echo "Found machines:"
cat "$TMPFILE"
echo ""


#- Collect process info on all machines, one file per machine
mkdir -p "$MACHDIR"
thisHost=$(hostname)
cnt=1
# The list is read by line number instead of redirecting $TMPFILE into the
# loop, so the loop body keeps the script's own stdin.
while true
do
    machine=$(sed -n -e "${cnt}p" "$TMPFILE")
    if [ -z "$machine" ]
    then
        break
    fi

    machFile=$MACHDIR/$machine
    rm -f "$machFile"
    echo "Contacting $machine to collect process information:"
    if [ "$machine" = "$thisHost" ]
    then
        echo " foamProcessInfo $machFile"
        foamProcessInfo "$machFile" >/dev/null 2>&1
        rc=$?
    else
        echo " $RSH $machine foamProcessInfo $machFile"
        # stdin from /dev/null so the remote shell cannot swallow answers
        # typed ahead for the prompts further down.
        $RSH "$machine" foamProcessInfo "$machFile" </dev/null >/dev/null 2>&1
        rc=$?
    fi

    if [ "$rc" -ne 0 ] || [ ! -s "$machFile" ]
    then
        echo "** Failed collecting process information on $machine."
        echo "Check $machFile and run foamProcessInfo by hand"
        rm -f "$machFile"
    else
        echo "Succesfully collected information in $machFile ..."
    fi

    cnt=$((cnt + 1))
done
echo ""


#- Construct state for runningJobs; move non running jobs to finishedJobs
releaseAll=''
rm -f "$STATEFILE"
for f in $RUNJOBS
do
    # Same name clean-up as when the machine list was built, so that the
    # per-machine file is found for quoted 'slaves' entries as well.
    machinePid=$(basename "$f" | sed -e 's/^"//' -e 's/"$//')
    machine=$(echo "$machinePid" | sed -e 's/\.[0-9][0-9]*$//')
    pid=$(echo "$machinePid" | sed -e 's/.*\.\([0-9][0-9]*\)$/\1/')

    machFile=$MACHDIR/$machine
    # No process information for this machine: leave the job untouched
    if [ ! -r "$machFile" ]
    then
        continue
    fi

    entry=$(grep "^$pid " "$machFile" 2>/dev/null)
    if [ -n "$entry" ]
    then
        # Process still exists: second column of its entry is the state
        state=$(echo "$entry" | awk '{print $2}')
        echo "$machinePid $state" >> "$STATEFILE"
    elif [ -n "$releaseAll" ]
    then
        releaseLock "$machinePid" "$f"
    else
        prompt "Job $machinePid seems to be no longer running. Release lock? (y/a)"
        read -r answ
        # Empty answer counts as 'y'; 'a' releases this and all later locks
        # without asking again; anything else keeps the lock.
        case "${answ:-y}" in
        y)
            releaseLock "$machinePid" "$f"
            ;;
        a)
            releaseAll='yes'
            releaseLock "$machinePid" "$f"
            ;;
        *)
            state='OTHR'
            echo "$machinePid $state" >> "$STATEFILE"
            ;;
        esac
    fi
done


#- Collect old jobs in finishedJobs
#  (-type f: do not pick up the finishedJobs directory itself)
OLDFILES=$(find "$FOAM_JOB_DIR/finishedJobs" -type f -mtime +"$NDAYSLIMIT" -print)

#- Construct state for finishedJobs and check on date of files.
if notEmpty "$FOAM_JOB_DIR/finishedJobs"
then
    # C locale: month abbreviation must match the format used in endDate
    dateNow=$(LC_ALL=C date '+%b %d %Y')
    for f in "$FOAM_JOB_DIR"/finishedJobs/*
    do
        # Only non-empty regular files carry job information
        { [ -f "$f" ] && [ -s "$f" ]; } || continue

        machinePid=$(basename "$f")

        end=$(getEntry "$f" endDate)
        if [ -z "$end" ]
        then
            # No end date recorded in the jobInfo file
            state='ABRT'
        else
            nDaysOld=$(dayDiff "$dateNow" "$end")
            if [ "$nDaysOld" -gt "$NDAYSLIMIT" ]
            then
                # Add only if find has not listed the file already, so the
                # count reported below is not inflated.
                case "$NL$OLDFILES$NL" in
                *"$NL$f$NL"*)
                    ;;
                *)
                    OLDFILES="$OLDFILES$NL$f"
                    ;;
                esac
            fi
            state='FINI'
        fi

        echo "$machinePid $state" >> "$STATEFILE"
    done
fi


#- Remove old locks
#  (arithmetic expansion strips the padding some wc implementations print)
nOldFiles=$(( $(echo "$OLDFILES" | wc -w) ))
if [ "$nOldFiles" -gt 0 ]
then
    echo "You seem to have $nOldFiles locks older than $NDAYSLIMIT days in finishedJobs/"
    prompt "Do you want to remove these? (y)"
    read -r answ
    if [ "${answ:-y}" = 'y' ]
    then
        # OLDFILES is deliberately left unquoted: whitespace-separated list
        rm -f -- $OLDFILES
    fi
fi


# $TMPFILE is removed by the EXIT trap
rm -rf -- "$MACHDIR"

echo ""
echo "Updated stateFile:"
echo " $STATEFILE"
echo ""

#------------------------------------------------------------------------------