ENH: mpirunDebug improvements

- provide '-clean' option for removing old files

- handle out of order '-decomposeParDict' as per RunFunctions

- implicit no-prompt '-yes' with the shortcuts (eg, -log, -xterm)
  and set -local if not already defined

- accept <return> to select defaults
This commit is contained in:
Mark Olesen 2021-05-21 09:15:03 +02:00
parent da30533a8f
commit 48c05e5441

View File

@ -7,34 +7,21 @@
# \\/ M anipulation |
#------------------------------------------------------------------------------
# Copyright (C) 2011-2015 OpenFOAM Foundation
# Copyright (C) 2017-2018 OpenCFD Ltd.
# Copyright (C) 2017-2021 OpenCFD Ltd.
#------------------------------------------------------------------------------
# License
# This file is part of OpenFOAM.
#
# OpenFOAM is free software: you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# OpenFOAM is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with OpenFOAM. If not, see <http://www.gnu.org/licenses/>.
# This file is part of OpenFOAM, distributed under GPL-3.0-or-later.
#
# Script
# mpirunDebug
#
# Description
# Driver script to run mpi jobs with the processes in a separate XTerm
# or to separate log files.
# Invoke mpirun with separate per-processor log files
# or running in separate XTerms.
# Requires bash on all processors.
#
#------------------------------------------------------------------------------
. $WM_PROJECT_DIR/bin/tools/RunFunctions # Run functions
. "${WM_PROJECT_DIR:?}"/bin/tools/RunFunctions # Run functions
usage() {
exec 1>&2
@ -44,28 +31,76 @@ usage() {
Usage: ${0##*/} [OPTION] -np <N> <executable> <args>
options:
-method=MODE Run mode
(0) normal
(1) gdb+xterm
(2) gdb
(3) log
(4) log + xterm
(5) valgrind + xterm
(5l) valgrind + log
(6) gperftools(callgrind)
-method=MODE The run mode
(0) normal
(1) gdb+xterm
(2) gdb
(3) log
(4) log + xterm
(5) valgrind + xterm
(5l) valgrind + log
(6) gperftools(callgrind)
-spawn=TYPE Spawn type: (1) local (2) remote
-log Alias for -method=3
-valgrind Alias for -method=5l (valgrind + log)
-local Alias for -spawn=1
-yes Start without additional prompt
-yes Start without additional prompting
-local Same as -spawn=1
-remote Same as -spawn=2
-clean Remove old processor*.{log,sh} files, mpirun.schema etc
-help Print the usage
Invoke mpirun but with each process in a separate XTerm, or to separate logfile
Invoke mpirun with separate per-processor log files or running in
separate XTerms.
Common shortcuts. Sets default spawn to -local, add -yes.
-normal = -method=0
-log = -method=3
-xlog = -method=4 (log + xterm)
-valgrind = -method=5l (valgrind + log)
-xvalgrind = -method=5 (valgrind + xterm)
Also detects some OpenFOAM options:
-decomposeParDict <file> Use specified file for decomposePar dictionary
USAGE
exit 0 # A clean exit
}
# Report error and exit
#
# Arguments: each argument is printed as one line of the error message
# Outputs:   the complete report goes to stderr, nothing to stdout
# Exit:      always terminates the shell with status 1
die()
{
    {
        printf '\n%s\n' "Error encountered:"
        for msgLine in "$@"
        do
            printf ' %s\n' "$msgLine"
        done
        # Point the user at the built-in help, using the invoked script name
        printf '\n%s\n\n' "See '${0##*/} -help' for usage"
    } 1>&2
    exit 1
}
#-------------------------------------------------------------------------------
# Method naming/numbering correspondence
methodPrompt="0)normal 1)gdb+xterm 2)gdb 3)log 4)log+xterm 5)valgrind+xterm 5l)valgrind+log 6)gperftools(callgrind)"
methodNumberToName()
{
    # Translate a run-method selector (menu number or short alias, $1)
    # into the canonical method name, which is printed on stdout.
    # Returns 1 without printing anything for an unrecognized selector.
    local canonical

    case "$1" in
    (0 | norm*)   canonical="normal" ;;
    (1)           canonical="gdb-xterm" ;;
    (2)           canonical="gdb" ;;
    (3 | log)     canonical="log" ;;
    (4 | xterm)   canonical="log-xterm" ;;
    (5)           canonical="valgrind-xterm" ;;
    (5l | valgr*) canonical="valgrind" ;;
    (6)           canonical="gperf" ;;
    (*)           return 1 ;;
    esac

    echo "$canonical"
}
#-------------------------------------------------------------------------------
# Basic settings
case "$(uname -s)" in
Linux)
ECHO='echo -e'
@ -75,52 +110,102 @@ Linux)
;;
esac
unset nProcs appName appArgs
unset method spawn optNoAsk
unset appName appArgs nProcs
unset method spawn optClean
optConfirm=true
decompDict="system/decomposeParDict"
# parse options
# Parse options
while [ "$#" -gt 0 ]
do
# echo "$1" 1>&2
# Our own options (before the application is specified)
#
# Every option recognized here must be consumed on the spot
# (shift + continue) so that it never reaches the general option
# processing that follows. The 'knownOption' flag controls this and
# must therefore only ever hold true/false.
if [ -z "$appName" ]
then
    knownOption=true        # Assume success
    case "$1" in
    '') ;;                  # ignore junk

    -clean) optClean=true ;;
    -yes)   unset optConfirm ;;

    -local | -remote)
        spawn="${1#-}"      # Strip the leading '-': local | remote
        ;;

    -spawn=1) spawn="local" ;;
    -spawn=2) spawn="remote" ;;

    -method=[0-9]*)
        # Hold the requested value in a scratch variable.
        # Reusing 'knownOption' for it would make the consumed-option
        # check below fail, and the already handled -method=... would
        # fall through to the general processing.
        input="${1#*=}"
        method="$(methodNumberToName "$input")" || \
            die "Unknown run method \"$input\""
        ;;

    # Shortcuts: select the method, skip the confirmation prompt and
    # default to local spawning unless a spawn type was already given
    -normal | -log)
        method="${1#*-}"
        unset optConfirm
        : "${spawn:=local}"
        ;;

    -xlog | -xterm)
        method="log-xterm"
        unset optConfirm
        : "${spawn:=local}"
        ;;

    -valgr*)
        method="valgrind"
        unset optConfirm
        : "${spawn:=local}"
        ;;

    -xvalgr*)
        method="valgrind-xterm"
        unset optConfirm
        : "${spawn:=local}"
        ;;

    -np)
        nProcs="$2"
        shift               # The value; the option itself is shifted below
        ;;

    -decomposeParDict)
        # Grab values and add to args immediately
        decompDict="$2"
        appArgs="${appArgs}${appArgs:+ }$1 \"$2\""
        shift               # The value; the option itself is shifted below
        ;;

    *)
        knownOption=false   # Fallthrough to regular processing
        ;;
    esac

    if [ "$knownOption" = true ]
    then
        shift
        continue
    fi
fi
case "$1" in
-help*)
usage
;;
-method=[0-6]* | -method=5l)
method="${1#*=}"
;;
-spawn=[1-2])
spawn="${1#*=}"
;;
-log)
method=3
;;
-valgrind)
method=5l
;;
-local)
spawn=1
;;
-yes)
optNoAsk=true
;;
-help* | --help*) usage ;;
'') ;; # ignore junk
-np)
nProcs=$2
nProcs="$2"
shift
;;
-decomposeParDict)
decompDict=$2
appArgs="${appArgs}${appArgs:+ }\"$1\""
# Grab values and add to args immediately
decompDict="$2"
appArgs="${appArgs}${appArgs:+ }$1 \"$2\""
shift
;;
*)
@ -135,65 +220,83 @@ do
shift
done
#-------------------------------------------------------------------------------
# No -np specified?
# Try guess from system/decomposeParDict or command-line -decomposeParDict
if [ -z "$nProcs" -a -f "$decompDict" ]
# Cleanup only
if [ -n "$optClean" ]
then
nProcs=$(getNumberOfProcessors $decompDict) || unset nProcs
echo "Cleanup old mpirunDebug files..." 1>&2
rm -f gdbCommands mpirun.schema
rm -f processor*.log processor*.sh
echo " gdbCommands mpirun.schema" 1>&2
echo " processor*.log processor*.sh" 1>&2
echo "Done" 1>&2
exit 0
fi
echo "nProcs=$nProcs"
echo "exec=$appName"
echo "args=$appArgs"
#-------------------------------------------------------------------------------
# Without an explicit -np value, attempt to obtain the processor count
# from the decomposePar dictionary (system/decomposeParDict by default,
# or the file given with -decomposeParDict).
# A failed lookup leaves nProcs unset.
if [ -n "$nProcs" ] || [ ! -f "$decompDict" ]
then
    :   # Nothing to guess: count already known, or no dictionary file
elif ! nProcs=$(getNumberOfProcessors "$decompDict")
then
    unset nProcs
fi
cat << REPORT_SETTINGS 1>&2
Run parameters:
procs : ${nProcs:-[]}
exec : ${appName:-[]}
args : ${appArgs:-[]}
REPORT_SETTINGS
[ -n "$nProcs" ] || die "Number of processors not specified or not detected"
[ -n "$appName" ] || die "No application specified"
[ -n "$appArgs" ] || die "No application arguments"
[ -n "$nProcs" ] || usage
[ -n "$appArgs" ] || usage
[ -n "$appName" ] || usage
exec=$(command -v $appName)
[ -x "$exec" ] || {
echo "Cannot find executable $appName or is not executable"
usage
}
[ -n "$PWD" ] || PWD=$(pwd)
echo "run $appArgs" > $PWD/gdbCommands
echo "where" >> $PWD/gdbCommands
echo "Constructed gdb initialization file $PWD/gdbCommands"
[ -x "$exec" ] || die "Command not found or not executable: $appName"
[ -n "$PWD" ] || PWD="$(pwd)"
# Choose method
if [ -z "$method" ]
then
$ECHO "Choose running method: 0)normal 1)gdb+xterm 2)gdb 3)log 4)log+xterm 5)valgrind+xterm 5l)valgrind+log 6)gperftools(callgrind): \c"
read method
case "$method" in
0 | 1 | 2 | 3 | 4 | 5 | 5l | 6)
# okay
;;
*)
usage
;;
esac
echo "Choose running method: ${methodPrompt}"
$ECHO "[normal] > \c"
read input
: "${input:=0}" # Default (0) normal
method="$(methodNumberToName "$input")" || \
die "Unknown run method \"$input\""
fi
# Choose spawn
if [ -z "$spawn" ]
then
$ECHO "Run all processes local or distributed? 1)local 2)remote: \c"
read spawn
case "$spawn" in
1 | 2)
# okay
;;
*)
usage
;;
echo "Run all processes local or distributed? 1)local 2)remote"
$ECHO "[local] > \c"
read input
: "${input:=1}" # Default (1) local
case "$input" in
(1) spawn="local" ;;
(2) spawn="remote" ;;
(*) die "Unknown spawn type \"$input\""
esac
fi
# Methods with gdb:
# any method whose name contains 'gdb' needs a gdb initialization file
# that starts the application with its arguments and then prints the
# stack ('where').
if [ "${method#*gdb}" != "$method" ]
then
    {
        echo "run $appArgs"
        echo "where"
    } > "$PWD"/gdbCommands
    echo "Constructed gdb initialization file $PWD/gdbCommands" 1>&2
fi
sourceFoam=false # Fallback command
@ -227,10 +330,10 @@ case "$sourceFoam" in
;;
esac
echo "**sourceFoam: $sourceFoam"
echo "**sourceFoam: $sourceFoam" 1>&2
rm -f $PWD/mpirun.schema
touch $PWD/mpirun.schema
rm -f "$PWD"/mpirun.schema
touch "$PWD"/mpirun.schema
proc=0
xpos=0
@ -248,50 +351,50 @@ do
;;
esac
echo "#!/bin/bash" > $procCmdFile
echo "$sourceFoam" >> $procCmdFile
echo "cd $PWD" >> $procCmdFile
cat << COMMANDS > "$procCmdFile"
#!/bin/bash
$sourceFoam
cd "${PWD}" || exit
COMMANDS
# Add to the mpirun.schema
# - methods with 'xterm' in their name run the per-processor script
#   via the xterm command, all other methods run the script directly
{
    if [[ "$method" == *xterm* ]]
    then
        echo "${node}${xterm} -e ${procCmdFile}"
    else
        echo "${node}${procCmdFile}"
    fi
} >> "$PWD"/mpirun.schema
case "$method" in
0)
echo "${node}$procCmdFile" >> $PWD/mpirun.schema
echo "$exec $appArgs | tee $procLog" >> $procCmdFile
(normal)
echo "$exec $appArgs | tee $procLog"
;;
1)
echo "${node}$xterm -e $procCmdFile" >> $PWD/mpirun.schema
echo "gdb -command $PWD/gdbCommands $exec 2>&1 | tee $procLog"
echo "read dummy"
;;
2)
echo "${node}$procCmdFile" >> $PWD/mpirun.schema
echo "gdb -command $PWD/gdbCommands $exec > $procLog 2>&1"
;;
3)
echo "${node}$procCmdFile" >> $PWD/mpirun.schema
(log)
echo "$exec $appArgs > $procLog 2>&1"
;;
4)
echo "${node}$xterm -e $procCmdFile" >> $PWD/mpirun.schema
(log-xterm)
echo "$exec $appArgs 2>&1 | tee $procLog"
echo "read dummy"
echo "read input"
;;
5)
echo "${node}$xterm -e $procCmdFile" >> $PWD/mpirun.schema
echo "valgrind --leak-check=full --show-reachable=yes $exec $appArgs 2>&1 | tee $procLog"
echo "read dummy"
(gdb)
echo "gdb -command $PWD/gdbCommands $exec > $procLog 2>&1"
;;
5l)
echo "${node}$procCmdFile" >> $PWD/mpirun.schema
(gdb-xterm)
echo "gdb -command $PWD/gdbCommands $exec 2>&1 | tee $procLog"
echo "read input"
;;
(valgrind | valgrind-log)
echo "valgrind --leak-check=full --show-reachable=yes $exec $appArgs > $procLog 2>&1"
;;
6)
echo "${node}$procCmdFile" >> $PWD/mpirun.schema
(valgrind-xterm)
echo "valgrind --leak-check=full --show-reachable=yes $exec $appArgs 2>&1 | tee $procLog"
echo "read input"
;;
(gperf)
echo "CPUPROFILE=log.profiler_$proc $exec $appArgs"
echo "pprof --callgrind $exec log.profiler_$proc > log.profiler_$proc.callgrind"
;;
esac >> $procCmdFile
esac >> "$procCmdFile"
chmod +x $procCmdFile
chmod +x "$procCmdFile"
let column=proc%6
if [ $proc -ne 0 -a $column -eq 0 ]
@ -306,7 +409,7 @@ done
for ((proc=0; proc<$nProcs; proc++))
do
procLog="processor${proc}.log"
echo " tail -f $procLog"
echo " tail -f $procLog" 1>&2
done
unset cmd
@ -330,28 +433,27 @@ MPICH)
cmd="${cmd} :"
fi
cmd="${cmd} -n 1 ${procXtermCmdFile}"
done < $PWD/mpirun.schema
done < "$PWD"/mpirun.schema
;;
*)
echo
echo "Unsupported WM_MPLIB setting : $WM_MPLIB"
usage
exit 1
die "Unsupported WM_MPLIB setting : $WM_MPLIB"
;;
esac
echo "Constructed $PWD/mpirun.schema file."
echo
echo " $cmd"
echo
echo 1>&2
echo "Constructed $PWD/mpirun.schema file:" 1>&2
echo 1>&2
echo " $cmd" 1>&2
echo 1>&2
if [ -n "$optNoAsk" ]
if [ -n "$optConfirm" ]
then
echo "starting: " $(date '+%Y-%m-%d %H:%M:%S %z' 2>/dev/null)
echo
else
# Pause before running
$ECHO "Press return to execute.\c"
read dummy
read input
else
echo "starting: $(date '+%Y-%m-%d %H:%M:%S %z' 2>/dev/null)" 1>&2
echo 1>&2
fi
exec $cmd