Revision 59b6a738ea01e4d9f10d4b9def69f2fd39e68f21 authored by Dalei Hao on 02 August 2023, 14:45:53 UTC, committed by Dalei Hao on 02 August 2023, 14:45:53 UTC
1 parent a24e542
Raw File
syslog.theta
#!/bin/csh -f
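# Theta syslog script: while a job runs, periodically saves component log progress,
#  timing files, and scheduler/node status to the output directory.
# Usage:
#  mach_syslog <sampling interval (in seconds)> <job identifier> <timestamp> <run directory> <timing directory> <output directory>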

# All six positional arguments are required. Each one is used later to build a
# scheduler query or a file path, so refuse to start with a short argument list
# rather than run qstat, wc and cp against empty names.
if ($#argv < 6) then
  echo "Usage: $0 <sampling interval (in seconds)> <job identifier> <timestamp> <run directory> <timing directory> <output directory>"
  exit 1
endif

# Seconds to sleep between samples.
set sample_interval = $1
# Job identifier handed to qstat.
set jid = $2
# Timestamp used as the suffix of the log file names.
set lid = $3
# Run directory holding e3sm.log.$lid and the component logs.
set run = $4
# Timing directory whose contents are copied to the output directory.
set timing = $5
# Output directory for everything this script saves.
set dir = $6

# Wait until job task-to-node mapping information is output before saving output file.
# Target length was determined empirically (maximum number of lines before job mapping
#  information starts + number of nodes), and it may need to be adjusted in the future.
# (Note that calling script 'touch'es the e3sm log file before spawning this script, so that 'wc' does not fail.)
set nnodes = `qstat -lf $jid | grep -F Nodes | sed 's/ *Nodes *: *\([0-9]*\) */\1/' `
# If qstat returns nothing for the job, nnodes is empty and the '@' expression
# below would stop the script with a syntax error; fall back to 0 nodes so the
# empirical base length alone is used as the target.
if ("X$nnodes" == "X") set nnodes = 0
@ target_lines = 150 + $nnodes
# Give the job a moment to start writing before the first length check.
sleep 10
# Keep only the leading line count from the 'wc -l' output.
set outlth = `wc \-l $run/e3sm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
# Poll once a minute until the log has reached the target length.
while ($outlth < $target_lines)
  sleep 60
  set outlth = `wc \-l $run/e3sm.log.$lid | sed 's/ *\([0-9]*\) *.*/\1/' `
end

# Query the scheduler for the job's remaining wallclock time (three
# colon-separated numeric fields) and convert it to seconds.
set time_left = `qstat -l --header TimeRemaining $jid | grep -F TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
# Pull out each field with leading zeros removed; a field that is all zeros
# (or an empty qstat answer) comes back as an empty string.
set hrs  = `echo $time_left | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
set mins = `echo $time_left | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
set secs = `echo $time_left | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
# Treat empty fields as zero so the arithmetic below always has operands.
if ("$hrs" == "") then
  set hrs = 0
endif
if ("$mins" == "") then
  set mins = 0
endif
if ("$secs" == "") then
  set secs = 0
endif
@ remaining = $secs + 60 * $mins + 3600 * $hrs

# Publish the remaining time and the sampling interval in the run directory.
echo "$remaining $sample_interval" > $run/Walltime.Remaining

# Save a copy of the e3sm log, tagged with the remaining time.
/bin/cp --preserve=timestamps $run/e3sm.log.$lid $dir/e3sm.log.$lid.$remaining

# When no time remains the sampling loop below will not run, so take the
# node and queue status snapshots once here.
if ($remaining <= 0) then
  set tag = "$lid.$remaining"
  xtnodestat > $dir/xtnodestat.$tag
  qstat --header JobID:State:Nodes:Location | grep -Fa -e "running" -e "State" -e "starting" -e "exiting" > $dir/qstatn.$tag
  qstat --header JobID:JobName:User:Project:WallTime:RunTime:TimeRemaining:Nodes:State:StartTime:attrs | grep -Fa -e "running" -e "State" -e "starting" -e "exiting" > $dir/qstatr.$tag
endif

# Sampling loop: runs for as long as the scheduler reports wallclock time left.
while ($remaining > 0)
  set stamp = "Wallclock time remaining: $remaining"
  set tag = "$lid.$remaining"

  # For each component log, append a time-remaining marker followed by the
  # last four matching progress lines from the run directory.
  echo "$stamp" >> $dir/atm.log.$lid.step
  grep -Fa -e "nstep" -e "model date" $run/*atm.log.$lid | tail -n 4 >> $dir/atm.log.$lid.step

  echo "$stamp" >> $dir/lnd.log.$lid.step
  grep -Fa -e "timestep" -e "model date" $run/*lnd.log.$lid | tail -n 4 >> $dir/lnd.log.$lid.step

  echo "$stamp" >> $dir/ocn.log.$lid.step
  grep -Fa -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail -n 4 >> $dir/ocn.log.$lid.step

  echo "$stamp" >> $dir/ice.log.$lid.step
  grep -Fa -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail -n 4 >> $dir/ice.log.$lid.step

  echo "$stamp" >> $dir/rof.log.$lid.step
  grep -Fa "model date" $run/*rof.log.$lid | tail -n 4 >> $dir/rof.log.$lid.step

  # Coupler: rewrite the full list of "model date" lines each pass, then
  # append the marker and the last four of those lines to the step file.
  grep -Fa "model date" $run/*cpl.log.$lid  > $dir/cpl.log.$lid.step-all
  echo "$stamp" >> $dir/cpl.log.$lid.step
  tail -n 4 $dir/cpl.log.$lid.step-all >> $dir/cpl.log.$lid.step

  # Copy timing files that are newer than the copies already saved.
  /bin/cp --preserve=timestamps -u $timing/* $dir

  # Node and queue status snapshots, tagged with the remaining time.
  xtnodestat > $dir/xtnodestat.$tag
  qstat --header JobID:State:Nodes:Location | grep -Fa -e "running" -e "State" -e "starting" -e "exiting" > $dir/qstatn.$tag
  qstat --header JobID:JobName:User:Project:WallTime:RunTime:TimeRemaining:Nodes:State:StartTime:attrs | grep -Fa -e "running" -e "State" -e "starting" -e "exiting" > $dir/qstatr.$tag

  # Make everything saved so far world-readable.
  chmod a+r $dir/*

  # Sleep for the sampling interval, in pieces of at most 120 seconds.
  set nap = $sample_interval
  while ($nap > 120)
    sleep 120
    @ nap -= 120
  end
  sleep $nap

  # Re-query the remaining wallclock time and convert it to seconds; empty
  # fields (all zeros, or no answer from qstat) count as zero.
  set time_left = `qstat -l --header TimeRemaining $jid | grep -F TimeRemaining | sed 's/^ *TimeRemaining *: *\([0-9]*:[0-9]*:[0-9]*\) */\1/' `
  set hrs  = `echo $time_left | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\1/' `
  set mins = `echo $time_left | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\2/' `
  set secs = `echo $time_left | sed 's/^0*\([0-9]*\):0*\([0-9]*\):0*\([0-9]*\)/\3/' `
  if ("$hrs" == "") then
    set hrs = 0
  endif
  if ("$mins" == "") then
    set mins = 0
  endif
  if ("$secs" == "") then
    set secs = 0
  endif
  @ remaining = $secs + 60 * $mins + 3600 * $hrs

  # Publish the updated remaining time and the sampling interval.
  echo "$remaining $sample_interval" > $run/Walltime.Remaining

end
back to top