Revision 59b6a738ea01e4d9f10d4b9def69f2fd39e68f21 authored by Dalei Hao on 02 August 2023, 14:45:53 UTC, committed by Dalei Hao on 02 August 2023, 14:45:53 UTC
1 parent a24e542
Raw File
syslog.summit
#!/bin/csh -f
# Summit syslog script: periodically records model progress and remaining walltime for a running job.
# Usage:
#  mach_syslog <sampling interval (in seconds)> <job identifier> <timestamp> <run directory> <timing directory> <output directory>

# Positional arguments, in the order given on the command line:
#   sampling interval (seconds), job identifier, timestamp (log id),
#   run directory, timing directory, output directory.
set sample_interval = $1
set jid = $2
set lid = $3
set run = $4
set timing = $5
set dir = $6

# Hold off on saving any output until the model has written something to its log.
# The line-count threshold was chosen empirically and may need tuning later.
# (The calling script 'touch'es the e3sm log file before spawning this script,
#  so 'wc' always has a file to read.)
# Disabled: scale the threshold with the node count taken from the job record.
# set ntasks = `bjobs -l -UF $jid | grep -F Started | sed 's/.* Started *\([0-9]*\) *Task.*/\1/' `
# @ nnodes = ($ntasks - 1) / 42
set nnodes = 0
@ target_lines = 20 + $nnodes

# First look after 10 seconds, then re-check once a minute until the log
# has reached the target length.
sleep 10
while (1)
  # 'wc -l' prints "<count> <file>"; keep only the count.
  set log_lines = `wc \-l $run/e3sm.log.$lid | awk '{print $1}'`
  if ($log_lines >= $target_lines) break
  sleep 60
end

# Ask LSF how much wallclock time the job has left and convert it to seconds.
# The expected bjobs output is "H:M:S" followed by one of X, L, or E.
#
# 'sed -n ... p' prints the captured field ONLY when the whole pattern matches.
# If the output has any other shape (empty, a bare "-", an error message, ...)
# the result is an empty string, which the checks below turn into 0. Without
# this, the unmatched text would be passed straight into the '@' arithmetic
# and abort the script with an expression error.
#
# Leading zeros are dropped from each field (e.g. "08" -> "8") because some
# csh/tcsh versions interpret constants that start with 0 as octal, which makes
# "08" and "09" invalid numbers.
set time_left = `bjobs -noheader -hms -o "time_left" $jid`
set remaining_hours = `echo $time_left | sed -n 's/^0*\([0-9][0-9]*\):[0-9]*:[0-9]* *[XLE].*/\1/p' `
set remaining_mins  = `echo $time_left | sed -n 's/^[0-9]*:0*\([0-9][0-9]*\):[0-9]* *[XLE].*/\1/p' `
set remaining_secs  = `echo $time_left | sed -n 's/^[0-9]*:[0-9]*:0*\([0-9][0-9]*\) *[XLE].*/\1/p' `
# Any field that could not be parsed counts as zero.
if ("X$remaining_hours" == "X") set remaining_hours = 0
if ("X$remaining_mins" == "X")  set remaining_mins  = 0
if ("X$remaining_secs" == "X")  set remaining_secs  = 0
@ remaining = 3600 * $remaining_hours + 60 * $remaining_mins + $remaining_secs

# Publish "<seconds remaining> <sampling interval>" in the run directory.
cat > $run/Walltime.Remaining <<EOF1
$remaining $sample_interval
EOF1

# Save a copy of the model log as it looks now, tagged with the remaining time.
/bin/cp --preserve=timestamps $run/e3sm.log.$lid $dir/e3sm.log.$lid.$remaining

# If no positive remaining time could be determined, the sampling loop that
# follows will not run, so capture the queue state once here instead.
if ($remaining <= 0) then
  bjobs -r -u all > $dir/bjobsru_all.$lid.$remaining
  bjobs -r -u all -o 'jobid slots exec_host' > $dir/bjobsru_allo.$lid.$remaining
endif

# Sampling loop: while the job still has wallclock time left, record the latest
# progress lines from each component log, copy new timing files, snapshot the
# queue, sleep for the sampling interval, and then refresh the remaining time.
while ($remaining > 0)
  # For each component, append a header with the current remaining time followed
  # by the last four progress lines found in that component's log.
  echo "Wallclock time remaining: $remaining" >> $dir/atm.log.$lid.step
  grep -Fa -e "nstep" -e "model date" $run/*atm.log.$lid | tail -n 4 >> $dir/atm.log.$lid.step
  echo "Wallclock time remaining: $remaining" >> $dir/lnd.log.$lid.step
  grep -Fa -e "timestep" -e "model date" $run/*lnd.log.$lid | tail -n 4 >> $dir/lnd.log.$lid.step
  echo "Wallclock time remaining: $remaining" >> $dir/ocn.log.$lid.step
  grep -Fa -e "timestep" -e "Step number" -e "model date" $run/*ocn.log.$lid | tail -n 4 >> $dir/ocn.log.$lid.step
  echo "Wallclock time remaining: $remaining" >> $dir/ice.log.$lid.step
  grep -Fa -e "timestep" -e "istep" -e "model date" $run/*ice.log.$lid | tail -n 4 >> $dir/ice.log.$lid.step
  echo "Wallclock time remaining: $remaining" >> $dir/rof.log.$lid.step
  grep -Fa "model date" $run/*rof.log.$lid | tail -n 4 >> $dir/rof.log.$lid.step
  # The coupler gets two files: the full list of "model date" lines (rewritten
  # on every pass) and the running per-sample excerpt.
  grep -Fa "model date" $run/*cpl.log.$lid  > $dir/cpl.log.$lid.step-all
  echo "Wallclock time remaining: $remaining" >> $dir/cpl.log.$lid.step
  tail -n 4 $dir/cpl.log.$lid.step-all >> $dir/cpl.log.$lid.step

  # Copy timing files that are newer than the copies already saved (-u).
  /bin/cp --preserve=timestamps -u $timing/* $dir

  # Snapshot of all running jobs, in default and custom-column formats.
  bjobs -r -u all > $dir/bjobsru_all.$lid.$remaining
  bjobs -r -u all -o 'jobid slots exec_host' > $dir/bjobsru_allo.$lid.$remaining
  chmod a+r $dir/*

  # Sleep for the sampling interval, in pieces of at most 120 seconds.
  # sleep $sample_interval
  set sleep_remaining = $sample_interval
  while ($sleep_remaining > 120)
   sleep 120
   @ sleep_remaining = $sleep_remaining - 120
  end
  sleep $sleep_remaining

  # Refresh the remaining wallclock time. The expected bjobs output is "H:M:S"
  # followed by one of X, L, or E.
  # 'sed -n ... p' prints the captured field ONLY when the whole pattern matches;
  # any other output (empty, a bare "-", an error message, ...) yields an empty
  # string, which the checks below turn into 0 so that the loop ends cleanly
  # instead of aborting in the '@' arithmetic.
  # Leading zeros are dropped from each field because some csh/tcsh versions
  # interpret constants that start with 0 as octal ("08"/"09" are then invalid).
  set time_left = `bjobs -noheader -hms -o "time_left" $jid`
  set remaining_hours = `echo $time_left | sed -n 's/^0*\([0-9][0-9]*\):[0-9]*:[0-9]* *[XLE].*/\1/p' `
  set remaining_mins  = `echo $time_left | sed -n 's/^[0-9]*:0*\([0-9][0-9]*\):[0-9]* *[XLE].*/\1/p' `
  set remaining_secs  = `echo $time_left | sed -n 's/^[0-9]*:[0-9]*:0*\([0-9][0-9]*\) *[XLE].*/\1/p' `
  if ("X$remaining_hours" == "X") set remaining_hours = 0
  if ("X$remaining_mins" == "X")  set remaining_mins  = 0
  if ("X$remaining_secs" == "X")  set remaining_secs  = 0
  @ remaining = 3600 * $remaining_hours + 60 * $remaining_mins + $remaining_secs

  # Publish "<seconds remaining> <sampling interval>" in the run directory.
  cat > $run/Walltime.Remaining << EOF2
$remaining $sample_interval
EOF2

end
back to top